diff options
-rw-r--r-- | libbutl/builtin.cxx | 64 | ||||
-rw-r--r-- | libbutl/fdstream.cxx | 18 | ||||
-rw-r--r-- | libbutl/fdstream.hxx | 3 | ||||
-rw-r--r-- | libbutl/filesystem.cxx | 12 | ||||
-rw-r--r-- | libbutl/process.cxx | 96 | ||||
-rw-r--r-- | libbutl/regex.hxx | 2 | ||||
-rw-r--r-- | libbutl/regex.txx | 19 | ||||
-rw-r--r-- | libbutl/small-forward-list.hxx | 1 | ||||
-rw-r--r-- | libbutl/small-list.hxx | 1 | ||||
-rw-r--r-- | libbutl/small-vector.hxx | 1 | ||||
-rw-r--r-- | libbutl/string-parser.cxx | 27 | ||||
-rw-r--r-- | libbutl/string-parser.hxx | 10 | ||||
-rw-r--r-- | tests/builtin/find.testscript | 24 | ||||
-rw-r--r-- | tests/regex/testscript | 23 | ||||
-rw-r--r-- | tests/string-parser/driver.cxx | 24 | ||||
-rw-r--r-- | tests/string-parser/testscript | 30 |
16 files changed, 293 insertions, 62 deletions
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx index a5861d4..f428194 100644 --- a/libbutl/builtin.cxx +++ b/libbutl/builtin.cxx @@ -18,6 +18,10 @@ #include <exception> #include <system_error> +#ifndef _WIN32 +# include <thread> // this_thread::sleep_for() +#endif + #include <libbutl/regex.hxx> #include <libbutl/path-io.hxx> #include <libbutl/utility.hxx> // operator<<(ostream,exception), @@ -1070,7 +1074,13 @@ namespace butl try { - pe = path_entry (fp); + // Note that POSIX makes no distinction between symlink path with + // and without trailing directory separator and specify that it + // should not be deferenced. The major implementations, however, + // dereference symlink paths with the trailing directory separator. + // We will follow that behavior. + // + pe = path_entry (fp, p.to_directory () /* follow_symlinks */); } catch (const system_error& e) { @@ -2483,22 +2493,44 @@ namespace butl const dir_path& cwd, const builtin_callbacks& cbs) { - unique_ptr<builtin::async_state> s ( - new builtin::async_state ( - r, - [fn, - &args, - in = move (in), out = move (out), err = move (err), - &cwd, - &cbs] () mutable noexcept -> uint8_t - { - return fn (args, - move (in), move (out), move (err), - cwd, - cbs); - })); +#ifndef _WIN32 + // Retry to create the thread after the "resource temporarily unavailable" + // (EAGAIN) failure for 1050ms. + // + for (size_t i (0);; ++i) + try +#endif + { + unique_ptr<builtin::async_state> s ( + new builtin::async_state ( + r, + [fn, + &args, + in = move (in), out = move (out), err = move (err), + &cwd, + &cbs] () mutable noexcept -> uint8_t + { + return fn (args, + move (in), move (out), move (err), + cwd, + cbs); + })); - return builtin (r, move (s)); + return builtin (r, move (s)); + } +#ifndef _WIN32 + catch (const system_error& e) + { + if (i != 15 && + e.code ().category () == generic_category () && + e.code ().value () == EAGAIN) + { + this_thread::sleep_for (i * 10ms); + } + else + throw; + } +#endif } template <builtin_impl fn> diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx index df5b531..9008923 100644 --- a/libbutl/fdstream.cxx +++ b/libbutl/fdstream.cxx @@ -1447,6 +1447,8 @@ namespace butl bool fdterm_color (int, bool) { + // Note: Windows version of fdterm_color() uses the same logic for MSYS. + // const char* t (std::getenv ("TERM")); // This test was lifted from GCC (Emacs shell sets TERM=dumb). @@ -1875,6 +1877,8 @@ namespace butl { // @@ Both GCC and Clang simply call GetConsoleMode() for this check. I // wonder why we don't do the same? See also fdterm_color() below. + // Answer: probably because it would fail for the MSYS pipe (see + // below). // We don't need to close it (see fd_to_handle()). // @@ -1978,7 +1982,19 @@ namespace butl // DWORD m; if (!GetConsoleMode (h, &m)) - throw_system_ios_failure (GetLastError ()); + { + DWORD e (GetLastError ()); + + // This might be the MSYS terminal connected via a pipe, in which case + // we use the POSIX semantics (we can assume it's not any pipe because + // calling this function is only valid if fdterm() returned true). + // + if (GetFileType (h) != FILE_TYPE_PIPE) + throw_system_ios_failure (e); + + const char* t (std::getenv ("TERM")); + return t != nullptr && strcmp (t, "dumb") != 0; + } // Some terminals (e.g. Windows Terminal) enable VT processing by default. // diff --git a/libbutl/fdstream.hxx b/libbutl/fdstream.hxx index 9c8f786..11abf53 100644 --- a/libbutl/fdstream.hxx +++ b/libbutl/fdstream.hxx @@ -906,6 +906,9 @@ namespace butl // applicable on some platforms, such as Windows). Throw ios::failure on the // underlying OS error. // + // Note that calling this function is only valid if fdterm() above returned + // true for this file descriptor. + // LIBBUTL_SYMEXPORT bool fdterm_color (int, bool enable); diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx index 28a0de8..7c43aa0 100644 --- a/libbutl/filesystem.cxx +++ b/libbutl/filesystem.cxx @@ -268,7 +268,17 @@ namespace butl ec == ERROR_INVALID_NAME || ec == ERROR_INVALID_DRIVE || ec == ERROR_BAD_PATHNAME || - ec == ERROR_BAD_NETPATH; + ec == ERROR_BAD_NETPATH || + // + // Note that for reasons unknown, filesystem entry stat functions + // (GetFileAttributesExA(), etc) may end up with the + // ERROR_NOT_READY or ERROR_INVALID_PARAMETER error code rather + // than ERROR_INVALID_DRIVE for paths on non-existent drives. Thus, + // we treat the ERROR_NOT_READY and ERROR_INVALID_PARAMETER error + // codes in the same way as ERROR_INVALID_DRIVE here. + // + ec == ERROR_NOT_READY || + ec == ERROR_INVALID_PARAMETER; } static inline bool diff --git a/libbutl/process.cxx b/libbutl/process.cxx index 1b8da98..f6c1745 100644 --- a/libbutl/process.cxx +++ b/libbutl/process.cxx @@ -596,25 +596,42 @@ namespace butl new_env.push_back (nullptr); } - ulock l (process_spawn_mutex); // Note: won't be released in child. - - // Note that in most non-fork based implementations this call suspends - // the parent thread until the child process calls exec() or terminates. - // This avoids "text file busy" issue (see the fork-based code below): - // due to the process_spawn_mutex lock the execution of the script is - // delayed until the child closes the descriptor. + // Retry to create the child process after the "resource temporarily + // unavailable" (EAGAIN) failure for 1050ms. // - r = posix_spawn (&handle, - pp.effect_string (), - &fa, - nullptr /* attrp */, - const_cast<char* const*> (&args[0]), - new_env.empty () - ? environ - : const_cast<char* const*> (new_env.data ())); - if (r != 0) + ulock l (process_spawn_mutex); + + for (size_t i (0);; ++i) + { + // Note that in most non-fork based implementations this call suspends + // the parent thread until the child process calls exec() or + // terminates. This avoids "text file busy" issue (see the fork-based + // code below): due to the process_spawn_mutex lock the execution of + // the script is delayed until the child closes the descriptor. + // + r = posix_spawn (&handle, + pp.effect_string (), + &fa, + nullptr /* attrp */, + const_cast<char* const*> (&args[0]), + new_env.empty () + ? environ + : const_cast<char* const*> (new_env.data ())); + + l.unlock (); // Release the lock in parent. + + if (r == 0) + break; + + if (i != 15 && r == EAGAIN) + { + this_thread::sleep_for (i * 10ms); + l.lock (); + } + else fail (r); - } // Release the lock in parent. + } + } #ifndef LIBBUTL_POSIX_SPAWN_CHDIR else #endif @@ -632,21 +649,38 @@ namespace butl throw process_error (errno); }; - ulock l (process_spawn_mutex); // Will not be released in child. - - // Note that the file descriptors with the FD_CLOEXEC flag stay open in - // the child process between fork() and exec() calls. This may cause the - // "text file busy" issue: if some other thread creates a shell script - // and the write-open file descriptor leaks into some child process, - // then exec() for this script fails with ETXTBSY (see exec() man page - // for details). If that's the case, it feels like such a descriptor - // should not stay open for too long. Thus, we will retry the exec() - // calls for about half a second. + // Retry to create the child process after the "resource temporarily + // unavailable" (EAGAIN) failure for 1050ms. // - handle = fork (); + ulock l (process_spawn_mutex); // Note: should not be released in child. + + for (size_t i (0);; ++i) + { + // Note that the file descriptors with the FD_CLOEXEC flag stay open + // in the child process between fork() and exec() calls. This may + // cause the "text file busy" issue: if some other thread creates a + // shell script and the write-open file descriptor leaks into some + // child process, then exec() for this script fails with ETXTBSY (see + // exec() man page for details). If that's the case, it feels like + // such a descriptor should not stay open for too long. Thus, we will + // retry the exec() calls for about half a second. + // + handle = fork (); + + if (handle != 0) // Parent. + l.unlock (); - if (handle == -1) - fail (false /* child */); + if (handle != -1) + break; + + if (i != 15 && errno == EAGAIN) + { + this_thread::sleep_for (i * 10ms); + l.lock (); + } + else + fail (false /* child */); + } // Release the lock in parent. if (handle == 0) { @@ -752,7 +786,7 @@ namespace butl fail (true /* child */); } - } // Release the lock in parent. + } #endif // LIBBUTL_POSIX_SPAWN_CHDIR assert (handle != 0); // Shouldn't get here unless in the parent process. diff --git a/libbutl/regex.hxx b/libbutl/regex.hxx index 9b31075..69009c3 100644 --- a/libbutl/regex.hxx +++ b/libbutl/regex.hxx @@ -52,7 +52,7 @@ namespace butl const std::basic_string<C>& fmt, F&& append, std::regex_constants::match_flag_type = - std::regex_constants::match_default); + std::regex_constants::match_default); // As above but concatenate non-matched substrings and matched substring // replacements into a string returning it as well as whether the search diff --git a/libbutl/regex.txx b/libbutl/regex.txx index 214d949..ec9f7af 100644 --- a/libbutl/regex.txx +++ b/libbutl/regex.txx @@ -217,6 +217,25 @@ namespace butl bool first_only ((flags & regex_constants::format_first_only) != 0); bool no_copy ((flags & regex_constants::format_no_copy) != 0); + // Note that by default the std::regex_search(), std::regex_replace(), and + // std::regex_iterator() functions match the empty substrings in non-empty + // strings for all the major implementations. For example: + // + // - regex_search("bb", "a*") call returns true. + // + // - regex_replace("bb", "a*", "x") call returns "xbxbx". + // + // - regex_replace("a", ".*", "x") call returns "xx". + // + // - Iterating using the regex_iterator("a", ".*") object ends up with the + // two matches: "a" and "". + // + // Since such a behavior feels counter-intuitive, we suppress it using the + // match_not_null flag, except for the empty string. + // + if (!s.empty ()) + flags |= regex_constants::match_not_null; + // Beginning of the last unmatched substring. // str_it ub (s.begin ()); diff --git a/libbutl/small-forward-list.hxx b/libbutl/small-forward-list.hxx index 8d1cf68..db87d69 100644 --- a/libbutl/small-forward-list.hxx +++ b/libbutl/small-forward-list.hxx @@ -57,6 +57,7 @@ namespace butl using buffer_type = small_forward_list_buffer<T, N>; using allocator_type = small_allocator<T, N, buffer_type>; using base_type = std::forward_list<T, allocator_type>; + using typename base_type::value_type; small_forward_list () : base_type (allocator_type (this)) {} diff --git a/libbutl/small-list.hxx b/libbutl/small-list.hxx index 7cb51fd..c75f49a 100644 --- a/libbutl/small-list.hxx +++ b/libbutl/small-list.hxx @@ -59,6 +59,7 @@ namespace butl using buffer_type = small_list_buffer<T, N>; using allocator_type = small_allocator<T, N, buffer_type>; using base_type = std::list<T, allocator_type>; + using typename base_type::value_type; small_list () : base_type (allocator_type (this)) {} diff --git a/libbutl/small-vector.hxx b/libbutl/small-vector.hxx index 44a3ef5..787d73a 100644 --- a/libbutl/small-vector.hxx +++ b/libbutl/small-vector.hxx @@ -38,6 +38,7 @@ namespace butl using buffer_type = small_allocator_buffer<T, N>; using allocator_type = small_allocator<T, N>; using base_type = std::vector<T, allocator_type>; + using typename base_type::value_type; small_vector () : base_type (allocator_type (this)) diff --git a/libbutl/string-parser.cxx b/libbutl/string-parser.cxx index af5c1b3..aa7d52c 100644 --- a/libbutl/string-parser.cxx +++ b/libbutl/string-parser.cxx @@ -18,16 +18,34 @@ namespace butl } vector<pair<string, size_t>> - parse_quoted_position (const string& s, bool unquote) + parse_quoted_position (const string& s, bool unquote, bool comments) { vector<pair<string, size_t>> r; + + bool newline (true); for (auto b (s.begin ()), i (b), e (s.end ()); i != e; ) { - for (; i != e && space (*i); ++i) ; // Skip spaces. + // Skip spaces. + // + for (; i != e && space (*i); ++i) + { + if (*i == '\n') + newline = true; + } + + // Skip comment line. + // + if (comments && newline && i != e && *i == '#') + { + for (++i; i != e && *i != '\n'; ++i) ; + continue; + } if (i == e) // No more strings. break; + newline = false; + string s; char quoting ('\0'); // Current quoting mode, can be used as bool. size_t pos (i - b); // String position. @@ -74,9 +92,10 @@ namespace butl } vector<string> - parse_quoted (const string& s, bool unquote) + parse_quoted (const string& s, bool unquote, bool comments) { - vector<pair<string, size_t>> sp (parse_quoted_position (s, unquote)); + vector<pair<string, size_t>> sp ( + parse_quoted_position (s, unquote, comments)); vector<string> r; r.reserve (sp.size ()); diff --git a/libbutl/string-parser.hxx b/libbutl/string-parser.hxx index 9fc20c0..74280a2 100644 --- a/libbutl/string-parser.hxx +++ b/libbutl/string-parser.hxx @@ -26,18 +26,22 @@ namespace butl // Parse a whitespace-separated list of strings. Can contain single or // double quoted substrings. No escaping is supported. If unquote is true, - // return one-level unquoted values. Throw invalid_string in case of + // return one-level unquoted values. Optionally, assume that the passed + // string may contain comment lines (lines with `#` as the first non- + // whitespace character) and ignore them. Throw invalid_string in case of // invalid quoting. // LIBBUTL_SYMEXPORT std::vector<std::string> - parse_quoted (const std::string&, bool unquote); + parse_quoted (const std::string&, bool unquote, bool comments = false); // As above but return a list of string and zero-based position pairs. // Position is useful for issuing diagnostics about an invalid string // during second-level parsing. // LIBBUTL_SYMEXPORT std::vector<std::pair<std::string, std::size_t>> - parse_quoted_position (const std::string&, bool unquote); + parse_quoted_position (const std::string&, + bool unquote, + bool comments = false); // Remove a single level of quotes. Note that the format or the // correctness of the quotation is not validated. diff --git a/tests/builtin/find.testscript b/tests/builtin/find.testscript index b09822c..5971108 100644 --- a/tests/builtin/find.testscript +++ b/tests/builtin/find.testscript @@ -44,6 +44,30 @@ $* . -mindepth 12a 2>"find: invalid value '12a' for primary '-mindepth'" == 1 : path : { + : dir-symlink + : + { + mkdir -p a/c; + ln -s a c; + + # If 'c' path is a symlink (may not be the case on Windows), then check + # that the find builtin only dereferences it if it is terminated with the + # directory separator. + # + $* c -type l | set p; + + if ($p == 'c') + $* c >>EOO + c + EOO + end; + + $* c/ -type d >>EOO + c/ + c/c + EOO + } + : relative : { diff --git a/tests/regex/testscript b/tests/regex/testscript index 93ad4b6..137469d 100644 --- a/tests/regex/testscript +++ b/tests/regex/testscript @@ -63,6 +63,21 @@ : $* xay '/a/\lVZ/' >xvZy } + + : empty-substring + : + : Note that the regex search-based replacement with the match_not_null flag + : is broken for older versions of libstdc++ and libc++ (may ignore + : match_not_null for the former and may hang for some string/pattern for the + : latter). + : + if (($cxx.id != 'gcc' || $cxx.version.major >= 7) && \ + ($cxx.id != 'clang' || $cxx.version.major >= 6)) + { + $* '' '/.*/x/' >'x' : empty + $* a '/a*/x/' >'x' : match + $* aa '/b*/x/' == 1 : no-match + } } : replace-match @@ -72,6 +87,14 @@ $* abc '/a(b)c/x\1y/' >xby : match $* abcd '/a(b)c/x\1yd/' == 1 : no-match + + : empty-substring + : + { + $* '' '/.*/x/' >'x' : empty + $* a '/a*/x/' >'x' : match + $* ab '/a(c*)(b)/\1\2/' >'b' : match-mid + } } : invalid-regex-fmt diff --git a/tests/string-parser/driver.cxx b/tests/string-parser/driver.cxx index 8cba912..93d2088 100644 --- a/tests/string-parser/driver.cxx +++ b/tests/string-parser/driver.cxx @@ -14,13 +14,14 @@ using namespace std; using namespace butl::string_parser; -// Usage: argv[0] [-l] [-u] [-p] +// Usage: argv[0] [-l] [-u] [-p] [-c] // // Read and parse lines into strings from STDIN and print them to STDOUT. // // -l output each string on a separate line // -u unquote strings // -p output positions +// -c comments // int main (int argc, char* argv[]) @@ -29,6 +30,7 @@ try bool spl (false); // Print string per line. bool unquote (false); bool pos (false); + bool comments (false); for (int i (1); i != argc; ++i) { @@ -40,6 +42,8 @@ try unquote = true; else if (o == "-p") pos = true; + else if (o == "-c") + comments = true; else assert (false); } @@ -51,11 +55,8 @@ try cout.exceptions (ios::failbit | ios::badbit); - string l; - while (getline (cin, l)) + auto print = [spl, pos] (const vector<pair<string, size_t>>& v) { - vector<pair<string, size_t>> v (parse_quoted_position (l, unquote)); - if (!spl) { for (auto b (v.cbegin ()), i (b), e (v.cend ()); i != e; ++i) @@ -81,6 +82,19 @@ try cout << s.first << endl; } } + }; + + if (!comments) + { + string l; + while (getline (cin, l)) + print (parse_quoted_position (l, unquote)); + } + else + { + string s; + getline (cin, s, '\0'); + print (parse_quoted_position (s, unquote, true /* comments */)); } return 0; diff --git a/tests/string-parser/testscript b/tests/string-parser/testscript index d484e01..05c2807 100644 --- a/tests/string-parser/testscript +++ b/tests/string-parser/testscript @@ -30,6 +30,36 @@ x "y z EOO } + + : comments + : + { + $* -c <<EOI >>EOO + # Comment 1 + # + abc #xyz + + # Comment 2 + # + abc# + + "# not a comment 3" #not-a-comment4 + + "abc + # not a comment 5 + " + # Comment 6 + EOI + abc + #xyz + abc# + "# not a comment 3" + #not-a-comment4 + "abc + # not a comment 5 + " + EOO + } } : invalid |