aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libbutl/builtin.cxx64
-rw-r--r--libbutl/fdstream.cxx18
-rw-r--r--libbutl/fdstream.hxx3
-rw-r--r--libbutl/filesystem.cxx12
-rw-r--r--libbutl/process.cxx96
-rw-r--r--libbutl/regex.hxx2
-rw-r--r--libbutl/regex.txx19
-rw-r--r--libbutl/small-forward-list.hxx1
-rw-r--r--libbutl/small-list.hxx1
-rw-r--r--libbutl/small-vector.hxx1
-rw-r--r--libbutl/string-parser.cxx27
-rw-r--r--libbutl/string-parser.hxx10
-rw-r--r--tests/builtin/find.testscript24
-rw-r--r--tests/regex/testscript23
-rw-r--r--tests/string-parser/driver.cxx24
-rw-r--r--tests/string-parser/testscript30
16 files changed, 293 insertions, 62 deletions
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx
index a5861d4..f428194 100644
--- a/libbutl/builtin.cxx
+++ b/libbutl/builtin.cxx
@@ -18,6 +18,10 @@
#include <exception>
#include <system_error>
+#ifndef _WIN32
+# include <thread> // this_thread::sleep_for()
+#endif
+
#include <libbutl/regex.hxx>
#include <libbutl/path-io.hxx>
#include <libbutl/utility.hxx> // operator<<(ostream,exception),
@@ -1070,7 +1074,13 @@ namespace butl
try
{
- pe = path_entry (fp);
+ // Note that POSIX makes no distinction between symlink path with
+ // and without trailing directory separator and specify that it
+ // should not be deferenced. The major implementations, however,
+ // dereference symlink paths with the trailing directory separator.
+ // We will follow that behavior.
+ //
+ pe = path_entry (fp, p.to_directory () /* follow_symlinks */);
}
catch (const system_error& e)
{
@@ -2483,22 +2493,44 @@ namespace butl
const dir_path& cwd,
const builtin_callbacks& cbs)
{
- unique_ptr<builtin::async_state> s (
- new builtin::async_state (
- r,
- [fn,
- &args,
- in = move (in), out = move (out), err = move (err),
- &cwd,
- &cbs] () mutable noexcept -> uint8_t
- {
- return fn (args,
- move (in), move (out), move (err),
- cwd,
- cbs);
- }));
+#ifndef _WIN32
+ // Retry to create the thread after the "resource temporarily unavailable"
+ // (EAGAIN) failure for 1050ms.
+ //
+ for (size_t i (0);; ++i)
+ try
+#endif
+ {
+ unique_ptr<builtin::async_state> s (
+ new builtin::async_state (
+ r,
+ [fn,
+ &args,
+ in = move (in), out = move (out), err = move (err),
+ &cwd,
+ &cbs] () mutable noexcept -> uint8_t
+ {
+ return fn (args,
+ move (in), move (out), move (err),
+ cwd,
+ cbs);
+ }));
- return builtin (r, move (s));
+ return builtin (r, move (s));
+ }
+#ifndef _WIN32
+ catch (const system_error& e)
+ {
+ if (i != 15 &&
+ e.code ().category () == generic_category () &&
+ e.code ().value () == EAGAIN)
+ {
+ this_thread::sleep_for (i * 10ms);
+ }
+ else
+ throw;
+ }
+#endif
}
template <builtin_impl fn>
diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx
index df5b531..9008923 100644
--- a/libbutl/fdstream.cxx
+++ b/libbutl/fdstream.cxx
@@ -1447,6 +1447,8 @@ namespace butl
bool
fdterm_color (int, bool)
{
+ // Note: Windows version of fdterm_color() uses the same logic for MSYS.
+ //
const char* t (std::getenv ("TERM"));
// This test was lifted from GCC (Emacs shell sets TERM=dumb).
@@ -1875,6 +1877,8 @@ namespace butl
{
// @@ Both GCC and Clang simply call GetConsoleMode() for this check. I
// wonder why we don't do the same? See also fdterm_color() below.
+ // Answer: probably because it would fail for the MSYS pipe (see
+ // below).
// We don't need to close it (see fd_to_handle()).
//
@@ -1978,7 +1982,19 @@ namespace butl
//
DWORD m;
if (!GetConsoleMode (h, &m))
- throw_system_ios_failure (GetLastError ());
+ {
+ DWORD e (GetLastError ());
+
+ // This might be the MSYS terminal connected via a pipe, in which case
+ // we use the POSIX semantics (we can assume it's not any pipe because
+ // calling this function is only valid if fdterm() returned true).
+ //
+ if (GetFileType (h) != FILE_TYPE_PIPE)
+ throw_system_ios_failure (e);
+
+ const char* t (std::getenv ("TERM"));
+ return t != nullptr && strcmp (t, "dumb") != 0;
+ }
// Some terminals (e.g. Windows Terminal) enable VT processing by default.
//
diff --git a/libbutl/fdstream.hxx b/libbutl/fdstream.hxx
index 9c8f786..11abf53 100644
--- a/libbutl/fdstream.hxx
+++ b/libbutl/fdstream.hxx
@@ -906,6 +906,9 @@ namespace butl
// applicable on some platforms, such as Windows). Throw ios::failure on the
// underlying OS error.
//
+ // Note that calling this function is only valid if fdterm() above returned
+ // true for this file descriptor.
+ //
LIBBUTL_SYMEXPORT bool
fdterm_color (int, bool enable);
diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx
index 28a0de8..7c43aa0 100644
--- a/libbutl/filesystem.cxx
+++ b/libbutl/filesystem.cxx
@@ -268,7 +268,17 @@ namespace butl
ec == ERROR_INVALID_NAME ||
ec == ERROR_INVALID_DRIVE ||
ec == ERROR_BAD_PATHNAME ||
- ec == ERROR_BAD_NETPATH;
+ ec == ERROR_BAD_NETPATH ||
+ //
+ // Note that for reasons unknown, filesystem entry stat functions
+ // (GetFileAttributesExA(), etc) may end up with the
+ // ERROR_NOT_READY or ERROR_INVALID_PARAMETER error code rather
+ // than ERROR_INVALID_DRIVE for paths on non-existent drives. Thus,
+ // we treat the ERROR_NOT_READY and ERROR_INVALID_PARAMETER error
+ // codes in the same way as ERROR_INVALID_DRIVE here.
+ //
+ ec == ERROR_NOT_READY ||
+ ec == ERROR_INVALID_PARAMETER;
}
static inline bool
diff --git a/libbutl/process.cxx b/libbutl/process.cxx
index 1b8da98..f6c1745 100644
--- a/libbutl/process.cxx
+++ b/libbutl/process.cxx
@@ -596,25 +596,42 @@ namespace butl
new_env.push_back (nullptr);
}
- ulock l (process_spawn_mutex); // Note: won't be released in child.
-
- // Note that in most non-fork based implementations this call suspends
- // the parent thread until the child process calls exec() or terminates.
- // This avoids "text file busy" issue (see the fork-based code below):
- // due to the process_spawn_mutex lock the execution of the script is
- // delayed until the child closes the descriptor.
+ // Retry to create the child process after the "resource temporarily
+ // unavailable" (EAGAIN) failure for 1050ms.
//
- r = posix_spawn (&handle,
- pp.effect_string (),
- &fa,
- nullptr /* attrp */,
- const_cast<char* const*> (&args[0]),
- new_env.empty ()
- ? environ
- : const_cast<char* const*> (new_env.data ()));
- if (r != 0)
+ ulock l (process_spawn_mutex);
+
+ for (size_t i (0);; ++i)
+ {
+ // Note that in most non-fork based implementations this call suspends
+ // the parent thread until the child process calls exec() or
+ // terminates. This avoids "text file busy" issue (see the fork-based
+ // code below): due to the process_spawn_mutex lock the execution of
+ // the script is delayed until the child closes the descriptor.
+ //
+ r = posix_spawn (&handle,
+ pp.effect_string (),
+ &fa,
+ nullptr /* attrp */,
+ const_cast<char* const*> (&args[0]),
+ new_env.empty ()
+ ? environ
+ : const_cast<char* const*> (new_env.data ()));
+
+ l.unlock (); // Release the lock in parent.
+
+ if (r == 0)
+ break;
+
+ if (i != 15 && r == EAGAIN)
+ {
+ this_thread::sleep_for (i * 10ms);
+ l.lock ();
+ }
+ else
fail (r);
- } // Release the lock in parent.
+ }
+ }
#ifndef LIBBUTL_POSIX_SPAWN_CHDIR
else
#endif
@@ -632,21 +649,38 @@ namespace butl
throw process_error (errno);
};
- ulock l (process_spawn_mutex); // Will not be released in child.
-
- // Note that the file descriptors with the FD_CLOEXEC flag stay open in
- // the child process between fork() and exec() calls. This may cause the
- // "text file busy" issue: if some other thread creates a shell script
- // and the write-open file descriptor leaks into some child process,
- // then exec() for this script fails with ETXTBSY (see exec() man page
- // for details). If that's the case, it feels like such a descriptor
- // should not stay open for too long. Thus, we will retry the exec()
- // calls for about half a second.
+ // Retry to create the child process after the "resource temporarily
+ // unavailable" (EAGAIN) failure for 1050ms.
//
- handle = fork ();
+ ulock l (process_spawn_mutex); // Note: should not be released in child.
+
+ for (size_t i (0);; ++i)
+ {
+ // Note that the file descriptors with the FD_CLOEXEC flag stay open
+ // in the child process between fork() and exec() calls. This may
+ // cause the "text file busy" issue: if some other thread creates a
+ // shell script and the write-open file descriptor leaks into some
+ // child process, then exec() for this script fails with ETXTBSY (see
+ // exec() man page for details). If that's the case, it feels like
+ // such a descriptor should not stay open for too long. Thus, we will
+ // retry the exec() calls for about half a second.
+ //
+ handle = fork ();
+
+ if (handle != 0) // Parent.
+ l.unlock ();
- if (handle == -1)
- fail (false /* child */);
+ if (handle != -1)
+ break;
+
+ if (i != 15 && errno == EAGAIN)
+ {
+ this_thread::sleep_for (i * 10ms);
+ l.lock ();
+ }
+ else
+ fail (false /* child */);
+ } // Release the lock in parent.
if (handle == 0)
{
@@ -752,7 +786,7 @@ namespace butl
fail (true /* child */);
}
- } // Release the lock in parent.
+ }
#endif // LIBBUTL_POSIX_SPAWN_CHDIR
assert (handle != 0); // Shouldn't get here unless in the parent process.
diff --git a/libbutl/regex.hxx b/libbutl/regex.hxx
index 9b31075..69009c3 100644
--- a/libbutl/regex.hxx
+++ b/libbutl/regex.hxx
@@ -52,7 +52,7 @@ namespace butl
const std::basic_string<C>& fmt,
F&& append,
std::regex_constants::match_flag_type =
- std::regex_constants::match_default);
+ std::regex_constants::match_default);
// As above but concatenate non-matched substrings and matched substring
// replacements into a string returning it as well as whether the search
diff --git a/libbutl/regex.txx b/libbutl/regex.txx
index 214d949..ec9f7af 100644
--- a/libbutl/regex.txx
+++ b/libbutl/regex.txx
@@ -217,6 +217,25 @@ namespace butl
bool first_only ((flags & regex_constants::format_first_only) != 0);
bool no_copy ((flags & regex_constants::format_no_copy) != 0);
+ // Note that by default the std::regex_search(), std::regex_replace(), and
+ // std::regex_iterator() functions match the empty substrings in non-empty
+ // strings for all the major implementations. For example:
+ //
+ // - regex_search("bb", "a*") call returns true.
+ //
+ // - regex_replace("bb", "a*", "x") call returns "xbxbx".
+ //
+ // - regex_replace("a", ".*", "x") call returns "xx".
+ //
+ // - Iterating using the regex_iterator("a", ".*") object ends up with the
+ // two matches: "a" and "".
+ //
+ // Since such a behavior feels counter-intuitive, we suppress it using the
+ // match_not_null flag, except for the empty string.
+ //
+ if (!s.empty ())
+ flags |= regex_constants::match_not_null;
+
// Beginning of the last unmatched substring.
//
str_it ub (s.begin ());
diff --git a/libbutl/small-forward-list.hxx b/libbutl/small-forward-list.hxx
index 8d1cf68..db87d69 100644
--- a/libbutl/small-forward-list.hxx
+++ b/libbutl/small-forward-list.hxx
@@ -57,6 +57,7 @@ namespace butl
using buffer_type = small_forward_list_buffer<T, N>;
using allocator_type = small_allocator<T, N, buffer_type>;
using base_type = std::forward_list<T, allocator_type>;
+ using typename base_type::value_type;
small_forward_list ()
: base_type (allocator_type (this)) {}
diff --git a/libbutl/small-list.hxx b/libbutl/small-list.hxx
index 7cb51fd..c75f49a 100644
--- a/libbutl/small-list.hxx
+++ b/libbutl/small-list.hxx
@@ -59,6 +59,7 @@ namespace butl
using buffer_type = small_list_buffer<T, N>;
using allocator_type = small_allocator<T, N, buffer_type>;
using base_type = std::list<T, allocator_type>;
+ using typename base_type::value_type;
small_list ()
: base_type (allocator_type (this)) {}
diff --git a/libbutl/small-vector.hxx b/libbutl/small-vector.hxx
index 44a3ef5..787d73a 100644
--- a/libbutl/small-vector.hxx
+++ b/libbutl/small-vector.hxx
@@ -38,6 +38,7 @@ namespace butl
using buffer_type = small_allocator_buffer<T, N>;
using allocator_type = small_allocator<T, N>;
using base_type = std::vector<T, allocator_type>;
+ using typename base_type::value_type;
small_vector ()
: base_type (allocator_type (this))
diff --git a/libbutl/string-parser.cxx b/libbutl/string-parser.cxx
index af5c1b3..aa7d52c 100644
--- a/libbutl/string-parser.cxx
+++ b/libbutl/string-parser.cxx
@@ -18,16 +18,34 @@ namespace butl
}
vector<pair<string, size_t>>
- parse_quoted_position (const string& s, bool unquote)
+ parse_quoted_position (const string& s, bool unquote, bool comments)
{
vector<pair<string, size_t>> r;
+
+ bool newline (true);
for (auto b (s.begin ()), i (b), e (s.end ()); i != e; )
{
- for (; i != e && space (*i); ++i) ; // Skip spaces.
+ // Skip spaces.
+ //
+ for (; i != e && space (*i); ++i)
+ {
+ if (*i == '\n')
+ newline = true;
+ }
+
+ // Skip comment line.
+ //
+ if (comments && newline && i != e && *i == '#')
+ {
+ for (++i; i != e && *i != '\n'; ++i) ;
+ continue;
+ }
if (i == e) // No more strings.
break;
+ newline = false;
+
string s;
char quoting ('\0'); // Current quoting mode, can be used as bool.
size_t pos (i - b); // String position.
@@ -74,9 +92,10 @@ namespace butl
}
vector<string>
- parse_quoted (const string& s, bool unquote)
+ parse_quoted (const string& s, bool unquote, bool comments)
{
- vector<pair<string, size_t>> sp (parse_quoted_position (s, unquote));
+ vector<pair<string, size_t>> sp (
+ parse_quoted_position (s, unquote, comments));
vector<string> r;
r.reserve (sp.size ());
diff --git a/libbutl/string-parser.hxx b/libbutl/string-parser.hxx
index 9fc20c0..74280a2 100644
--- a/libbutl/string-parser.hxx
+++ b/libbutl/string-parser.hxx
@@ -26,18 +26,22 @@ namespace butl
// Parse a whitespace-separated list of strings. Can contain single or
// double quoted substrings. No escaping is supported. If unquote is true,
- // return one-level unquoted values. Throw invalid_string in case of
+ // return one-level unquoted values. Optionally, assume that the passed
+ // string may contain comment lines (lines with `#` as the first non-
+ // whitespace character) and ignore them. Throw invalid_string in case of
// invalid quoting.
//
LIBBUTL_SYMEXPORT std::vector<std::string>
- parse_quoted (const std::string&, bool unquote);
+ parse_quoted (const std::string&, bool unquote, bool comments = false);
// As above but return a list of string and zero-based position pairs.
// Position is useful for issuing diagnostics about an invalid string
// during second-level parsing.
//
LIBBUTL_SYMEXPORT std::vector<std::pair<std::string, std::size_t>>
- parse_quoted_position (const std::string&, bool unquote);
+ parse_quoted_position (const std::string&,
+ bool unquote,
+ bool comments = false);
// Remove a single level of quotes. Note that the format or the
// correctness of the quotation is not validated.
diff --git a/tests/builtin/find.testscript b/tests/builtin/find.testscript
index b09822c..5971108 100644
--- a/tests/builtin/find.testscript
+++ b/tests/builtin/find.testscript
@@ -44,6 +44,30 @@ $* . -mindepth 12a 2>"find: invalid value '12a' for primary '-mindepth'" == 1
: path
:
{
+ : dir-symlink
+ :
+ {
+ mkdir -p a/c;
+ ln -s a c;
+
+ # If 'c' path is a symlink (may not be the case on Windows), then check
+ # that the find builtin only dereferences it if it is terminated with the
+ # directory separator.
+ #
+ $* c -type l | set p;
+
+ if ($p == 'c')
+ $* c >>EOO
+ c
+ EOO
+ end;
+
+ $* c/ -type d >>EOO
+ c/
+ c/c
+ EOO
+ }
+
: relative
:
{
diff --git a/tests/regex/testscript b/tests/regex/testscript
index 93ad4b6..137469d 100644
--- a/tests/regex/testscript
+++ b/tests/regex/testscript
@@ -63,6 +63,21 @@
:
$* xay '/a/\lVZ/' >xvZy
}
+
+ : empty-substring
+ :
+ : Note that the regex search-based replacement with the match_not_null flag
+ : is broken for older versions of libstdc++ and libc++ (may ignore
+ : match_not_null for the former and may hang for some string/pattern for the
+ : latter).
+ :
+ if (($cxx.id != 'gcc' || $cxx.version.major >= 7) && \
+ ($cxx.id != 'clang' || $cxx.version.major >= 6))
+ {
+ $* '' '/.*/x/' >'x' : empty
+ $* a '/a*/x/' >'x' : match
+ $* aa '/b*/x/' == 1 : no-match
+ }
}
: replace-match
@@ -72,6 +87,14 @@
$* abc '/a(b)c/x\1y/' >xby : match
$* abcd '/a(b)c/x\1yd/' == 1 : no-match
+
+ : empty-substring
+ :
+ {
+ $* '' '/.*/x/' >'x' : empty
+ $* a '/a*/x/' >'x' : match
+ $* ab '/a(c*)(b)/\1\2/' >'b' : match-mid
+ }
}
: invalid-regex-fmt
diff --git a/tests/string-parser/driver.cxx b/tests/string-parser/driver.cxx
index 8cba912..93d2088 100644
--- a/tests/string-parser/driver.cxx
+++ b/tests/string-parser/driver.cxx
@@ -14,13 +14,14 @@
using namespace std;
using namespace butl::string_parser;
-// Usage: argv[0] [-l] [-u] [-p]
+// Usage: argv[0] [-l] [-u] [-p] [-c]
//
// Read and parse lines into strings from STDIN and print them to STDOUT.
//
// -l output each string on a separate line
// -u unquote strings
// -p output positions
+// -c comments
//
int
main (int argc, char* argv[])
@@ -29,6 +30,7 @@ try
bool spl (false); // Print string per line.
bool unquote (false);
bool pos (false);
+ bool comments (false);
for (int i (1); i != argc; ++i)
{
@@ -40,6 +42,8 @@ try
unquote = true;
else if (o == "-p")
pos = true;
+ else if (o == "-c")
+ comments = true;
else
assert (false);
}
@@ -51,11 +55,8 @@ try
cout.exceptions (ios::failbit | ios::badbit);
- string l;
- while (getline (cin, l))
+ auto print = [spl, pos] (const vector<pair<string, size_t>>& v)
{
- vector<pair<string, size_t>> v (parse_quoted_position (l, unquote));
-
if (!spl)
{
for (auto b (v.cbegin ()), i (b), e (v.cend ()); i != e; ++i)
@@ -81,6 +82,19 @@ try
cout << s.first << endl;
}
}
+ };
+
+ if (!comments)
+ {
+ string l;
+ while (getline (cin, l))
+ print (parse_quoted_position (l, unquote));
+ }
+ else
+ {
+ string s;
+ getline (cin, s, '\0');
+ print (parse_quoted_position (s, unquote, true /* comments */));
}
return 0;
diff --git a/tests/string-parser/testscript b/tests/string-parser/testscript
index d484e01..05c2807 100644
--- a/tests/string-parser/testscript
+++ b/tests/string-parser/testscript
@@ -30,6 +30,36 @@
x "y z
EOO
}
+
+ : comments
+ :
+ {
+ $* -c <<EOI >>EOO
+ # Comment 1
+ #
+ abc #xyz
+
+ # Comment 2
+ #
+ abc#
+
+ "# not a comment 3" #not-a-comment4
+
+ "abc
+ # not a comment 5
+ "
+ # Comment 6
+ EOI
+ abc
+ #xyz
+ abc#
+ "# not a comment 3"
+ #not-a-comment4
+ "abc
+ # not a comment 5
+ "
+ EOO
+ }
}
: invalid