about summary refs log tree commit diff
path: root/libbutl/builtin.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbutl/builtin.cxx')
-rw-r--r-- libbutl/builtin.cxx 492
1 files changed, 389 insertions, 103 deletions
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx
index 5c1c875..2755bf1 100644
--- a/libbutl/builtin.cxx
+++ b/libbutl/builtin.cxx
@@ -1,28 +1,16 @@
// file : libbutl/builtin.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/builtin.mxx>
-#endif
+#include <libbutl/builtin.hxx>
#ifdef _WIN32
# include <libbutl/win32-utility.hxx>
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <map>
-#include <string>
-#include <vector>
-#include <thread>
-#include <utility> // move(), forward()
-#include <cstdint> // uint*_t
-#include <functional>
-
#include <ios>
#include <chrono>
#include <cerrno>
+#include <cassert>
#include <ostream>
#include <sstream>
#include <cstdlib> // strtoull()
@@ -30,41 +18,16 @@
#include <exception>
#include <system_error>
-#endif
+#include <libbutl/regex.hxx>
+#include <libbutl/path-io.hxx>
+#include <libbutl/utility.hxx> // operator<<(ostream,exception),
+ // throw_generic_error()
+#include <libbutl/optional.hxx>
+#include <libbutl/filesystem.hxx>
+#include <libbutl/small-vector.hxx>
#include <libbutl/builtin-options.hxx>
-#ifdef __cpp_modules_ts
-module butl.builtin;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-import std.threading;
-#endif
-import butl.path;
-import butl.fdstream;
-import butl.timestamp;
-#endif
-
-import butl.regex;
-import butl.path_io;
-import butl.utility; // operator<<(ostream,exception),
- // throw_generic_error()
-import butl.optional;
-import butl.filesystem;
-import butl.small_vector;
-#else
-#include <libbutl/regex.mxx>
-#include <libbutl/path-io.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
-
// Strictly speaking a builtin which reads/writes from/to standard streams
// must be asynchronous so that the caller can communicate with it through
// pipes without being blocked on I/O operations. However, as an optimization,
@@ -507,7 +470,7 @@ namespace butl
if (cbs.create)
call (fail, cbs.create, to, false /* pre */);
- for (const auto& de: dir_iterator (from, false /* ignore_dangling */))
+ for (const auto& de: dir_iterator (from, dir_iterator::no_follow))
{
path f (from / de.path ());
path t (to / de.path ());
@@ -853,6 +816,314 @@ namespace butl
return builtin (r = 0);
}
+  // find <start-path>... [-name <pattern>]
+  //                      [-type <type>]
+  //                      [-mindepth <depth>]
+  //                      [-maxdepth <depth>]
+  //
+  // Print (one per line) the start paths and, recursively, the filesystem
+  // entries below them for which none of the specified primaries evaluates
+  // to false. The -name pattern is matched against the entry base name. The
+  // -type values are f (regular), d (directory), and l (symlink). The depth
+  // of a start path is 0 and the depth values must be unsigned decimal
+  // numbers.
+  //
+  // The paths are printed to out (stdout if nullfd) and the diagnostics is
+  // issued to err (stderr if nullfd). The in descriptor is closed without
+  // being read. Relative start paths are completed using cwd for stating and
+  // traversing but are evaluated and printed unaltered.
+  //
+  // Return 0 if no errors occurred and 1 otherwise.
+  //
+  // Note: must be executed asynchronously.
+  //
+  static uint8_t
+  find (const strings& args,
+        auto_fd in, auto_fd out, auto_fd err,
+        const dir_path& cwd,
+        const builtin_callbacks& cbs) noexcept
+  try
+  {
+    uint8_t r (1);
+    ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ()));
+
+    // Note that on some errors we will issue diagnostics but continue the
+    // search and return with non-zero code at the end. This is consistent
+    // with how major implementations behave (see below).
+    //
+    bool error_occurred (false);
+    auto error = [&cerr, &error_occurred] (bool fail = false)
+    {
+      error_occurred = true;
+      return error_record (cerr, fail, "find");
+    };
+
+    auto fail = [&error] () {return error (true /* fail */);};
+
+    try
+    {
+      in.close ();
+      ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ()));
+
+      // Parse arguments.
+      //
+      cli::vector_scanner scan (args);
+
+      // Currently, we don't expect any options.
+      //
+      parse<find_options> (scan, args, cbs.parse_option, fail);
+
+      // Parse path arguments until the first primary (starts with '-') is
+      // encountered.
+      //
+      small_vector<path, 1> paths;
+
+      while (scan.more ())
+      {
+        if (*scan.peek () == '-')
+          break;
+
+        try
+        {
+          paths.emplace_back (scan.next ());
+        }
+        catch (const invalid_path& e)
+        {
+          fail () << "invalid path '" << e.path << "'";
+        }
+      }
+
+      // Note that POSIX doesn't explicitly describe the behavior if no paths
+      // are specified on the command line. On Linux the current directory is
+      // assumed in this case. We, however, will follow the FreeBSD behavior
+      // and fail since this seems to be less error-prone.
+      //
+      if (paths.empty ())
+        fail () << "missing start path";
+
+      // Parse primaries.
+      //
+      optional<string> name;
+      optional<entry_type> type;
+      optional<uint64_t> min_depth;
+      optional<uint64_t> max_depth;
+
+      while (scan.more ())
+      {
+        const char* p (scan.next ());
+
+        // Return the string value of the current primary. Fail if absent or
+        // empty, unless empty value is allowed.
+        //
+        auto str = [p, &scan, &fail] (bool allow_empty = false)
+        {
+          if (!scan.more ())
+          {
+            fail () << "missing value for primary '" << p << "'";
+          }
+
+          string n (p); // Save for diagnostics.
+          string r (scan.next ());
+
+          if (r.empty () && !allow_empty)
+            fail () << "empty value for primary '" << n << "'";
+
+          return r;
+        };
+
+        // Return the unsigned numeric value of the current primary. Fail if
+        // absent or is not a valid number.
+        //
+        auto num = [p, &str, &fail] ()
+        {
+          string n (p); // Save for diagnostics.
+          string s (str ());
+
+          // Note that strtoull() skips leading whitespaces and accepts an
+          // optional sign (silently negating the value for '-'), so make
+          // sure the value starts with a digit.
+          //
+          bool d (!s.empty () && s[0] >= '0' && s[0] <= '9');
+
+          const char* b (s.c_str ());
+          char* e (nullptr);
+          errno = 0; // We must clear it according to POSIX.
+          uint64_t r (strtoull (b, &e, 10)); // Can't throw.
+
+          if (!d || errno == ERANGE || e != b + s.size ())
+            fail () << "invalid value '" << s << "' for primary '" << n << "'";
+
+          return r;
+        };
+
+        if (strcmp (p, "-name") == 0)
+        {
+          // Note that the empty never-matching pattern is allowed.
+          //
+          name = str (true /* allow_empty */);
+        }
+        else if (strcmp (p, "-type") == 0)
+        {
+          string s (str ());
+          char t (s.size () == 1 ? s[0] : '\0');
+
+          switch (t)
+          {
+          case 'f': type = entry_type::regular;   break;
+          case 'd': type = entry_type::directory; break;
+          case 'l': type = entry_type::symlink;   break;
+          default:  fail () << "invalid value '" << s << "' for primary '-type'";
+          }
+        }
+        else if (strcmp (p, "-mindepth") == 0)
+        {
+          min_depth = num ();
+        }
+        else if (strcmp (p, "-maxdepth") == 0)
+        {
+          max_depth = num ();
+        }
+        else
+          fail () << "unknown primary '" << p << "'";
+      }
+
+      // Print the path if the expression evaluates to true for it. Traverse
+      // further down if the path refers to a directory and the maximum depth
+      // is not specified or is not reached.
+      //
+      // Note that paths for evaluating/printing (pp) and for
+      // stating/traversing (ap) are passed separately. The former is
+      // potentially relative and the latter is absolute. Also note that
+      // for optimization we separately pass the base name simple path.
+      //
+      auto find = [&cout,
+                   &name,
+                   &type,
+                   &min_depth,
+                   &max_depth,
+                   &fail] (const path& pp,
+                           const path& ap,
+                           const path& bp,
+                           entry_type t,
+                           uint64_t level,
+                           const auto& find) -> void
+      {
+        // Print the path if no primary evaluates to false.
+        //
+        if ((!type      || *type == t)          &&
+            (!min_depth || level >= *min_depth) &&
+            (!name      || path_match (bp.string (), *name)))
+        {
+          // Print the trailing directory separator, if present.
+          //
+          if (pp.to_directory ())
+          {
+            // The trailing directory separator can only be present for
+            // paths specified on the command line.
+            //
+            assert (level == 0);
+
+            cout << pp.representation () << '\n';
+          }
+          else
+            cout << pp << '\n';
+        }
+
+        // Traverse the directory, unless the max depth is specified and
+        // reached.
+        //
+        if (t == entry_type::directory && (!max_depth || level < *max_depth))
+        try
+        {
+          for (const auto& de: dir_iterator (path_cast<dir_path> (ap),
+                                             dir_iterator::no_follow))
+          {
+            find (pp / de.path (),
+                  ap / de.path (),
+                  de.path (),
+                  de.ltype (),
+                  level + 1,
+                  find);
+          }
+        }
+        catch (const system_error& e)
+        {
+          fail () << "unable to scan directory '" << pp << "': " << e;
+        }
+      };
+
+      dir_path wd;
+
+      for (const path& p: paths)
+      {
+        // Complete the path if it is relative, so that we can properly stat
+        // it and, potentially, traverse. Note that we don't normalize it
+        // since POSIX requires that the paths should be evaluated (by
+        // primaries) and printed unaltered.
+        //
+        path ap;
+
+        if (p.relative ())
+        {
+          if (wd.empty () && cwd.relative ())
+            wd = current_directory (cwd, fail);
+
+          ap = (!wd.empty () ? wd : cwd) / p;
+        }
+
+        // Issue an error if the path is empty, doesn't exist, or has the
+        // trailing directory separator but refers to a non-directory.
+        //
+        // Note that POSIX doesn't explicitly describe the behavior if any of
+        // the above happens. We will follow the behavior which is common for
+        // both Linux and FreeBSD by issuing the diagnostics, proceeding to
+        // the subsequent paths, and returning with non-zero code at the end.
+        //
+        if (p.empty ())
+        {
+          error () << "empty path";
+          continue;
+        }
+
+        const path& fp (!ap.empty () ? ap : p);
+        pair<bool, entry_stat> pe;
+
+        try
+        {
+          pe = path_entry (fp);
+        }
+        catch (const system_error& e)
+        {
+          fail () << "unable to stat '" << p << "': " << e;
+        }
+
+        if (!pe.first)
+        {
+          error () << "'" << p << "' doesn't exist";
+          continue;
+        }
+
+        entry_type t (pe.second.type);
+
+        if (p.to_directory () && t != entry_type::directory)
+        {
+          error () << "'" << p << "' is not a directory";
+          continue;
+        }
+
+        find (p, fp, p.leaf (), t, 0 /* level */, find);
+      }
+
+      cout.close ();
+      r = !error_occurred ? 0 : 1;
+    }
+    // Can be thrown while closing in or creating, writing to, or closing
+    // cout or writing to cerr.
+    //
+    catch (const io_error& e)
+    {
+      error () << e;
+    }
+    catch (const failed&)
+    {
+      // Diagnostics has already been issued.
+    }
+    catch (const cli::exception& e)
+    {
+      error () << e;
+    }
+
+    cerr.close ();
+    return r;
+  }
+  // In particular, handles io_error exception potentially thrown while
+  // creating, writing to, or closing cerr.
+  //
+  catch (const std::exception&)
+  {
+    return 1;
+  }
+
// Create a symlink to a file or directory at the specified path and calling
// the hook for the created filesystem entries. The paths must be absolute
// and normalized. Fall back to creating a hardlink, if symlink creation is
@@ -1597,13 +1868,24 @@ namespace butl
//
auto_rmfile rm;
+ if (in == nullfd)
+ in = fddup (stdin_fd ());
+
+ if (out == nullfd)
+ out = fddup (stdout_fd ());
+
+ // Turn the streams into the binary mode to preserve the original line
+ // endings.
+ //
+ fdmode (in.get (), fdstream_mode::binary);
+ fdmode (out.get (), fdstream_mode::binary);
+
// Do not throw when failbit is set (getline() failed to extract any
// character).
//
- ifdstream cin (in != nullfd ? move (in) : fddup (stdin_fd ()),
- ifdstream::badbit);
+ ifdstream cin (move (in), ifdstream::badbit);
- ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ()));
+ ofdstream cout (move (out));
// Parse arguments.
//
@@ -1621,15 +1903,6 @@ namespace butl
string replacement;
bool global;
bool print;
-
- subst (const string& re, bool ic, string rp, bool gl, bool pr)
- //
- // Note that ECMAScript is implied if no grammar flag is specified.
- //
- : regex (re, ic ? regex::icase : regex::ECMAScript),
- replacement (move (rp)),
- global (gl),
- print (pr) {}
};
small_vector<subst, 1> substs;
@@ -1652,57 +1925,59 @@ namespace butl
if (delim == '\\' || delim == '\n')
fail () << "invalid delimiter for 's' command in '" << v << "'";
- size_t p (v.find (delim, 2));
- if (p == string::npos)
- fail () << "unterminated 's' command regex in '" << v << "'";
-
- string regex (v, 2, p - 2);
-
- // Empty regex matches nothing, so not of much use.
- //
- if (regex.empty ())
- fail () << "empty regex in 's' command in '" << v << "'";
-
- size_t b (p + 1);
- p = v.find (delim, b);
- if (p == string::npos)
- fail () << "unterminated 's' command replacement in '" << v << "'";
-
- string replacement (v, b, p - b);
-
- // Parse the substitute command flags.
+ // Parse the substitute command regex (as string), replacement, and
+ // flags.
//
+ pair<string, string> rf;
bool icase (false);
bool global (false);
bool print (false);
- char c;
- for (++p; (c = v[p]) != '\0'; ++p)
+ try
{
- switch (c)
+ size_t e;
+ rf = regex_replace_parse (v.c_str () + 1, v.size () - 1, e);
+
+ char c;
+ for (size_t i (e + 1); (c = v[i]) != '\0'; ++i)
{
- case 'i': icase = true; break;
- case 'g': global = true; break;
- case 'p': print = true; break;
- default:
+ switch (c)
{
- fail () << "invalid 's' command flag '" << c << "' in '" << v
- << "'";
+ case 'i': icase = true; break;
+ case 'g': global = true; break;
+ case 'p': print = true; break;
+ default:
+ {
+ fail () << "invalid 's' command flag '" << c << "' in '" << v
+ << "'";
+ }
}
}
}
+ catch (const invalid_argument& e)
+ {
+ fail () << "invalid 's' command '" << v << "': " << e;
+ }
+ // Parse the regex and add the substitution to the list.
+ //
try
{
- substs.emplace_back (regex, icase,
- move (replacement),
- global, print);
+ // Note that ECMAScript is implied if no grammar flag is specified.
+ //
+ regex re (rf.first, icase ? regex::icase : regex::ECMAScript);
+
+ substs.push_back ({move (re),
+ move (rf.second),
+ global,
+ print});
}
catch (const regex_error& e)
{
// Print regex_error description if meaningful (no space).
//
- fail () << "invalid regex '" << regex << "' in '" << v << "'" << e;
+ fail () << "invalid regex '" << rf.first << "' in '" << v << "'"
+ << e;
}
}
@@ -1747,7 +2022,8 @@ namespace butl
cout.open (fdopen (tp,
fdopen_mode::out |
fdopen_mode::truncate |
- fdopen_mode::create,
+ fdopen_mode::create |
+ fdopen_mode::binary,
path_permissions (p)));
}
catch (const io_error& e)
@@ -1771,7 +2047,7 @@ namespace butl
if (!p.empty ())
{
cin.close (); // Flush and close.
- cin.open (p);
+ cin.open (p, fdopen_mode::binary);
}
// Read until failbit is set (throw on badbit).
@@ -1779,6 +2055,13 @@ namespace butl
string ps;
while (getline (cin, ps))
{
+ // Remember the line ending type and, if it is CRLF, strip the
+ // trailing '\r'.
+ //
+ bool crlf (!ps.empty () && ps.back() == '\r');
+ if (crlf)
+ ps.pop_back();
+
bool prn (!ops.quiet ());
for (const subst& s: substs)
@@ -1808,10 +2091,11 @@ namespace butl
}
// Add newline regardless whether the source line is newline-
- // terminated or not (in accordance with POSIX).
+ // terminated or not (in accordance with POSIX), preserving the
+ // original line ending.
//
if (prn)
- cout << ps << '\n';
+ cout << ps << (crlf ? "\r\n" : "\n");
}
cin.close ();
@@ -1916,6 +2200,7 @@ namespace butl
if (!a.empty () && a[0] != '-' && a[0] != '+')
{
char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
n = strtoull (a.c_str (), &e, 10); // Can't throw.
if (errno != ERANGE && e == a.c_str () + a.size ())
@@ -2200,17 +2485,17 @@ namespace butl
{
unique_ptr<builtin::async_state> s (
new builtin::async_state (
+ r,
[fn,
- &r,
&args,
in = move (in), out = move (out), err = move (err),
&cwd,
- &cbs] () mutable noexcept
+ &cbs] () mutable noexcept -> uint8_t
{
- r = fn (args,
- move (in), move (out), move (err),
- cwd,
- cbs);
+ return fn (args,
+ move (in), move (out), move (err),
+ cwd,
+ cbs);
}));
return builtin (r, move (s));
@@ -2250,6 +2535,7 @@ namespace butl
{"diff", {nullptr, 2}},
{"echo", {&async_impl<&echo>, 2}},
{"false", {&false_, 0}},
+ {"find", {&async_impl<&find>, 2}},
{"ln", {&sync_impl<&ln>, 2}},
{"mkdir", {&sync_impl<&mkdir>, 2}},
{"mv", {&sync_impl<&mv>, 2}},
@@ -2269,7 +2555,7 @@ namespace butl
{
if (state_ != nullptr)
{
- unique_lock<mutex> l (state_->mutex);
+ unique_lock l (state_->mutex);
if (!state_->finished)
state_->condv.wait (l, [this] {return state_->finished;});
@@ -2284,7 +2570,7 @@ namespace butl
{
if (state_ != nullptr)
{
- unique_lock<mutex> l (state_->mutex);
+ unique_lock l (state_->mutex);
if (!state_->finished &&
!state_->condv.wait_for (l, tm, [this] {return state_->finished;}))