diff options
Diffstat (limited to 'libbutl/builtin.cxx')
-rw-r--r-- | libbutl/builtin.cxx | 492 |
1 files changed, 389 insertions, 103 deletions
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx index 5c1c875..2755bf1 100644 --- a/libbutl/builtin.cxx +++ b/libbutl/builtin.cxx @@ -1,28 +1,16 @@ // file : libbutl/builtin.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include <libbutl/builtin.mxx> -#endif +#include <libbutl/builtin.hxx> #ifdef _WIN32 # include <libbutl/win32-utility.hxx> #endif -#include <cassert> - -#ifndef __cpp_lib_modules_ts -#include <map> -#include <string> -#include <vector> -#include <thread> -#include <utility> // move(), forward() -#include <cstdint> // uint*_t -#include <functional> - #include <ios> #include <chrono> #include <cerrno> +#include <cassert> #include <ostream> #include <sstream> #include <cstdlib> // strtoull() @@ -30,41 +18,16 @@ #include <exception> #include <system_error> -#endif +#include <libbutl/regex.hxx> +#include <libbutl/path-io.hxx> +#include <libbutl/utility.hxx> // operator<<(ostream,exception), + // throw_generic_error() +#include <libbutl/optional.hxx> +#include <libbutl/filesystem.hxx> +#include <libbutl/small-vector.hxx> #include <libbutl/builtin-options.hxx> -#ifdef __cpp_modules_ts -module butl.builtin; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -import std.threading; -#endif -import butl.path; -import butl.fdstream; -import butl.timestamp; -#endif - -import butl.regex; -import butl.path_io; -import butl.utility; // operator<<(ostream,exception), - // throw_generic_error() -import butl.optional; -import butl.filesystem; -import butl.small_vector; -#else -#include <libbutl/regex.mxx> -#include <libbutl/path-io.mxx> -#include <libbutl/utility.mxx> -#include <libbutl/optional.mxx> -#include <libbutl/filesystem.mxx> -#include <libbutl/small-vector.mxx> -#endif - // Strictly speaking a builtin which reads/writes from/to standard streams // must be asynchronous so that the caller can communicate with it through // pipes without being blocked on I/O operations. However, as an optimization, @@ -507,7 +470,7 @@ namespace butl if (cbs.create) call (fail, cbs.create, to, false /* pre */); - for (const auto& de: dir_iterator (from, false /* ignore_dangling */)) + for (const auto& de: dir_iterator (from, dir_iterator::no_follow)) { path f (from / de.path ()); path t (to / de.path ()); @@ -853,6 +816,314 @@ namespace butl return builtin (r = 0); } + // find <start-path>... [-name <pattern>] + // [-type <type>] + // [-mindepth <depth>] + // [-maxdepth <depth>] + // + // Note: must be executed asynchronously. + // + static uint8_t + find (const strings& args, + auto_fd in, auto_fd out, auto_fd err, + const dir_path& cwd, + const builtin_callbacks& cbs) noexcept + try + { + uint8_t r (1); + ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ())); + + // Note that on some errors we will issue diagnostics but continue the + // search and return with non-zero code at the end. This is consistent + // with how major implementations behave (see below). + // + bool error_occured (false); + auto error = [&cerr, &error_occured] (bool fail = false) + { + error_occured = true; + return error_record (cerr, fail, "find"); + }; + + auto fail = [&error] () {return error (true /* fail */);}; + + try + { + in.close (); + ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ())); + + // Parse arguments. + // + cli::vector_scanner scan (args); + + // Currently, we don't expect any options. + // + parse<find_options> (scan, args, cbs.parse_option, fail); + + // Parse path arguments until the first primary (starts with '-') is + // encountered. + // + small_vector<path, 1> paths; + + while (scan.more ()) + { + if (*scan.peek () == '-') + break; + + try + { + paths.emplace_back (scan.next ()); + } + catch (const invalid_path& e) + { + fail () << "invalid path '" << e.path << "'"; + } + } + + // Note that POSIX doesn't explicitly describe the behavior if no paths + // are specified on the command line. On Linux the current directory is + // assumed in this case. We, however, will follow the FreeBSD behavior + // and fail since this seems to be less error-prone. + // + if (paths.empty ()) + fail () << "missing start path"; + + // Parse primaries. + // + optional<string> name; + optional<entry_type> type; + optional<uint64_t> min_depth; + optional<uint64_t> max_depth; + + while (scan.more ()) + { + const char* p (scan.next ()); + + // Return the string value of the current primary. Fail if absent or + // empty, unless empty value is allowed. + // + auto str = [p, &scan, &fail] (bool allow_empty = false) + { + if (!scan.more ()) + { + fail () << "missing value for primary '" << p << "'"; + } + + string n (p); // Save for diagnostics. + string r (scan.next ()); + + if (r.empty () && !allow_empty) + fail () << "empty value for primary '" << n << "'"; + + return r; + }; + + // Return the unsigned numeric value of the current primary. Fail if + // absent or is not a valid number. + // + auto num = [p, &str, &fail] () + { + string n (p); // Save for diagnostics. + string s (str ()); + + const char* b (s.c_str ()); + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + uint64_t r (strtoull (b, &e, 10)); // Can't throw. + + if (errno == ERANGE || e != b + s.size ()) + fail () << "invalid value '" << s << "' for primary '" << n << "'"; + + return r; + }; + + if (strcmp (p, "-name") == 0) + { + // Note that the empty never-matching pattern is allowed. + // + name = str (true /* allow_empty */); + } + else if (strcmp (p, "-type") == 0) + { + string s (str ()); + char t (s.size () == 1 ? s[0] : '\0'); + + switch (t) + { + case 'f': type = entry_type::regular; break; + case 'd': type = entry_type::directory; break; + case 'l': type = entry_type::symlink; break; + default: fail () << "invalid value '" << s << "' for primary '-type'"; + } + } + else if (strcmp (p, "-mindepth") == 0) + { + min_depth = num (); + } + else if (strcmp (p, "-maxdepth") == 0) + { + max_depth = num (); + } + else + fail () << "unknown primary '" << p << "'"; + } + + // Print the path if the expression evaluates to true for it. Traverse + // further down if the path refers to a directory and the maximum depth + // is not specified or is not reached. + // + // Note that paths for evaluating/printing (pp) and for + // stating/traversing (ap) are passed separately. The former is + // potentially relative and the latter is absolute. Also note that + // for optimization we separately pass the base name simple path. + // + auto find = [&cout, + &name, + &type, + &min_depth, + &max_depth, + &fail] (const path& pp, + const path& ap, + const path& bp, + entry_type t, + uint64_t level, + const auto& find) -> void + { + // Print the path if no primary evaluates to false. + // + if ((!type || *type == t) && + (!min_depth || level >= *min_depth) && + (!name || path_match (bp.string (), *name))) + { + // Print the trailing directory separator, if present. + // + if (pp.to_directory ()) + { + // The trailing directory separator can only be present for + // paths specified on the command line. + // + assert (level == 0); + + cout << pp.representation () << '\n'; + } + else + cout << pp << '\n'; + } + + // Traverse the directory, unless the max depth is specified and + // reached. + // + if (t == entry_type::directory && (!max_depth || level < *max_depth)) + try + { + for (const auto& de: dir_iterator (path_cast<dir_path> (ap), + dir_iterator::no_follow)) + { + find (pp / de.path (), + ap / de.path (), + de.path (), + de.ltype (), + level + 1, + find); + } + } + catch (const system_error& e) + { + fail () << "unable to scan directory '" << pp << "': " << e; + } + }; + + dir_path wd; + + for (const path& p: paths) + { + // Complete the path if it is relative, so that we can properly stat + // it and, potentially, traverse. Note that we don't normalize it + // since POSIX requires that the paths should be evaluated (by + // primaries) and printed unaltered. + // + path ap; + + if (p.relative ()) + { + if (wd.empty () && cwd.relative ()) + wd = current_directory (cwd, fail); + + ap = (!wd.empty () ? wd : cwd) / p; + } + + // Issue an error if the path is empty, doesn't exist, or has the + // trailing directory separator but refers to a non-directory. + // + // Note that POSIX doesn't explicitly describe the behavior if any of + // the above happens. We will follow the behavior which is common for + // both Linux and FreeBSD by issuing the diagnostics, proceeding to + // the subsequent paths, and returning with non-zero code at the end. + // + if (p.empty ()) + { + error () << "empty path"; + continue; + } + + const path& fp (!ap.empty () ? ap : p); + pair<bool, entry_stat> pe; + + try + { + pe = path_entry (fp); + } + catch (const system_error& e) + { + fail () << "unable to stat '" << p << "': " << e; + } + + if (!pe.first) + { + error () << "'" << p << "' doesn't exists"; + continue; + } + + entry_type t (pe.second.type); + + if (p.to_directory () && t != entry_type::directory) + { + error () << "'" << p << "' is not a directory"; + continue; + } + + find (p, fp, p.leaf (), t, 0 /* level */, find); + } + + cout.close (); + r = !error_occured ? 0 : 1; + } + // Can be thrown while closing cin or creating, writing to, or closing + // cout or writing to cerr. + // + catch (const io_error& e) + { + error () << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + catch (const cli::exception& e) + { + error () << e; + } + + cerr.close (); + return r; + } + // In particular, handles io_error exception potentially thrown while + // creating, writing to, or closing cerr. + // + catch (const std::exception&) + { + return 1; + } + // Create a symlink to a file or directory at the specified path and calling // the hook for the created filesystem entries. The paths must be absolute // and normalized. Fall back to creating a hardlink, if symlink creation is @@ -1597,13 +1868,24 @@ namespace butl // auto_rmfile rm; + if (in == nullfd) + in = fddup (stdin_fd ()); + + if (out == nullfd) + out = fddup (stdout_fd ()); + + // Turn the streams into the binary mode to preserve the original line + // endings. + // + fdmode (in.get (), fdstream_mode::binary); + fdmode (out.get (), fdstream_mode::binary); + // Do not throw when failbit is set (getline() failed to extract any // character). // - ifdstream cin (in != nullfd ? move (in) : fddup (stdin_fd ()), - ifdstream::badbit); + ifdstream cin (move (in), ifdstream::badbit); - ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ())); + ofdstream cout (move (out)); // Parse arguments. // @@ -1621,15 +1903,6 @@ namespace butl string replacement; bool global; bool print; - - subst (const string& re, bool ic, string rp, bool gl, bool pr) - // - // Note that ECMAScript is implied if no grammar flag is specified. - // - : regex (re, ic ? regex::icase : regex::ECMAScript), - replacement (move (rp)), - global (gl), - print (pr) {} }; small_vector<subst, 1> substs; @@ -1652,57 +1925,59 @@ namespace butl if (delim == '\\' || delim == '\n') fail () << "invalid delimiter for 's' command in '" << v << "'"; - size_t p (v.find (delim, 2)); - if (p == string::npos) - fail () << "unterminated 's' command regex in '" << v << "'"; - - string regex (v, 2, p - 2); - - // Empty regex matches nothing, so not of much use. - // - if (regex.empty ()) - fail () << "empty regex in 's' command in '" << v << "'"; - - size_t b (p + 1); - p = v.find (delim, b); - if (p == string::npos) - fail () << "unterminated 's' command replacement in '" << v << "'"; - - string replacement (v, b, p - b); - - // Parse the substitute command flags. + // Parse the substitute command regex (as string), replacement, and + // flags. // + pair<string, string> rf; bool icase (false); bool global (false); bool print (false); - char c; - for (++p; (c = v[p]) != '\0'; ++p) + try { - switch (c) + size_t e; + rf = regex_replace_parse (v.c_str () + 1, v.size () - 1, e); + + char c; + for (size_t i (e + 1); (c = v[i]) != '\0'; ++i) { - case 'i': icase = true; break; - case 'g': global = true; break; - case 'p': print = true; break; - default: + switch (c) { - fail () << "invalid 's' command flag '" << c << "' in '" << v - << "'"; + case 'i': icase = true; break; + case 'g': global = true; break; + case 'p': print = true; break; + default: + { + fail () << "invalid 's' command flag '" << c << "' in '" << v + << "'"; + } } } } + catch (const invalid_argument& e) + { + fail () << "invalid 's' command '" << v << "': " << e; + } + // Parse the regex and add the substitution to the list. + // try { - substs.emplace_back (regex, icase, - move (replacement), - global, print); + // Note that ECMAScript is implied if no grammar flag is specified. + // + regex re (rf.first, icase ? regex::icase : regex::ECMAScript); + + substs.push_back ({move (re), + move (rf.second), + global, + print}); } catch (const regex_error& e) { // Print regex_error description if meaningful (no space). // - fail () << "invalid regex '" << regex << "' in '" << v << "'" << e; + fail () << "invalid regex '" << rf.first << "' in '" << v << "'" + << e; } } @@ -1747,7 +2022,8 @@ namespace butl cout.open (fdopen (tp, fdopen_mode::out | fdopen_mode::truncate | - fdopen_mode::create, + fdopen_mode::create | + fdopen_mode::binary, path_permissions (p))); } catch (const io_error& e) @@ -1771,7 +2047,7 @@ namespace butl if (!p.empty ()) { cin.close (); // Flush and close. - cin.open (p); + cin.open (p, fdopen_mode::binary); } // Read until failbit is set (throw on badbit). @@ -1779,6 +2055,13 @@ namespace butl string ps; while (getline (cin, ps)) { + // Remember the line ending type and, if it is CRLF, strip the + // trailing '\r'. + // + bool crlf (!ps.empty () && ps.back() == '\r'); + if (crlf) + ps.pop_back(); + bool prn (!ops.quiet ()); for (const subst& s: substs) @@ -1808,10 +2091,11 @@ namespace butl } // Add newline regardless whether the source line is newline- - // terminated or not (in accordance with POSIX). + // terminated or not (in accordance with POSIX), preserving the + // original line ending. // if (prn) - cout << ps << '\n'; + cout << ps << (crlf ? "\r\n" : "\n"); } cin.close (); @@ -1916,6 +2200,7 @@ namespace butl if (!a.empty () && a[0] != '-' && a[0] != '+') { char* e (nullptr); + errno = 0; // We must clear it according to POSIX. n = strtoull (a.c_str (), &e, 10); // Can't throw. if (errno != ERANGE && e == a.c_str () + a.size ()) @@ -2200,17 +2485,17 @@ namespace butl { unique_ptr<builtin::async_state> s ( new builtin::async_state ( + r, [fn, - &r, &args, in = move (in), out = move (out), err = move (err), &cwd, - &cbs] () mutable noexcept + &cbs] () mutable noexcept -> uint8_t { - r = fn (args, - move (in), move (out), move (err), - cwd, - cbs); + return fn (args, + move (in), move (out), move (err), + cwd, + cbs); })); return builtin (r, move (s)); @@ -2250,6 +2535,7 @@ namespace butl {"diff", {nullptr, 2}}, {"echo", {&async_impl<&echo>, 2}}, {"false", {&false_, 0}}, + {"find", {&async_impl<&find>, 2}}, {"ln", {&sync_impl<&ln>, 2}}, {"mkdir", {&sync_impl<&mkdir>, 2}}, {"mv", {&sync_impl<&mv>, 2}}, @@ -2269,7 +2555,7 @@ namespace butl { if (state_ != nullptr) { - unique_lock<mutex> l (state_->mutex); + unique_lock l (state_->mutex); if (!state_->finished) state_->condv.wait (l, [this] {return state_->finished;}); @@ -2284,7 +2570,7 @@ namespace butl { if (state_ != nullptr) { - unique_lock<mutex> l (state_->mutex); + unique_lock l (state_->mutex); if (!state_->finished && !state_->condv.wait_for (l, tm, [this] {return state_->finished;})) |