diff options
Diffstat (limited to 'libbutl/builtin.cxx')
-rw-r--r-- | libbutl/builtin.cxx | 613 |
1 files changed, 485 insertions, 128 deletions
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx index c6083b6..2755bf1 100644 --- a/libbutl/builtin.cxx +++ b/libbutl/builtin.cxx @@ -1,28 +1,16 @@ // file : libbutl/builtin.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file -#ifndef __cpp_modules_ts -#include <libbutl/builtin.mxx> -#endif +#include <libbutl/builtin.hxx> #ifdef _WIN32 # include <libbutl/win32-utility.hxx> #endif -#include <cassert> - -#ifndef __cpp_lib_modules_ts -#include <map> -#include <string> -#include <vector> -#include <thread> -#include <utility> // move(), forward() -#include <cstdint> // uint*_t -#include <functional> - #include <ios> #include <chrono> #include <cerrno> +#include <cassert> #include <ostream> #include <sstream> #include <cstdlib> // strtoull() @@ -30,41 +18,16 @@ #include <exception> #include <system_error> -#endif +#include <libbutl/regex.hxx> +#include <libbutl/path-io.hxx> +#include <libbutl/utility.hxx> // operator<<(ostream,exception), + // throw_generic_error() +#include <libbutl/optional.hxx> +#include <libbutl/filesystem.hxx> +#include <libbutl/small-vector.hxx> #include <libbutl/builtin-options.hxx> -#ifdef __cpp_modules_ts -module butl.builtin; - -// Only imports additional to interface. -#ifdef __clang__ -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -import std.threading; -#endif -import butl.path; -import butl.fdstream; -import butl.timestamp; -#endif - -import butl.regex; -import butl.path_io; -import butl.utility; // operator<<(ostream,exception), - // throw_generic_error() -import butl.optional; -import butl.filesystem; -import butl.small_vector; -#else -#include <libbutl/regex.mxx> -#include <libbutl/path-io.mxx> -#include <libbutl/utility.mxx> -#include <libbutl/optional.mxx> -#include <libbutl/filesystem.mxx> -#include <libbutl/small-vector.mxx> -#endif - // Strictly speaking a builtin which reads/writes from/to standard streams // must be asynchronous so that the caller can communicate with it through // pipes without being blocked on I/O operations. However, as an optimization, @@ -507,7 +470,7 @@ namespace butl if (cbs.create) call (fail, cbs.create, to, false /* pre */); - for (const auto& de: dir_iterator (from, false /* ignore_dangling */)) + for (const auto& de: dir_iterator (from, dir_iterator::no_follow)) { path f (from / de.path ()); path t (to / de.path ()); @@ -853,6 +816,314 @@ namespace butl return builtin (r = 0); } + // find <start-path>... [-name <pattern>] + // [-type <type>] + // [-mindepth <depth>] + // [-maxdepth <depth>] + // + // Note: must be executed asynchronously. + // + static uint8_t + find (const strings& args, + auto_fd in, auto_fd out, auto_fd err, + const dir_path& cwd, + const builtin_callbacks& cbs) noexcept + try + { + uint8_t r (1); + ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ())); + + // Note that on some errors we will issue diagnostics but continue the + // search and return with non-zero code at the end. This is consistent + // with how major implementations behave (see below). + // + bool error_occured (false); + auto error = [&cerr, &error_occured] (bool fail = false) + { + error_occured = true; + return error_record (cerr, fail, "find"); + }; + + auto fail = [&error] () {return error (true /* fail */);}; + + try + { + in.close (); + ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ())); + + // Parse arguments. + // + cli::vector_scanner scan (args); + + // Currently, we don't expect any options. + // + parse<find_options> (scan, args, cbs.parse_option, fail); + + // Parse path arguments until the first primary (starts with '-') is + // encountered. + // + small_vector<path, 1> paths; + + while (scan.more ()) + { + if (*scan.peek () == '-') + break; + + try + { + paths.emplace_back (scan.next ()); + } + catch (const invalid_path& e) + { + fail () << "invalid path '" << e.path << "'"; + } + } + + // Note that POSIX doesn't explicitly describe the behavior if no paths + // are specified on the command line. On Linux the current directory is + // assumed in this case. We, however, will follow the FreeBSD behavior + // and fail since this seems to be less error-prone. + // + if (paths.empty ()) + fail () << "missing start path"; + + // Parse primaries. + // + optional<string> name; + optional<entry_type> type; + optional<uint64_t> min_depth; + optional<uint64_t> max_depth; + + while (scan.more ()) + { + const char* p (scan.next ()); + + // Return the string value of the current primary. Fail if absent or + // empty, unless empty value is allowed. + // + auto str = [p, &scan, &fail] (bool allow_empty = false) + { + if (!scan.more ()) + { + fail () << "missing value for primary '" << p << "'"; + } + + string n (p); // Save for diagnostics. + string r (scan.next ()); + + if (r.empty () && !allow_empty) + fail () << "empty value for primary '" << n << "'"; + + return r; + }; + + // Return the unsigned numeric value of the current primary. Fail if + // absent or is not a valid number. + // + auto num = [p, &str, &fail] () + { + string n (p); // Save for diagnostics. + string s (str ()); + + const char* b (s.c_str ()); + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + uint64_t r (strtoull (b, &e, 10)); // Can't throw. + + if (errno == ERANGE || e != b + s.size ()) + fail () << "invalid value '" << s << "' for primary '" << n << "'"; + + return r; + }; + + if (strcmp (p, "-name") == 0) + { + // Note that the empty never-matching pattern is allowed. + // + name = str (true /* allow_empty */); + } + else if (strcmp (p, "-type") == 0) + { + string s (str ()); + char t (s.size () == 1 ? s[0] : '\0'); + + switch (t) + { + case 'f': type = entry_type::regular; break; + case 'd': type = entry_type::directory; break; + case 'l': type = entry_type::symlink; break; + default: fail () << "invalid value '" << s << "' for primary '-type'"; + } + } + else if (strcmp (p, "-mindepth") == 0) + { + min_depth = num (); + } + else if (strcmp (p, "-maxdepth") == 0) + { + max_depth = num (); + } + else + fail () << "unknown primary '" << p << "'"; + } + + // Print the path if the expression evaluates to true for it. Traverse + // further down if the path refers to a directory and the maximum depth + // is not specified or is not reached. + // + // Note that paths for evaluating/printing (pp) and for + // stating/traversing (ap) are passed separately. The former is + // potentially relative and the latter is absolute. Also note that + // for optimization we separately pass the base name simple path. + // + auto find = [&cout, + &name, + &type, + &min_depth, + &max_depth, + &fail] (const path& pp, + const path& ap, + const path& bp, + entry_type t, + uint64_t level, + const auto& find) -> void + { + // Print the path if no primary evaluates to false. + // + if ((!type || *type == t) && + (!min_depth || level >= *min_depth) && + (!name || path_match (bp.string (), *name))) + { + // Print the trailing directory separator, if present. + // + if (pp.to_directory ()) + { + // The trailing directory separator can only be present for + // paths specified on the command line. + // + assert (level == 0); + + cout << pp.representation () << '\n'; + } + else + cout << pp << '\n'; + } + + // Traverse the directory, unless the max depth is specified and + // reached. + // + if (t == entry_type::directory && (!max_depth || level < *max_depth)) + try + { + for (const auto& de: dir_iterator (path_cast<dir_path> (ap), + dir_iterator::no_follow)) + { + find (pp / de.path (), + ap / de.path (), + de.path (), + de.ltype (), + level + 1, + find); + } + } + catch (const system_error& e) + { + fail () << "unable to scan directory '" << pp << "': " << e; + } + }; + + dir_path wd; + + for (const path& p: paths) + { + // Complete the path if it is relative, so that we can properly stat + // it and, potentially, traverse. Note that we don't normalize it + // since POSIX requires that the paths should be evaluated (by + // primaries) and printed unaltered. + // + path ap; + + if (p.relative ()) + { + if (wd.empty () && cwd.relative ()) + wd = current_directory (cwd, fail); + + ap = (!wd.empty () ? wd : cwd) / p; + } + + // Issue an error if the path is empty, doesn't exist, or has the + // trailing directory separator but refers to a non-directory. + // + // Note that POSIX doesn't explicitly describe the behavior if any of + // the above happens. We will follow the behavior which is common for + // both Linux and FreeBSD by issuing the diagnostics, proceeding to + // the subsequent paths, and returning with non-zero code at the end. + // + if (p.empty ()) + { + error () << "empty path"; + continue; + } + + const path& fp (!ap.empty () ? ap : p); + pair<bool, entry_stat> pe; + + try + { + pe = path_entry (fp); + } + catch (const system_error& e) + { + fail () << "unable to stat '" << p << "': " << e; + } + + if (!pe.first) + { + error () << "'" << p << "' doesn't exists"; + continue; + } + + entry_type t (pe.second.type); + + if (p.to_directory () && t != entry_type::directory) + { + error () << "'" << p << "' is not a directory"; + continue; + } + + find (p, fp, p.leaf (), t, 0 /* level */, find); + } + + cout.close (); + r = !error_occured ? 0 : 1; + } + // Can be thrown while closing cin or creating, writing to, or closing + // cout or writing to cerr. + // + catch (const io_error& e) + { + error () << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + catch (const cli::exception& e) + { + error () << e; + } + + cerr.close (); + return r; + } + // In particular, handles io_error exception potentially thrown while + // creating, writing to, or closing cerr. + // + catch (const std::exception&) + { + return 1; + } + // Create a symlink to a file or directory at the specified path and calling // the hook for the created filesystem entries. The paths must be absolute // and normalized. Fall back to creating a hardlink, if symlink creation is @@ -1569,7 +1840,7 @@ namespace butl return 1; } - // sed [-n|--quiet] [-i|--in-place] -e|--expression <script> [<file>] + // sed [-n|--quiet] [-i|--in-place] (-e|--expression <script>)... [<file>] // // Note: must be executed asynchronously. // @@ -1597,13 +1868,24 @@ namespace butl // auto_rmfile rm; + if (in == nullfd) + in = fddup (stdin_fd ()); + + if (out == nullfd) + out = fddup (stdout_fd ()); + + // Turn the streams into the binary mode to preserve the original line + // endings. + // + fdmode (in.get (), fdstream_mode::binary); + fdmode (out.get (), fdstream_mode::binary); + // Do not throw when failbit is set (getline() failed to extract any // character). // - ifdstream cin (in != nullfd ? move (in) : fddup (stdin_fd ()), - ifdstream::badbit); + ifdstream cin (move (in), ifdstream::badbit); - ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ())); + ofdstream cout (move (out)); // Parse arguments. // @@ -1615,71 +1897,88 @@ namespace butl if (ops.expression ().empty ()) fail () << "missing script"; - // Only a single script is supported. - // - if (ops.expression ().size () != 1) - fail () << "multiple scripts"; - - struct + struct subst { - string regex; + std::regex regex; string replacement; - bool icase = false; - bool global = false; - bool print = false; - } subst; + bool global; + bool print; + }; + + small_vector<subst, 1> substs; + for (const string& v: ops.expression ()) { - const string& v (ops.expression ()[0]); if (v.empty ()) fail () << "empty script"; if (v[0] != 's') - fail () << "only 's' command supported"; + fail () << "unknown command in '" << v << "': only 's' command " + << "supported"; // Parse the substitute command. // if (v.size () < 2) - fail () << "no delimiter for 's' command"; + fail () << "no delimiter for 's' command in '" << v << "'"; char delim (v[1]); if (delim == '\\' || delim == '\n') - fail () << "invalid delimiter for 's' command"; - - size_t p (v.find (delim, 2)); - if (p == string::npos) - fail () << "unterminated 's' command regex"; - - subst.regex.assign (v, 2, p - 2); + fail () << "invalid delimiter for 's' command in '" << v << "'"; - // Empty regex matches nothing, so not of much use. + // Parse the substitute command regex (as string), replacement, and + // flags. // - if (subst.regex.empty ()) - fail () << "empty regex in 's' command"; + pair<string, string> rf; + bool icase (false); + bool global (false); + bool print (false); - size_t b (p + 1); - p = v.find (delim, b); - if (p == string::npos) - fail () << "unterminated 's' command replacement"; - - subst.replacement.assign (v, b, p - b); - - // Parse the substitute command flags. - // - char c; - for (++p; (c = v[p]) != '\0'; ++p) + try { - switch (c) + size_t e; + rf = regex_replace_parse (v.c_str () + 1, v.size () - 1, e); + + char c; + for (size_t i (e + 1); (c = v[i]) != '\0'; ++i) { - case 'i': subst.icase = true; break; - case 'g': subst.global = true; break; - case 'p': subst.print = true; break; - default: + switch (c) { - fail () << "invalid 's' command flag '" << c << "'"; + case 'i': icase = true; break; + case 'g': global = true; break; + case 'p': print = true; break; + default: + { + fail () << "invalid 's' command flag '" << c << "' in '" << v + << "'"; + } } } } + catch (const invalid_argument& e) + { + fail () << "invalid 's' command '" << v << "': " << e; + } + + // Parse the regex and add the substitution to the list. + // + try + { + // Note that ECMAScript is implied if no grammar flag is specified. + // + regex re (rf.first, icase ? regex::icase : regex::ECMAScript); + + substs.push_back ({move (re), + move (rf.second), + global, + print}); + } + catch (const regex_error& e) + { + // Print regex_error description if meaningful (no space). + // + fail () << "invalid regex '" << rf.first << "' in '" << v << "'" + << e; + } } // Path of a file to edit. An empty path represents stdin. @@ -1723,7 +2022,8 @@ namespace butl cout.open (fdopen (tp, fdopen_mode::out | fdopen_mode::truncate | - fdopen_mode::create, + fdopen_mode::create | + fdopen_mode::binary, path_permissions (p))); } catch (const io_error& e) @@ -1738,10 +2038,6 @@ namespace butl rm = auto_rmfile (tp); } - // Note that ECMAScript is implied if no grammar flag is specified. - // - regex re (subst.regex, subst.icase ? regex::icase : regex::ECMAScript); - // Edit a file or STDIN. // try @@ -1751,27 +2047,55 @@ namespace butl if (!p.empty ()) { cin.close (); // Flush and close. - cin.open (p); + cin.open (p, fdopen_mode::binary); } // Read until failbit is set (throw on badbit). // - string s; - while (getline (cin, s)) + string ps; + while (getline (cin, ps)) { - auto r (regex_replace_search ( - s, - re, - subst.replacement, - subst.global - ? regex_constants::format_default - : regex_constants::format_first_only)); + // Remember the line ending type and, if it is CRLF, strip the + // trailing '\r'. + // + bool crlf (!ps.empty () && ps.back() == '\r'); + if (crlf) + ps.pop_back(); + + bool prn (!ops.quiet ()); + + for (const subst& s: substs) + { + auto r (regex_replace_search ( + ps, + s.regex, + s.replacement, + s.global + ? regex_constants::format_default + : regex_constants::format_first_only)); + + // If the regex matches, then override the pattern space with the + // replacement result and print it and proceed to the next line, + // if requested. + // + if (r.second) + { + ps = move (r.first); + + if (s.print) + { + prn = true; + break; + } + } + } // Add newline regardless whether the source line is newline- - // terminated or not (in accordance with POSIX). + // terminated or not (in accordance with POSIX), preserving the + // original line ending. // - if (!ops.quiet () || (r.second && subst.print)) - cout << r.first << '\n'; + if (prn) + cout << ps << (crlf ? "\r\n" : "\n"); } cin.close (); @@ -1801,12 +2125,6 @@ namespace butl d << ": " << e; } } - catch (const regex_error& e) - { - // Print regex_error description if meaningful (no space). - // - error () << "invalid regex" << e; - } // Can be thrown while creating cin, cout or writing to cerr. // catch (const io_error& e) @@ -1882,6 +2200,7 @@ namespace butl if (!a.empty () && a[0] != '-' && a[0] != '+') { char* e (nullptr); + errno = 0; // We must clear it according to POSIX. n = strtoull (a.c_str (), &e, 10); // Can't throw. if (errno != ERANGE && e == a.c_str () + a.size ()) @@ -2164,17 +2483,22 @@ namespace butl const dir_path& cwd, const builtin_callbacks& cbs) { - return builtin ( - r, - thread ([fn, &r, &args, - in = move (in), - out = move (out), - err = move (err), - &cwd, - &cbs] () mutable noexcept - { - r = fn (args, move (in), move (out), move (err), cwd, cbs); - })); + unique_ptr<builtin::async_state> s ( + new builtin::async_state ( + r, + [fn, + &args, + in = move (in), out = move (out), err = move (err), + &cwd, + &cbs] () mutable noexcept -> uint8_t + { + return fn (args, + move (in), move (out), move (err), + cwd, + cbs); + })); + + return builtin (r, move (s)); } template <builtin_impl fn> @@ -2200,7 +2524,7 @@ namespace butl const builtin_callbacks& cbs) { r = fn (args, move (in), move (out), move (err), cwd, cbs); - return builtin (r, thread ()); + return builtin (r); } const builtin_map builtins @@ -2211,6 +2535,7 @@ namespace butl {"diff", {nullptr, 2}}, {"echo", {&async_impl<&echo>, 2}}, {"false", {&false_, 0}}, + {"find", {&async_impl<&find>, 2}}, {"ln", {&sync_impl<&ln>, 2}}, {"mkdir", {&sync_impl<&mkdir>, 2}}, {"mv", {&sync_impl<&mv>, 2}}, @@ -2222,4 +2547,36 @@ namespace butl {"touch", {&sync_impl<&touch>, 2}}, {"true", {&true_, 0}} }; + + // builtin + // + uint8_t builtin:: + wait () + { + if (state_ != nullptr) + { + unique_lock l (state_->mutex); + + if (!state_->finished) + state_->condv.wait (l, [this] {return state_->finished;}); + } + + return result_; + } + + template <> + optional<uint8_t> builtin:: + timed_wait (const chrono::milliseconds& tm) + { + if (state_ != nullptr) + { + unique_lock l (state_->mutex); + + if (!state_->finished && + !state_->condv.wait_for (l, tm, [this] {return state_->finished;})) + return nullopt; + } + + return result_; + } } |