From 3282252e09d960108cea00dc464799111be42220 Mon Sep 17 00:00:00 2001
From: Karen Arutyunov
Date: Tue, 23 May 2023 20:44:50 +0300
Subject: Add find builtin

---
 libbutl/builtin-options.cxx | 263 +++++++++++++++++++++++++++++++++++++
 libbutl/builtin-options.hxx |  61 +++++++++
 libbutl/builtin-options.ixx |   3 +
 libbutl/builtin.cli         |   5 +
 libbutl/builtin.cxx         | 315 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 647 insertions(+)

(limited to 'libbutl')

diff --git a/libbutl/builtin-options.cxx b/libbutl/builtin-options.cxx
index a2eb781..3178555 100644
--- a/libbutl/builtin-options.cxx
+++ b/libbutl/builtin-options.cxx
@@ -1271,6 +1271,269 @@ namespace butl
     return r;
   }
 
+  // find_options
+  //
+
+  find_options::
+  find_options ()
+  {
+  }
+
+  bool find_options::
+  parse (int& argc,
+         char** argv,
+         bool erase,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    ::butl::cli::argv_scanner s (argc, argv, erase);
+    bool r = _parse (s, opt, arg);
+    return r;
+  }
+
+  bool find_options::
+  parse (int start,
+         int& argc,
+         char** argv,
+         bool erase,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    ::butl::cli::argv_scanner s (start, argc, argv, erase);
+    bool r = _parse (s, opt, arg);
+    return r;
+  }
+
+  bool find_options::
+  parse (int& argc,
+         char** argv,
+         int& end,
+         bool erase,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    ::butl::cli::argv_scanner s (argc, argv, erase);
+    bool r = _parse (s, opt, arg);
+    end = s.end ();
+    return r;
+  }
+
+  bool find_options::
+  parse (int start,
+         int& argc,
+         char** argv,
+         int& end,
+         bool erase,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    ::butl::cli::argv_scanner s (start, argc, argv, erase);
+    bool r = _parse (s, opt, arg);
+    end = s.end ();
+    return r;
+  }
+
+  bool find_options::
+  parse (::butl::cli::scanner& s,
+         ::butl::cli::unknown_mode opt,
+         ::butl::cli::unknown_mode arg)
+  {
+    bool r = _parse (s, opt, arg);
+    return r;
+  }
+
+  typedef
+  std::map<std::string, void (*) (find_options&, ::butl::cli::scanner&)>
+  _cli_find_options_map;
+
+  static _cli_find_options_map _cli_find_options_map_;
+
+  struct _cli_find_options_map_init
+  {
+    _cli_find_options_map_init ()
+    {
+    }
+  };
+
+  static _cli_find_options_map_init _cli_find_options_map_init_;
+
+  bool find_options::
+  _parse (const char* o, ::butl::cli::scanner& s)
+  {
+    _cli_find_options_map::const_iterator i (_cli_find_options_map_.find (o));
+
+    if (i != _cli_find_options_map_.end ())
+    {
+      (*(i->second)) (*this, s);
+      return true;
+    }
+
+    return false;
+  }
+
+  bool find_options::
+  _parse (::butl::cli::scanner& s,
+          ::butl::cli::unknown_mode opt_mode,
+          ::butl::cli::unknown_mode arg_mode)
+  {
+    // Can't skip combined flags (--no-combined-flags).
+    //
+    assert (opt_mode != ::butl::cli::unknown_mode::skip);
+
+    bool r = false;
+    bool opt = true;
+
+    while (s.more ())
+    {
+      const char* o = s.peek ();
+
+      if (std::strcmp (o, "--") == 0)
+      {
+        opt = false;
+      }
+
+      if (opt)
+      {
+        if (_parse (o, s))
+        {
+          r = true;
+          continue;
+        }
+
+        if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0')
+        {
+          // Handle combined option values.
+          //
+          std::string co;
+          if (const char* v = std::strchr (o, '='))
+          {
+            co.assign (o, 0, v - o);
+            ++v;
+
+            int ac (2);
+            char* av[] =
+            {
+              const_cast<char*> (co.c_str ()),
+              const_cast<char*> (v)
+            };
+
+            ::butl::cli::argv_scanner ns (0, ac, av);
+
+            if (_parse (co.c_str (), ns))
+            {
+              // Parsed the option but not its value?
+              //
+              if (ns.end () != 2)
+                throw ::butl::cli::invalid_value (co, v);
+
+              s.next ();
+              r = true;
+              continue;
+            }
+            else
+            {
+              // Set the unknown option and fall through.
+              //
+              o = co.c_str ();
+            }
+          }
+
+          // Handle combined flags.
+          //
+          char cf[3];
+          {
+            const char* p = o + 1;
+            for (; *p != '\0'; ++p)
+            {
+              if (!((*p >= 'a' && *p <= 'z') ||
+                    (*p >= 'A' && *p <= 'Z') ||
+                    (*p >= '0' && *p <= '9')))
+                break;
+            }
+
+            if (*p == '\0')
+            {
+              for (p = o + 1; *p != '\0'; ++p)
+              {
+                std::strcpy (cf, "-");
+                cf[1] = *p;
+                cf[2] = '\0';
+
+                int ac (1);
+                char* av[] =
+                {
+                  cf
+                };
+
+                ::butl::cli::argv_scanner ns (0, ac, av);
+
+                if (!_parse (cf, ns))
+                  break;
+              }
+
+              if (*p == '\0')
+              {
+                // All handled.
+                //
+                s.next ();
+                r = true;
+                continue;
+              }
+              else
+              {
+                // Set the unknown option and fall through.
+                //
+                o = cf;
+              }
+            }
+          }
+
+          switch (opt_mode)
+          {
+            case ::butl::cli::unknown_mode::skip:
+            {
+              s.skip ();
+              r = true;
+              continue;
+            }
+            case ::butl::cli::unknown_mode::stop:
+            {
+              break;
+            }
+            case ::butl::cli::unknown_mode::fail:
+            {
+              throw ::butl::cli::unknown_option (o);
+            }
+          }
+
+          break;
+        }
+      }
+
+      switch (arg_mode)
+      {
+        case ::butl::cli::unknown_mode::skip:
+        {
+          s.skip ();
+          r = true;
+          continue;
+        }
+        case ::butl::cli::unknown_mode::stop:
+        {
+          break;
+        }
+        case ::butl::cli::unknown_mode::fail:
+        {
+          throw ::butl::cli::unknown_argument (o);
+        }
+      }
+
+      break;
+    }
+
+    return r;
+  }
+
   // ln_options
   //
 
diff --git a/libbutl/builtin-options.hxx b/libbutl/builtin-options.hxx
index 6288e54..ad34b1b 100644
--- a/libbutl/builtin-options.hxx
+++ b/libbutl/builtin-options.hxx
@@ -484,6 +484,67 @@ namespace butl
     bool utc_;
   };
 
+  class find_options
+  {
+    public:
+    find_options ();
+
+    // Return true if anything has been parsed.
+    //
+    bool
+    parse (int& argc,
+           char** argv,
+           bool erase = false,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    bool
+    parse (int start,
+           int& argc,
+           char** argv,
+           bool erase = false,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    bool
+    parse (int& argc,
+           char** argv,
+           int& end,
+           bool erase = false,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    bool
+    parse (int start,
+           int& argc,
+           char** argv,
+           int& end,
+           bool erase = false,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    bool
+    parse (::butl::cli::scanner&,
+           ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+           ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+    // Option accessors.
+    //
+    // Implementation details.
+    //
+    protected:
+    bool
+    _parse (const char*, ::butl::cli::scanner&);
+
+    private:
+    bool
+    _parse (::butl::cli::scanner&,
+            ::butl::cli::unknown_mode option,
+            ::butl::cli::unknown_mode argument);
+
+    public:
+  };
+
   class ln_options
   {
     public:
diff --git a/libbutl/builtin-options.ixx b/libbutl/builtin-options.ixx
index b977f16..e118156 100644
--- a/libbutl/builtin-options.ixx
+++ b/libbutl/builtin-options.ixx
@@ -193,6 +193,9 @@ namespace butl
     return this->utc_;
   }
 
+  // find_options
+  //
+
   // ln_options
   //
 
diff --git a/libbutl/builtin.cli b/libbutl/builtin.cli
index adc47fa..23a5708 100644
--- a/libbutl/builtin.cli
+++ b/libbutl/builtin.cli
@@ -34,6 +34,11 @@ namespace butl
     bool --utc|-u;
   };
 
+  class find_options
+  {
+    // No options so far (expression/primaries handled as arguments).
+  };
+
   class ln_options
   {
     bool --symbolic|-s;
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx
index a225c95..2755bf1 100644
--- a/libbutl/builtin.cxx
+++ b/libbutl/builtin.cxx
@@ -816,6 +816,320 @@ namespace butl
     return builtin (r = 0);
   }
 
+  // find <start-path>... [-name <pattern>]
+  //                      [-type <type>]
+  //                      [-mindepth <depth>]
+  //                      [-maxdepth <depth>]
+  //
+  // Print each start path and, recursively, the filesystem entries under it
+  // that satisfy all the specified primaries (one path per line). Return 0
+  // on success and 1 if any error diagnostics has been issued.
+  //
+  // Note: must be executed asynchronously.
+  //
+  static uint8_t
+  find (const strings& args,
+        auto_fd in, auto_fd out, auto_fd err,
+        const dir_path& cwd,
+        const builtin_callbacks& cbs) noexcept
+  try
+  {
+    uint8_t r (1);
+    ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ()));
+
+    // Note that on some errors we will issue diagnostics but continue the
+    // search and return with non-zero code at the end. This is consistent
+    // with how major implementations behave (see below).
+    //
+    bool error_occurred (false);
+    auto error = [&cerr, &error_occurred] (bool fail = false)
+    {
+      error_occurred = true;
+      return error_record (cerr, fail, "find");
+    };
+
+    auto fail = [&error] () {return error (true /* fail */);};
+
+    try
+    {
+      in.close ();
+      ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ()));
+
+      // Parse arguments.
+      //
+      cli::vector_scanner scan (args);
+
+      // Currently, we don't expect any options.
+      //
+      parse<find_options> (scan, args, cbs.parse_option, fail);
+
+      // Parse path arguments until the first primary (starts with '-') is
+      // encountered.
+      //
+      small_vector<path, 1> paths;
+
+      while (scan.more ())
+      {
+        if (*scan.peek () == '-')
+          break;
+
+        try
+        {
+          paths.emplace_back (scan.next ());
+        }
+        catch (const invalid_path& e)
+        {
+          fail () << "invalid path '" << e.path << "'";
+        }
+      }
+
+      // Note that POSIX doesn't explicitly describe the behavior if no paths
+      // are specified on the command line. On Linux the current directory is
+      // assumed in this case. We, however, will follow the FreeBSD behavior
+      // and fail since this seems to be less error-prone.
+      //
+      if (paths.empty ())
+        fail () << "missing start path";
+
+      // Parse primaries.
+      //
+      optional<string> name;
+      optional<entry_type> type;
+      optional<uint64_t> min_depth;
+      optional<uint64_t> max_depth;
+
+      while (scan.more ())
+      {
+        const char* p (scan.next ());
+
+        // Return the string value of the current primary. Fail if absent or
+        // empty, unless empty value is allowed.
+        //
+        auto str = [p, &scan, &fail] (bool allow_empty = false)
+        {
+          if (!scan.more ())
+          {
+            fail () << "missing value for primary '" << p << "'";
+          }
+
+          string n (p); // Save for diagnostics.
+          string r (scan.next ());
+
+          if (r.empty () && !allow_empty)
+            fail () << "empty value for primary '" << n << "'";
+
+          return r;
+        };
+
+        // Return the unsigned numeric value of the current primary. Fail if
+        // absent or is not a valid number. Note that strtoull() on its own
+        // skips leading whitespaces and accepts a sign (negating the value),
+        // so we additionally require the value to start with a digit.
+        //
+        auto num = [p, &str, &fail] ()
+        {
+          string n (p); // Save for diagnostics.
+          string s (str ());
+
+          const char* b (s.c_str ());
+          char* e (nullptr);
+          errno = 0; // We must clear it according to POSIX.
+          uint64_t r (strtoull (b, &e, 10)); // Can't throw.
+
+          if (*b < '0' || *b > '9' || errno == ERANGE || e != b + s.size ())
+            fail () << "invalid value '" << s << "' for primary '" << n << "'";
+
+          return r;
+        };
+
+        if (strcmp (p, "-name") == 0)
+        {
+          // Note that the empty never-matching pattern is allowed.
+          //
+          name = str (true /* allow_empty */);
+        }
+        else if (strcmp (p, "-type") == 0)
+        {
+          string s (str ());
+          char t (s.size () == 1 ? s[0] : '\0');
+
+          switch (t)
+          {
+          case 'f': type = entry_type::regular;   break;
+          case 'd': type = entry_type::directory; break;
+          case 'l': type = entry_type::symlink;   break;
+          default:  fail () << "invalid value '" << s << "' for primary '-type'";
+          }
+        }
+        else if (strcmp (p, "-mindepth") == 0)
+        {
+          min_depth = num ();
+        }
+        else if (strcmp (p, "-maxdepth") == 0)
+        {
+          max_depth = num ();
+        }
+        else
+          fail () << "unknown primary '" << p << "'";
+      }
+
+      // Print the path if the expression evaluates to true for it. Traverse
+      // further down if the path refers to a directory and the maximum depth
+      // is not specified or is not reached.
+      //
+      // Note that paths for evaluating/printing (pp) and for
+      // stating/traversing (ap) are passed separately. The former is
+      // potentially relative and the latter is absolute. Also note that
+      // for optimization we separately pass the base name simple path.
+      //
+      auto find = [&cout,
+                   &name,
+                   &type,
+                   &min_depth,
+                   &max_depth,
+                   &fail] (const path& pp,
+                           const path& ap,
+                           const path& bp,
+                           entry_type t,
+                           uint64_t level,
+                           const auto& find) -> void
+      {
+        // Print the path if no primary evaluates to false.
+        //
+        if ((!type      || *type == t) &&
+            (!min_depth || level >= *min_depth) &&
+            (!name      || path_match (bp.string (), *name)))
+        {
+          // Print the trailing directory separator, if present.
+          //
+          if (pp.to_directory ())
+          {
+            // The trailing directory separator can only be present for
+            // paths specified on the command line.
+            //
+            assert (level == 0);
+
+            cout << pp.representation () << '\n';
+          }
+          else
+            cout << pp << '\n';
+        }
+
+        // Traverse the directory, unless the max depth is specified and
+        // reached.
+        //
+        if (t == entry_type::directory && (!max_depth || level < *max_depth))
+        try
+        {
+          for (const auto& de: dir_iterator (path_cast<dir_path> (ap),
+                                             dir_iterator::no_follow))
+          {
+            find (pp / de.path (),
+                  ap / de.path (),
+                  de.path (),
+                  de.ltype (),
+                  level + 1,
+                  find);
+          }
+        }
+        catch (const system_error& e)
+        {
+          fail () << "unable to scan directory '" << pp << "': " << e;
+        }
+      };
+
+      dir_path wd;
+
+      for (const path& p: paths)
+      {
+        // Complete the path if it is relative, so that we can properly stat
+        // it and, potentially, traverse. Note that we don't normalize it
+        // since POSIX requires that the paths should be evaluated (by
+        // primaries) and printed unaltered.
+        //
+        path ap;
+
+        if (p.relative ())
+        {
+          if (wd.empty () && cwd.relative ())
+            wd = current_directory (cwd, fail);
+
+          ap = (!wd.empty () ? wd : cwd) / p;
+        }
+
+        // Issue an error if the path is empty, doesn't exist, or has the
+        // trailing directory separator but refers to a non-directory.
+        //
+        // Note that POSIX doesn't explicitly describe the behavior if any of
+        // the above happens. We will follow the behavior which is common for
+        // both Linux and FreeBSD by issuing the diagnostics, proceeding to
+        // the subsequent paths, and returning with non-zero code at the end.
+        //
+        if (p.empty ())
+        {
+          error () << "empty path";
+          continue;
+        }
+
+        const path& fp (!ap.empty () ? ap : p);
+        pair<bool, entry_stat> pe;
+
+        try
+        {
+          pe = path_entry (fp);
+        }
+        catch (const system_error& e)
+        {
+          fail () << "unable to stat '" << p << "': " << e;
+        }
+
+        if (!pe.first)
+        {
+          error () << "'" << p << "' doesn't exists";
+          continue;
+        }
+
+        entry_type t (pe.second.type);
+
+        if (p.to_directory () && t != entry_type::directory)
+        {
+          error () << "'" << p << "' is not a directory";
+          continue;
+        }
+
+        find (p, fp, p.leaf (), t, 0 /* level */, find);
+      }
+
+      cout.close ();
+      r = !error_occurred ? 0 : 1;
+    }
+    // Can be thrown while closing cin or creating, writing to, or closing
+    // cout or writing to cerr.
+    //
+    catch (const io_error& e)
+    {
+      error () << e;
+    }
+    catch (const failed&)
+    {
+      // Diagnostics has already been issued.
+    }
+    catch (const cli::exception& e)
+    {
+      error () << e;
+    }
+
+    cerr.close ();
+    return r;
+  }
+  // In particular, handles io_error exception potentially thrown while
+  // creating, writing to, or closing cerr.
+  //
+  catch (const std::exception&)
+  {
+    return 1;
+  }
+
   // Create a symlink to a file or directory at the specified path and calling
   // the hook for the created filesystem entries. The paths must be absolute
   // and normalized. Fall back to creating a hardlink, if symlink creation is
@@ -2227,6 +2541,7 @@ namespace butl
       {"diff", {nullptr, 2}},
       {"echo", {&async_impl<&echo>, 2}},
       {"false", {&false_, 0}},
+      {"find", {&async_impl<&find>, 2}},
       {"ln", {&sync_impl<&ln>, 2}},
       {"mkdir", {&sync_impl<&mkdir>, 2}},
       {"mv", {&sync_impl<&mv>, 2}},
-- 
cgit v1.1