From 3282252e09d960108cea00dc464799111be42220 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 23 May 2023 20:44:50 +0300 Subject: Add find builtin --- libbutl/builtin-options.cxx | 263 +++++++++++++++++++++++++++++++++++ libbutl/builtin-options.hxx | 61 +++++++++ libbutl/builtin-options.ixx | 3 + libbutl/builtin.cli | 5 + libbutl/builtin.cxx | 309 ++++++++++++++++++++++++++++++++++++++++++ tests/builtin/driver.cxx | 7 + tests/builtin/find.testscript | 276 +++++++++++++++++++++++++++++++++++++ 7 files changed, 924 insertions(+) create mode 100644 tests/builtin/find.testscript diff --git a/libbutl/builtin-options.cxx b/libbutl/builtin-options.cxx index a2eb781..3178555 100644 --- a/libbutl/builtin-options.cxx +++ b/libbutl/builtin-options.cxx @@ -1271,6 +1271,269 @@ namespace butl return r; } + // find_options + // + + find_options:: + find_options () + { + } + + bool find_options:: + parse (int& argc, + char** argv, + bool erase, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + ::butl::cli::argv_scanner s (argc, argv, erase); + bool r = _parse (s, opt, arg); + return r; + } + + bool find_options:: + parse (int start, + int& argc, + char** argv, + bool erase, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + ::butl::cli::argv_scanner s (start, argc, argv, erase); + bool r = _parse (s, opt, arg); + return r; + } + + bool find_options:: + parse (int& argc, + char** argv, + int& end, + bool erase, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + ::butl::cli::argv_scanner s (argc, argv, erase); + bool r = _parse (s, opt, arg); + end = s.end (); + return r; + } + + bool find_options:: + parse (int start, + int& argc, + char** argv, + int& end, + bool erase, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + ::butl::cli::argv_scanner s (start, argc, argv, erase); + bool r = _parse (s, opt, arg); + end = s.end (); + return r; + } + + bool find_options:: + parse (::butl::cli::scanner& s, + ::butl::cli::unknown_mode opt, + ::butl::cli::unknown_mode arg) + { + bool r = _parse (s, opt, arg); + return r; + } + + typedef + std::map + _cli_find_options_map; + + static _cli_find_options_map _cli_find_options_map_; + + struct _cli_find_options_map_init + { + _cli_find_options_map_init () + { + } + }; + + static _cli_find_options_map_init _cli_find_options_map_init_; + + bool find_options:: + _parse (const char* o, ::butl::cli::scanner& s) + { + _cli_find_options_map::const_iterator i (_cli_find_options_map_.find (o)); + + if (i != _cli_find_options_map_.end ()) + { + (*(i->second)) (*this, s); + return true; + } + + return false; + } + + bool find_options:: + _parse (::butl::cli::scanner& s, + ::butl::cli::unknown_mode opt_mode, + ::butl::cli::unknown_mode arg_mode) + { + // Can't skip combined flags (--no-combined-flags). + // + assert (opt_mode != ::butl::cli::unknown_mode::skip); + + bool r = false; + bool opt = true; + + while (s.more ()) + { + const char* o = s.peek (); + + if (std::strcmp (o, "--") == 0) + { + opt = false; + } + + if (opt) + { + if (_parse (o, s)) + { + r = true; + continue; + } + + if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') + { + // Handle combined option values. + // + std::string co; + if (const char* v = std::strchr (o, '=')) + { + co.assign (o, 0, v - o); + ++v; + + int ac (2); + char* av[] = + { + const_cast (co.c_str ()), + const_cast (v) + }; + + ::butl::cli::argv_scanner ns (0, ac, av); + + if (_parse (co.c_str (), ns)) + { + // Parsed the option but not its value? + // + if (ns.end () != 2) + throw ::butl::cli::invalid_value (co, v); + + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = co.c_str (); + } + } + + // Handle combined flags. + // + char cf[3]; + { + const char* p = o + 1; + for (; *p != '\0'; ++p) + { + if (!((*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9'))) + break; + } + + if (*p == '\0') + { + for (p = o + 1; *p != '\0'; ++p) + { + std::strcpy (cf, "-"); + cf[1] = *p; + cf[2] = '\0'; + + int ac (1); + char* av[] = + { + cf + }; + + ::butl::cli::argv_scanner ns (0, ac, av); + + if (!_parse (cf, ns)) + break; + } + + if (*p == '\0') + { + // All handled. + // + s.next (); + r = true; + continue; + } + else + { + // Set the unknown option and fall through. + // + o = cf; + } + } + } + + switch (opt_mode) + { + case ::butl::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::butl::cli::unknown_mode::stop: + { + break; + } + case ::butl::cli::unknown_mode::fail: + { + throw ::butl::cli::unknown_option (o); + } + } + + break; + } + } + + switch (arg_mode) + { + case ::butl::cli::unknown_mode::skip: + { + s.skip (); + r = true; + continue; + } + case ::butl::cli::unknown_mode::stop: + { + break; + } + case ::butl::cli::unknown_mode::fail: + { + throw ::butl::cli::unknown_argument (o); + } + } + + break; + } + + return r; + } + // ln_options // diff --git a/libbutl/builtin-options.hxx b/libbutl/builtin-options.hxx index 6288e54..ad34b1b 100644 --- a/libbutl/builtin-options.hxx +++ b/libbutl/builtin-options.hxx @@ -484,6 +484,67 @@ namespace butl bool utc_; }; + class find_options + { + public: + find_options (); + + // Return true if anything has been parsed. + // + bool + parse (int& argc, + char** argv, + bool erase = false, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + bool + parse (int start, + int& argc, + char** argv, + bool erase = false, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + bool + parse (int& argc, + char** argv, + int& end, + bool erase = false, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + bool + parse (int start, + int& argc, + char** argv, + int& end, + bool erase = false, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + bool + parse (::butl::cli::scanner&, + ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail, + ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop); + + // Option accessors. + // + // Implementation details. + // + protected: + bool + _parse (const char*, ::butl::cli::scanner&); + + private: + bool + _parse (::butl::cli::scanner&, + ::butl::cli::unknown_mode option, + ::butl::cli::unknown_mode argument); + + public: + }; + class ln_options { public: diff --git a/libbutl/builtin-options.ixx b/libbutl/builtin-options.ixx index b977f16..e118156 100644 --- a/libbutl/builtin-options.ixx +++ b/libbutl/builtin-options.ixx @@ -193,6 +193,9 @@ namespace butl return this->utc_; } + // find_options + // + // ln_options // diff --git a/libbutl/builtin.cli b/libbutl/builtin.cli index adc47fa..23a5708 100644 --- a/libbutl/builtin.cli +++ b/libbutl/builtin.cli @@ -34,6 +34,11 @@ namespace butl bool --utc|-u; }; + class find_options + { + // No options so far (expression/primaries handled as arguments). + }; + class ln_options { bool --symbolic|-s; diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx index a225c95..2755bf1 100644 --- a/libbutl/builtin.cxx +++ b/libbutl/builtin.cxx @@ -816,6 +816,314 @@ namespace butl return builtin (r = 0); } + // find ... [-name ] + // [-type ] + // [-mindepth ] + // [-maxdepth ] + // + // Note: must be executed asynchronously. + // + static uint8_t + find (const strings& args, + auto_fd in, auto_fd out, auto_fd err, + const dir_path& cwd, + const builtin_callbacks& cbs) noexcept + try + { + uint8_t r (1); + ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ())); + + // Note that on some errors we will issue diagnostics but continue the + // search and return with non-zero code at the end. This is consistent + // with how major implementations behave (see below). + // + bool error_occured (false); + auto error = [&cerr, &error_occured] (bool fail = false) + { + error_occured = true; + return error_record (cerr, fail, "find"); + }; + + auto fail = [&error] () {return error (true /* fail */);}; + + try + { + in.close (); + ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ())); + + // Parse arguments. + // + cli::vector_scanner scan (args); + + // Currently, we don't expect any options. + // + parse (scan, args, cbs.parse_option, fail); + + // Parse path arguments until the first primary (starts with '-') is + // encountered. + // + small_vector paths; + + while (scan.more ()) + { + if (*scan.peek () == '-') + break; + + try + { + paths.emplace_back (scan.next ()); + } + catch (const invalid_path& e) + { + fail () << "invalid path '" << e.path << "'"; + } + } + + // Note that POSIX doesn't explicitly describe the behavior if no paths + // are specified on the command line. On Linux the current directory is + // assumed in this case. We, however, will follow the FreeBSD behavior + // and fail since this seems to be less error-prone. + // + if (paths.empty ()) + fail () << "missing start path"; + + // Parse primaries. + // + optional name; + optional type; + optional min_depth; + optional max_depth; + + while (scan.more ()) + { + const char* p (scan.next ()); + + // Return the string value of the current primary. Fail if absent or + // empty, unless empty value is allowed. + // + auto str = [p, &scan, &fail] (bool allow_empty = false) + { + if (!scan.more ()) + { + fail () << "missing value for primary '" << p << "'"; + } + + string n (p); // Save for diagnostics. + string r (scan.next ()); + + if (r.empty () && !allow_empty) + fail () << "empty value for primary '" << n << "'"; + + return r; + }; + + // Return the unsigned numeric value of the current primary. Fail if + // absent or is not a valid number. + // + auto num = [p, &str, &fail] () + { + string n (p); // Save for diagnostics. + string s (str ()); + + const char* b (s.c_str ()); + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + uint64_t r (strtoull (b, &e, 10)); // Can't throw. + + if (errno == ERANGE || e != b + s.size ()) + fail () << "invalid value '" << s << "' for primary '" << n << "'"; + + return r; + }; + + if (strcmp (p, "-name") == 0) + { + // Note that the empty never-matching pattern is allowed. + // + name = str (true /* allow_empty */); + } + else if (strcmp (p, "-type") == 0) + { + string s (str ()); + char t (s.size () == 1 ? s[0] : '\0'); + + switch (t) + { + case 'f': type = entry_type::regular; break; + case 'd': type = entry_type::directory; break; + case 'l': type = entry_type::symlink; break; + default: fail () << "invalid value '" << s << "' for primary '-type'"; + } + } + else if (strcmp (p, "-mindepth") == 0) + { + min_depth = num (); + } + else if (strcmp (p, "-maxdepth") == 0) + { + max_depth = num (); + } + else + fail () << "unknown primary '" << p << "'"; + } + + // Print the path if the expression evaluates to true for it. Traverse + // further down if the path refers to a directory and the maximum depth + // is not specified or is not reached. + // + // Note that paths for evaluating/printing (pp) and for + // stating/traversing (ap) are passed separately. The former is + // potentially relative and the latter is absolute. Also note that + // for optimization we separately pass the base name simple path. + // + auto find = [&cout, + &name, + &type, + &min_depth, + &max_depth, + &fail] (const path& pp, + const path& ap, + const path& bp, + entry_type t, + uint64_t level, + const auto& find) -> void + { + // Print the path if no primary evaluates to false. + // + if ((!type || *type == t) && + (!min_depth || level >= *min_depth) && + (!name || path_match (bp.string (), *name))) + { + // Print the trailing directory separator, if present. + // + if (pp.to_directory ()) + { + // The trailing directory separator can only be present for + // paths specified on the command line. + // + assert (level == 0); + + cout << pp.representation () << '\n'; + } + else + cout << pp << '\n'; + } + + // Traverse the directory, unless the max depth is specified and + // reached. + // + if (t == entry_type::directory && (!max_depth || level < *max_depth)) + try + { + for (const auto& de: dir_iterator (path_cast (ap), + dir_iterator::no_follow)) + { + find (pp / de.path (), + ap / de.path (), + de.path (), + de.ltype (), + level + 1, + find); + } + } + catch (const system_error& e) + { + fail () << "unable to scan directory '" << pp << "': " << e; + } + }; + + dir_path wd; + + for (const path& p: paths) + { + // Complete the path if it is relative, so that we can properly stat + // it and, potentially, traverse. Note that we don't normalize it + // since POSIX requires that the paths should be evaluated (by + // primaries) and printed unaltered. + // + path ap; + + if (p.relative ()) + { + if (wd.empty () && cwd.relative ()) + wd = current_directory (cwd, fail); + + ap = (!wd.empty () ? wd : cwd) / p; + } + + // Issue an error if the path is empty, doesn't exist, or has the + // trailing directory separator but refers to a non-directory. + // + // Note that POSIX doesn't explicitly describe the behavior if any of + // the above happens. We will follow the behavior which is common for + // both Linux and FreeBSD by issuing the diagnostics, proceeding to + // the subsequent paths, and returning with non-zero code at the end. + // + if (p.empty ()) + { + error () << "empty path"; + continue; + } + + const path& fp (!ap.empty () ? ap : p); + pair pe; + + try + { + pe = path_entry (fp); + } + catch (const system_error& e) + { + fail () << "unable to stat '" << p << "': " << e; + } + + if (!pe.first) + { + error () << "'" << p << "' doesn't exists"; + continue; + } + + entry_type t (pe.second.type); + + if (p.to_directory () && t != entry_type::directory) + { + error () << "'" << p << "' is not a directory"; + continue; + } + + find (p, fp, p.leaf (), t, 0 /* level */, find); + } + + cout.close (); + r = !error_occured ? 0 : 1; + } + // Can be thrown while closing cin or creating, writing to, or closing + // cout or writing to cerr. + // + catch (const io_error& e) + { + error () << e; + } + catch (const failed&) + { + // Diagnostics has already been issued. + } + catch (const cli::exception& e) + { + error () << e; + } + + cerr.close (); + return r; + } + // In particular, handles io_error exception potentially thrown while + // creating, writing to, or closing cerr. + // + catch (const std::exception&) + { + return 1; + } + // Create a symlink to a file or directory at the specified path and calling // the hook for the created filesystem entries. The paths must be absolute // and normalized. Fall back to creating a hardlink, if symlink creation is @@ -2227,6 +2535,7 @@ namespace butl {"diff", {nullptr, 2}}, {"echo", {&async_impl<&echo>, 2}}, {"false", {&false_, 0}}, + {"find", {&async_impl<&find>, 2}}, {"ln", {&sync_impl<&ln>, 2}}, {"mkdir", {&sync_impl<&mkdir>, 2}}, {"mv", {&sync_impl<&mv>, 2}}, diff --git a/tests/builtin/driver.cxx b/tests/builtin/driver.cxx index 7a0193f..bdf3fa9 100644 --- a/tests/builtin/driver.cxx +++ b/tests/builtin/driver.cxx @@ -28,6 +28,13 @@ using namespace std; using namespace butl; +// Disable arguments globbing that may be enabled by default for MinGW runtime +// (see tests/wildcard/driver.cxx for details). +// +#ifdef __MINGW32__ +int _CRT_glob = 0; +#endif + inline ostream& operator<< (ostream& os, const path& p) { diff --git a/tests/builtin/find.testscript b/tests/builtin/find.testscript new file mode 100644 index 0000000..b09822c --- /dev/null +++ b/tests/builtin/find.testscript @@ -0,0 +1,276 @@ +# file : tests/builtin/find.testscript +# license : MIT; see accompanying LICENSE file + +posix = ($cxx.target.class != 'windows') + +test.arguments = "find" + +: no-paths +: +$* 2>"find: missing start path" == 1 + +: no-paths-primary +: +$* -name foo 2>"find: unknown option '-name'" == 1 + +: unknown-primary +: +$* . -foo 2>"find: unknown primary '-foo'" == 1 + + +: no-primary-value +: +$* . -name 2>"find: missing value for primary '-name'" == 1 + +: empty-primary-value +: +$* . -type '' 2>"find: empty value for primary '-type'" == 1 + +: invalid-type-primary +: +$* . -type foo 2>"find: invalid value 'foo' for primary '-type'" == 1 + +: invalid-mindepth-primary +: +$* . -mindepth 12a 2>"find: invalid value '12a' for primary '-mindepth'" == 1 + +: path-not-exists +: +{ + mkdir d; + $* x d >'d' 2>"find: 'x' doesn't exists" != 0 +} + +: path +: +{ + : relative + : + { + : no-cwd + : + { + mkdir a; + touch a/b; + + $* a >>/EOO + a + a/b + EOO + } + + : absolute-cwd + : + : When cross-testing we cannot guarantee that host absolute paths are + : recognized by the target process. + : + if ($test.target == $build.host) + { + test.options += -d $~/a; + mkdir a; + touch a/b; + + $* b >'b' + } + + : relative-cwd + : + if ($test.target == $build.host) + { + test.options += -d a; + mkdir a; + touch a/b; + + $* b >'b' + } + } + + : non-normalized + : + { + mkdir a; + touch a/b; + + # Note that the path specified on the command line is used unaltered. + # + s = ($posix ? '/' : '\'); + + $* ./a >>"EOO" + ./a + ./a$(s)b + EOO + } + + : absolute + : + { + mkdir a; + touch a/b; + + $* $~/a >>/"EOO" + $~/a + $~/a/b + EOO + } + + : non-existent + : + { + touch a b; + + $* a x b >>EOO 2>"find: 'x' doesn't exists" != 0 + a + b + EOO + } + + : non-directory + : + { + touch a b c; + + $* a b/ c >>EOO 2>"find: 'b' is not a directory" != 0 + a + c + EOO + } + + : trailing-slash + : + { + mkdir -p a/b; + + $* a >>/"EOO"; + a + a/b + EOO + + $* a/ >>"EOO" + a/ + a/b + EOO + } +} + +: name-primary +: +{ + : basic + : + { + mkdir a; + touch a/ab a/ba; + + $* . -name 'a*' >>/EOO; + ./a + ./a/ab + EOO + + $* . -name 'b*' >>/EOO; + ./a/ba + EOO + + $* a -name 'a*' >>/EOO + a + a/ab + EOO + } + + : empty + : + { + touch a; + + $* . -name '' + } +} + +: type-primary +: +{ + : regular + : + { + mkdir -p a/b; + touch a/b/c; + + $* a -type f >>/EOO + a/b/c + EOO + } + + : directory + : + { + mkdir -p a/b; + touch a/b/c; + + $* a -type d >>/EOO + a + a/b + EOO + } + + : symlink + : + if $posix + { + mkdir -p a/b; + touch a/b/c; + ln -s c a/b/d; + + $* a -type l >>/EOO + a/b/d + EOO + } +} + +: mindepth-primary +: +{ + mkdir -p a/b/c; + + $* a -mindepth 0 >>/EOO; + a + a/b + a/b/c + EOO + + $* a -mindepth 1 >>/EOO; + a/b + a/b/c + EOO + + $* a -mindepth 2 >>/EOO; + a/b/c + EOO + + $* a -mindepth 3 +} + +: maxdepth-primary +: +{ + mkdir -p a/b/c; + + $* a -maxdepth 0 >>/EOO; + a + EOO + + $* a -maxdepth 1 >>/EOO; + a + a/b + EOO + + $* a -maxdepth 2 >>/EOO; + a + a/b + a/b/c + EOO + + $* a -maxdepth 3 >>/EOO + a + a/b + a/b/c + EOO +} -- cgit v1.1