about summary refs log tree commit diff
path: root/libbutl
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2023-05-23 20:44:50 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2023-05-25 14:12:42 +0300
commit 3282252e09d960108cea00dc464799111be42220 (patch)
tree b5c577eb89ee5cf31864fab5cde589934f479bc3 /libbutl
parent 5de2cb9ae1ed011469a208ae07df913f7bee88eb (diff)
Add find builtin
Diffstat (limited to 'libbutl')
-rw-r--r-- libbutl/builtin-options.cxx 263
-rw-r--r-- libbutl/builtin-options.hxx 61
-rw-r--r-- libbutl/builtin-options.ixx 3
-rw-r--r-- libbutl/builtin.cli 5
-rw-r--r-- libbutl/builtin.cxx 309
5 files changed, 641 insertions, 0 deletions
diff --git a/libbutl/builtin-options.cxx b/libbutl/builtin-options.cxx
index a2eb781..3178555 100644
--- a/libbutl/builtin-options.cxx
+++ b/libbutl/builtin-options.cxx
@@ -1271,6 +1271,269 @@ namespace butl
return r;
}
+ // find_options
+ //
+
+ // Default constructor. The body is empty since this options class has no
+ // option members to initialize.
+ //
+ find_options::
+ find_options ()
+ {
+ }
+
+ // Scan argc/argv with an argv_scanner (passing the erase flag through to
+ // it) and parse the result. Return true if anything has been parsed.
+ //
+ bool find_options::
+ parse (int& argc,
+ char** argv,
+ bool erase,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ ::butl::cli::argv_scanner scan (argc, argv, erase);
+ return _parse (scan, opt, arg);
+ }
+
+ // Same as the argc/argv version but additionally pass the start position
+ // to the argv_scanner. Return true if anything has been parsed.
+ //
+ bool find_options::
+ parse (int start,
+ int& argc,
+ char** argv,
+ bool erase,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ ::butl::cli::argv_scanner scan (start, argc, argv, erase);
+ return _parse (scan, opt, arg);
+ }
+
+ // Scan argc/argv with an argv_scanner, parse the result, and then store
+ // the scanner's end position in end. Return true if anything has been
+ // parsed.
+ //
+ bool find_options::
+ parse (int& argc,
+ char** argv,
+ int& end,
+ bool erase,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ ::butl::cli::argv_scanner scan (argc, argv, erase);
+ const bool parsed (_parse (scan, opt, arg));
+
+ // Report where the scanner has ended up (only reached if not thrown).
+ //
+ end = scan.end ();
+ return parsed;
+ }
+
+ // Same as the argc/argv/end version but additionally pass the start
+ // position to the argv_scanner. Return true if anything has been parsed.
+ //
+ bool find_options::
+ parse (int start,
+ int& argc,
+ char** argv,
+ int& end,
+ bool erase,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ ::butl::cli::argv_scanner scan (start, argc, argv, erase);
+ const bool parsed (_parse (scan, opt, arg));
+
+ // Report where the scanner has ended up (only reached if not thrown).
+ //
+ end = scan.end ();
+ return parsed;
+ }
+
+ // Parse the arguments supplied by the caller's scanner. Return true if
+ // anything has been parsed.
+ //
+ bool find_options::
+ parse (::butl::cli::scanner& s,
+ ::butl::cli::unknown_mode opt,
+ ::butl::cli::unknown_mode arg)
+ {
+ return _parse (s, opt, arg);
+ }
+
+ // Map of the option names to the functions that _parse() calls (passing
+ // the options object and the scanner) when the name is matched.
+ //
+ typedef
+ std::map<std::string, void (*) (find_options&, ::butl::cli::scanner&)>
+ _cli_find_options_map;
+
+ static _cli_find_options_map _cli_find_options_map_;
+
+ // Static initializer for the above map. Note that its constructor
+ // registers nothing, so the map stays empty and no option name is ever
+ // matched.
+ //
+ struct _cli_find_options_map_init
+ {
+ _cli_find_options_map_init ()
+ {
+ }
+ };
+
+ static _cli_find_options_map_init _cli_find_options_map_init_;
+
+ // Try to parse a single option: if the name is registered in the options
+ // map, then call its parse function for this object and the scanner and
+ // return true. Otherwise return false without touching the scanner.
+ //
+ bool find_options::
+ _parse (const char* o, ::butl::cli::scanner& s)
+ {
+ const _cli_find_options_map::const_iterator pos (
+ _cli_find_options_map_.find (o));
+
+ // Not one of our options.
+ //
+ if (pos == _cli_find_options_map_.end ())
+ return false;
+
+ (*(pos->second)) (*this, s);
+ return true;
+ }
+
+ // Parse the arguments supplied by the scanner until it is exhausted or
+ // until an unknown option (opt_mode) or argument (arg_mode) is
+ // encountered in the stop mode. In the fail mode throw unknown_option or
+ // unknown_argument instead. Also throw invalid_value for a combined
+ // option value (--opt=value) if the option was parsed but its value was
+ // not. Return true if anything has been parsed or skipped.
+ //
+ bool find_options::
+ _parse (::butl::cli::scanner& s,
+ ::butl::cli::unknown_mode opt_mode,
+ ::butl::cli::unknown_mode arg_mode)
+ {
+ // Can't skip combined flags (--no-combined-flags).
+ //
+ assert (opt_mode != ::butl::cli::unknown_mode::skip);
+
+ bool r = false;
+ bool opt = true;
+
+ while (s.more ())
+ {
+ const char* o = s.peek ();
+
+ // Everything starting from the -- separator is treated as an argument.
+ // Note that the separator itself is not consumed here but is handled
+ // according to arg_mode below.
+ //
+ if (std::strcmp (o, "--") == 0)
+ {
+ opt = false;
+ }
+
+ if (opt)
+ {
+ // First try it as is.
+ //
+ if (_parse (o, s))
+ {
+ r = true;
+ continue;
+ }
+
+ // Only something that starts with '-' and is not just "-" is
+ // considered to be an option. Anything else is treated as an argument
+ // (see arg_mode below).
+ //
+ if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0')
+ {
+ // Handle combined option values.
+ //
+ std::string co;
+ if (const char* v = std::strchr (o, '='))
+ {
+ co.assign (o, 0, v - o);
+ ++v;
+
+ // Re-scan the name and the value as two separate arguments.
+ //
+ int ac (2);
+ char* av[] =
+ {
+ const_cast<char*> (co.c_str ()),
+ const_cast<char*> (v)
+ };
+
+ ::butl::cli::argv_scanner ns (0, ac, av);
+
+ if (_parse (co.c_str (), ns))
+ {
+ // Parsed the option but not its value?
+ //
+ if (ns.end () != 2)
+ throw ::butl::cli::invalid_value (co, v);
+
+ s.next ();
+ r = true;
+ continue;
+ }
+ else
+ {
+ // Set the unknown option and fall through.
+ //
+ o = co.c_str ();
+ }
+ }
+
+ // Handle combined flags.
+ //
+ char cf[3];
+ {
+ // Only consider it if everything after the leading '-' is
+ // alphanumeric.
+ //
+ const char* p = o + 1;
+ for (; *p != '\0'; ++p)
+ {
+ if (!((*p >= 'a' && *p <= 'z') ||
+ (*p >= 'A' && *p <= 'Z') ||
+ (*p >= '0' && *p <= '9')))
+ break;
+ }
+
+ if (*p == '\0')
+ {
+ // Try to parse each character as a separate -<c> flag, stopping at
+ // the first one that is not recognized.
+ //
+ for (p = o + 1; *p != '\0'; ++p)
+ {
+ std::strcpy (cf, "-");
+ cf[1] = *p;
+ cf[2] = '\0';
+
+ int ac (1);
+ char* av[] =
+ {
+ cf
+ };
+
+ ::butl::cli::argv_scanner ns (0, ac, av);
+
+ if (!_parse (cf, ns))
+ break;
+ }
+
+ if (*p == '\0')
+ {
+ // All handled.
+ //
+ s.next ();
+ r = true;
+ continue;
+ }
+ else
+ {
+ // Set the unknown option and fall through.
+ //
+ o = cf;
+ }
+ }
+ }
+
+ // Unknown option. Note that the skip mode is ruled out by the assert
+ // above (if assertions are enabled).
+ //
+ switch (opt_mode)
+ {
+ case ::butl::cli::unknown_mode::skip:
+ {
+ s.skip ();
+ r = true;
+ continue;
+ }
+ case ::butl::cli::unknown_mode::stop:
+ {
+ break;
+ }
+ case ::butl::cli::unknown_mode::fail:
+ {
+ throw ::butl::cli::unknown_option (o);
+ }
+ }
+
+ // Only reached in the stop mode: leave the parsing loop.
+ //
+ break;
+ }
+ }
+
+ // Argument (including the -- separator and everything after it).
+ //
+ switch (arg_mode)
+ {
+ case ::butl::cli::unknown_mode::skip:
+ {
+ s.skip ();
+ r = true;
+ continue;
+ }
+ case ::butl::cli::unknown_mode::stop:
+ {
+ break;
+ }
+ case ::butl::cli::unknown_mode::fail:
+ {
+ throw ::butl::cli::unknown_argument (o);
+ }
+ }
+
+ // Only reached in the stop mode: leave the parsing loop.
+ //
+ break;
+ }
+
+ return r;
+ }
+
// ln_options
//
diff --git a/libbutl/builtin-options.hxx b/libbutl/builtin-options.hxx
index 6288e54..ad34b1b 100644
--- a/libbutl/builtin-options.hxx
+++ b/libbutl/builtin-options.hxx
@@ -484,6 +484,67 @@ namespace butl
bool utc_;
};
+ // Command line options of the find builtin. Corresponds to the
+ // find_options class in builtin.cli, which defines no options so far, so
+ // there are no option accessors or data members.
+ //
+ class find_options
+ {
+ public:
+ find_options ();
+
+ // Return true if anything has been parsed.
+ //
+ // By default an unknown option is an error (fail mode) while parsing stops
+ // at the first unknown argument (stop mode). The overloads that take start
+ // pass it to the argv scanner and the overloads that take end set it to
+ // the scanner's end position after parsing.
+ //
+ bool
+ parse (int& argc,
+ char** argv,
+ bool erase = false,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ bool
+ parse (int start,
+ int& argc,
+ char** argv,
+ bool erase = false,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ bool
+ parse (int& argc,
+ char** argv,
+ int& end,
+ bool erase = false,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ bool
+ parse (int start,
+ int& argc,
+ char** argv,
+ int& end,
+ bool erase = false,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ bool
+ parse (::butl::cli::scanner&,
+ ::butl::cli::unknown_mode option = ::butl::cli::unknown_mode::fail,
+ ::butl::cli::unknown_mode argument = ::butl::cli::unknown_mode::stop);
+
+ // Option accessors.
+ //
+ // Implementation details.
+ //
+ protected:
+ // Try to parse a single option with the specified name. Return false if
+ // the name is not recognized.
+ //
+ bool
+ _parse (const char*, ::butl::cli::scanner&);
+
+ private:
+ // The parsing loop shared by all the parse() overloads.
+ //
+ bool
+ _parse (::butl::cli::scanner&,
+ ::butl::cli::unknown_mode option,
+ ::butl::cli::unknown_mode argument);
+
+ public:
+ };
+
class ln_options
{
public:
diff --git a/libbutl/builtin-options.ixx b/libbutl/builtin-options.ixx
index b977f16..e118156 100644
--- a/libbutl/builtin-options.ixx
+++ b/libbutl/builtin-options.ixx
@@ -193,6 +193,9 @@ namespace butl
return this->utc_;
}
+ // find_options
+ //
+
// ln_options
//
diff --git a/libbutl/builtin.cli b/libbutl/builtin.cli
index adc47fa..23a5708 100644
--- a/libbutl/builtin.cli
+++ b/libbutl/builtin.cli
@@ -34,6 +34,11 @@ namespace butl
bool --utc|-u;
};
+ class find_options
+ {
+ // No options so far: the start paths and the expression primaries
+ // (-name, -type, -mindepth, -maxdepth) are handled as arguments by the
+ // builtin implementation itself.
+ };
+
class ln_options
{
bool --symbolic|-s;
diff --git a/libbutl/builtin.cxx b/libbutl/builtin.cxx
index a225c95..2755bf1 100644
--- a/libbutl/builtin.cxx
+++ b/libbutl/builtin.cxx
@@ -816,6 +816,314 @@ namespace butl
return builtin (r = 0);
}
+ // find <start-path>... [-name <pattern>]
+ // [-type <type>]
+ // [-mindepth <depth>]
+ // [-maxdepth <depth>]
+ //
+ // Print the start paths and, recursively, the filesystem entries under
+ // them for which all the specified primaries evaluate to true. The
+ // recognized -type values are f (regular file), d (directory), and l
+ // (symlink). The depth values must be unsigned decimal numbers (no sign or
+ // leading whitespace is allowed).
+ //
+ // Return 0 on success and 1 if any error has occurred, including the
+ // errors after which the search continues with the subsequent paths.
+ //
+ // Note: must be executed asynchronously.
+ //
+ static uint8_t
+ find (const strings& args,
+ auto_fd in, auto_fd out, auto_fd err,
+ const dir_path& cwd,
+ const builtin_callbacks& cbs) noexcept
+ try
+ {
+ uint8_t r (1);
+ ofdstream cerr (err != nullfd ? move (err) : fddup (stderr_fd ()));
+
+ // Note that on some errors we will issue diagnostics but continue the
+ // search and return with non-zero code at the end. This is consistent
+ // with how major implementations behave (see below).
+ //
+ bool error_occurred (false);
+ auto error = [&cerr, &error_occurred] (bool fail = false)
+ {
+ error_occurred = true;
+ return error_record (cerr, fail, "find");
+ };
+
+ auto fail = [&error] () {return error (true /* fail */);};
+
+ try
+ {
+ in.close ();
+ ofdstream cout (out != nullfd ? move (out) : fddup (stdout_fd ()));
+
+ // Parse arguments.
+ //
+ cli::vector_scanner scan (args);
+
+ // Currently, we don't expect any options.
+ //
+ parse<find_options> (scan, args, cbs.parse_option, fail);
+
+ // Parse path arguments until the first primary (starts with '-') is
+ // encountered.
+ //
+ small_vector<path, 1> paths;
+
+ while (scan.more ())
+ {
+ if (*scan.peek () == '-')
+ break;
+
+ try
+ {
+ paths.emplace_back (scan.next ());
+ }
+ catch (const invalid_path& e)
+ {
+ fail () << "invalid path '" << e.path << "'";
+ }
+ }
+
+ // Note that POSIX doesn't explicitly describe the behavior if no paths
+ // are specified on the command line. On Linux the current directory is
+ // assumed in this case. We, however, will follow the FreeBSD behavior
+ // and fail since this seems to be less error-prone.
+ //
+ if (paths.empty ())
+ fail () << "missing start path";
+
+ // Parse primaries.
+ //
+ optional<string> name;
+ optional<entry_type> type;
+ optional<uint64_t> min_depth;
+ optional<uint64_t> max_depth;
+
+ while (scan.more ())
+ {
+ const char* p (scan.next ());
+
+ // Return the string value of the current primary. Fail if absent or
+ // empty, unless empty value is allowed.
+ //
+ auto str = [p, &scan, &fail] (bool allow_empty = false)
+ {
+ if (!scan.more ())
+ {
+ fail () << "missing value for primary '" << p << "'";
+ }
+
+ string n (p); // Save for diagnostics.
+ string r (scan.next ());
+
+ if (r.empty () && !allow_empty)
+ fail () << "empty value for primary '" << n << "'";
+
+ return r;
+ };
+
+ // Return the unsigned numeric value of the current primary. Fail if
+ // absent or is not a valid number.
+ //
+ auto num = [p, &str, &fail] ()
+ {
+ string n (p); // Save for diagnostics.
+ string s (str ());
+
+ // Note that strtoull() skips leading whitespace and accepts a sign,
+ // negating the value in the unsigned type without setting errno (so
+ // "-1" would silently become the maximum value). Thus, make sure the
+ // value starts with a digit. Note also that the value cannot be empty
+ // here (see str() above).
+ //
+ bool digit (s[0] >= '0' && s[0] <= '9');
+
+ const char* b (s.c_str ());
+ char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
+ uint64_t r (digit ? strtoull (b, &e, 10) : 0); // Can't throw.
+
+ if (!digit || errno == ERANGE || e != b + s.size ())
+ fail () << "invalid value '" << s << "' for primary '" << n << "'";
+
+ return r;
+ };
+
+ if (strcmp (p, "-name") == 0)
+ {
+ // Note that the empty never-matching pattern is allowed.
+ //
+ name = str (true /* allow_empty */);
+ }
+ else if (strcmp (p, "-type") == 0)
+ {
+ string s (str ());
+ char t (s.size () == 1 ? s[0] : '\0');
+
+ switch (t)
+ {
+ case 'f': type = entry_type::regular; break;
+ case 'd': type = entry_type::directory; break;
+ case 'l': type = entry_type::symlink; break;
+ default: fail () << "invalid value '" << s << "' for primary '-type'";
+ }
+ }
+ else if (strcmp (p, "-mindepth") == 0)
+ {
+ min_depth = num ();
+ }
+ else if (strcmp (p, "-maxdepth") == 0)
+ {
+ max_depth = num ();
+ }
+ else
+ fail () << "unknown primary '" << p << "'";
+ }
+
+ // Print the path if the expression evaluates to true for it. Traverse
+ // further down if the path refers to a directory and the maximum depth
+ // is not specified or is not reached.
+ //
+ // Note that paths for evaluating/printing (pp) and for
+ // stating/traversing (ap) are passed separately. The former is
+ // potentially relative and the latter is absolute. Also note that
+ // for optimization we separately pass the base name simple path.
+ //
+ // The lambda is passed to itself as the last argument in order to be
+ // able to recurse.
+ //
+ auto find = [&cout,
+ &name,
+ &type,
+ &min_depth,
+ &max_depth,
+ &fail] (const path& pp,
+ const path& ap,
+ const path& bp,
+ entry_type t,
+ uint64_t level,
+ const auto& find) -> void
+ {
+ // Print the path if no primary evaluates to false.
+ //
+ if ((!type || *type == t) &&
+ (!min_depth || level >= *min_depth) &&
+ (!name || path_match (bp.string (), *name)))
+ {
+ // Print the trailing directory separator, if present.
+ //
+ if (pp.to_directory ())
+ {
+ // The trailing directory separator can only be present for
+ // paths specified on the command line.
+ //
+ assert (level == 0);
+
+ cout << pp.representation () << '\n';
+ }
+ else
+ cout << pp << '\n';
+ }
+
+ // Traverse the directory, unless the max depth is specified and
+ // reached.
+ //
+ if (t == entry_type::directory && (!max_depth || level < *max_depth))
+ try
+ {
+ for (const auto& de: dir_iterator (path_cast<dir_path> (ap),
+ dir_iterator::no_follow))
+ {
+ find (pp / de.path (),
+ ap / de.path (),
+ de.path (),
+ de.ltype (),
+ level + 1,
+ find);
+ }
+ }
+ catch (const system_error& e)
+ {
+ fail () << "unable to scan directory '" << pp << "': " << e;
+ }
+ };
+
+ // The current working directory to complete the relative paths against.
+ // Only obtained if the specified cwd is relative and there is at least
+ // one relative start path.
+ //
+ dir_path wd;
+
+ for (const path& p: paths)
+ {
+ // Complete the path if it is relative, so that we can properly stat
+ // it and, potentially, traverse. Note that we don't normalize it
+ // since POSIX requires that the paths should be evaluated (by
+ // primaries) and printed unaltered.
+ //
+ path ap;
+
+ if (p.relative ())
+ {
+ if (wd.empty () && cwd.relative ())
+ wd = current_directory (cwd, fail);
+
+ ap = (!wd.empty () ? wd : cwd) / p;
+ }
+
+ // Issue an error if the path is empty, doesn't exist, or has the
+ // trailing directory separator but refers to a non-directory.
+ //
+ // Note that POSIX doesn't explicitly describe the behavior if any of
+ // the above happens. We will follow the behavior which is common for
+ // both Linux and FreeBSD by issuing the diagnostics, proceeding to
+ // the subsequent paths, and returning with non-zero code at the end.
+ //
+ if (p.empty ())
+ {
+ error () << "empty path";
+ continue;
+ }
+
+ const path& fp (!ap.empty () ? ap : p);
+ pair<bool, entry_stat> pe;
+
+ try
+ {
+ pe = path_entry (fp);
+ }
+ catch (const system_error& e)
+ {
+ fail () << "unable to stat '" << p << "': " << e;
+ }
+
+ if (!pe.first)
+ {
+ error () << "'" << p << "' doesn't exists";
+ continue;
+ }
+
+ entry_type t (pe.second.type);
+
+ if (p.to_directory () && t != entry_type::directory)
+ {
+ error () << "'" << p << "' is not a directory";
+ continue;
+ }
+
+ find (p, fp, p.leaf (), t, 0 /* level */, find);
+ }
+
+ cout.close ();
+ r = !error_occurred ? 0 : 1;
+ }
+ // Can be thrown while closing cin or creating, writing to, or closing
+ // cout or writing to cerr.
+ //
+ catch (const io_error& e)
+ {
+ error () << e;
+ }
+ catch (const failed&)
+ {
+ // Diagnostics have already been issued.
+ }
+ catch (const cli::exception& e)
+ {
+ error () << e;
+ }
+
+ cerr.close ();
+ return r;
+ }
+ // In particular, handles io_error exception potentially thrown while
+ // creating, writing to, or closing cerr.
+ //
+ catch (const std::exception&)
+ {
+ return 1;
+ }
+
// Create a symlink to a file or directory at the specified path and calling
// the hook for the created filesystem entries. The paths must be absolute
// and normalized. Fall back to creating a hardlink, if symlink creation is
@@ -2227,6 +2535,7 @@ namespace butl
{"diff", {nullptr, 2}},
{"echo", {&async_impl<&echo>, 2}},
{"false", {&false_, 0}},
+ {"find", {&async_impl<&find>, 2}},
{"ln", {&sync_impl<&ln>, 2}},
{"mkdir", {&sync_impl<&mkdir>, 2}},
{"mv", {&sync_impl<&mv>, 2}},