From 1d0b79df453e1e5c5c49a35549df9f350b3660ff Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Sat, 10 Jun 2017 21:06:51 +0300 Subject: Add tests and fixes for path_search(pattern,entry) --- libbutl/filesystem.cxx | 133 ++++++++-- libbutl/filesystem.hxx | 5 + libbutl/path.hxx | 8 + tests/wildcard/driver.cxx | 49 ++-- tests/wildcard/testscript | 624 ++++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 756 insertions(+), 63 deletions(-) diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx index 3002c93..6da93b4 100644 --- a/libbutl/filesystem.cxx +++ b/libbutl/filesystem.cxx @@ -32,10 +32,12 @@ #include #include #include // unique_ptr +#include // size_t #include // pair #include // reverse_iterator #include + #include #include // throw_generic_error() #include @@ -1203,13 +1205,16 @@ namespace butl // component containing ***) is a bit tricky. This directory is // represented by the iterator as an empty path, and so we need to // compute it (the leaf would actually be enough) for matching. This - // leaf can be acquired from the start_dir / pattern_dir. We don't expect - // this path to be empty, as the filesystem object must replace an empty - // start directory with the current one. This is the case when we search - // in the current directory (start_dir is empty) with a pattern that - // starts with *** wildcard (for example f***/bar). Note that this will - // be the only case per path_search() as the next time pattern_dir will - // not be empty. + // leaf can be acquired from the pattern_dir (if not empty) or + // start_dir. We don't expect the start_dir to be empty, as the + // filesystem object must replace an empty start directory with the + // current one. This is the case when we search in the current directory + // (start_dir is empty) with a pattern that starts with a *** wildcard + // (for example f***/bar). Note that this will be the only case per + // path_search() as the next time pattern_dir will not be empty. Also + // note that this is never the case for a pattern that is an absolute + // path, as the first component cannot be a wildcard (is empty for POSIX + // and is drive for Windows). // const path& se (!p.empty () ? p @@ -1442,7 +1447,11 @@ namespace butl bool follow_symlinks, preopen po) const { - return iterator_type (start_ / p, recursive, self, follow_symlinks, po); + return iterator_type (start_ / p, + recursive, + self, + follow_symlinks, + move (po)); } }; @@ -1475,11 +1484,37 @@ namespace butl open (dir_path (), self_); } + // Behave as recursive_dir_iterator (see above) would do if iterating over + // non-existent directory. Note that this behavior differs from that for + // an empty directory (previous ctor being called with an empty path). If + // self flag is true, then, for the empty directory, the first next() call + // returns true and saves empty path (self sub-entry). For non-existent + // directory the first next() call returns false. Also note that + // recursive_dir_iterator calls the preopen function before it becomes + // known that the directory doesn't exist, and we emulate such a behavior + // here as well. + // + path_iterator (bool self, preopen po) + : self_ (false), + iter_ (path_.begin ()) + { + if (self) + po (empty_dir); + } + // Move constructible-only, non-assignable type. // path_iterator (const path_iterator&) = delete; path_iterator& operator= (const path_iterator&) = delete; - path_iterator (path_iterator&&) = default; + + // Custom move ctor (properly moves path/iterator pair). + // + path_iterator (path_iterator&& pi) + : path_ (move (pi.path_)), + recursive_ (pi.recursive_), + self_ (pi.self_), + preopen_ (move (pi.preopen_)), + iter_ (path_, pi.iter_) {} // Return false if no more entries left. Otherwise save the next entry path // and return true. @@ -1552,19 +1587,33 @@ namespace butl path_ (p) {} pair - path_entry (const path& p, bool /*follow_symlinks*/) const + path_entry (const path& p, bool /*follow_symlinks*/) { - // Note that paths are not required to be normalized, so we just check - // that one path is a literal prefix of the other one. + // If path and sub-path are non-empty, and both are absolute or relative, + // then no extra effort is required (prior to checking if one is a + // sub-path or the other). Otherwise we complete the relative paths + // first. // - if (!path_.sub (p)) - return make_pair (false, entry_stat {entry_type::unknown, 0}); + auto path_entry = [] (const path& p, const path& pe) + { + // Note that paths are not required to be normalized, so we just check + // that one path is a literal prefix of the other one. + // + if (!p.sub (pe)) + return make_pair (false, entry_stat {entry_type::unknown, 0}); - entry_type t (p == path_ && !p.to_directory () - ? entry_type::regular - : entry_type::directory); + entry_type t (pe == p && !p.to_directory () + ? entry_type::regular + : entry_type::directory); - return make_pair (true, entry_stat {t, 0}); + return make_pair (true, entry_stat {t, 0}); + }; + + if (path_.relative () == p.relative () && !path_.empty () && !p.empty ()) + return path_entry (path_, p); + + return path_entry (path_.absolute () ? path_ : complete (path_), + p.absolute () ? p : complete (p)); } iterator_type @@ -1572,10 +1621,50 @@ namespace butl bool recursive, bool self, bool /*follow_symlinks*/, - preopen po) const + preopen po) { - assert (path_.sub (p)); - return iterator_type (path_.leaf (p), recursive, self, po); + // If path and sub-path are non-empty, and both are absolute or relative, + // then no extra effort is required (prior to checking if one is a + // sub-path or the other). Otherwise we complete the relative paths + // first. + // + auto iterator = [recursive, self, &po] (const path& p, + const dir_path& pe) + { + // If the directory we should iterate belongs to the directory tree, + // then return the corresponding leaf path iterator. Otherwise return + // the non-existent directory iterator (returns false on the first + // next() call). + // + return p.sub (pe) + ? iterator_type (p.leaf (pe), recursive, self, move (po)) + : iterator_type (self, move (po)); + }; + + if (path_.relative () == p.relative () && !path_.empty () && !p.empty ()) + return iterator (path_, p); + + return iterator (path_.absolute () ? path_ : complete (path_), + p.absolute () ? p : path_cast (complete (p))); + } + + private: + // Complete the relative path. + // + path + complete (const path& p) + { + assert (p.relative ()); + + if (start_.absolute ()) + return start_ / p; + + if (current_.empty ()) + current_ = dir_path::current_directory (); + + return !start_.empty () + ? current_ / start_ / p + : current_ / p; } private: @@ -1589,7 +1678,7 @@ namespace butl const function& func, const dir_path& start) { - path_filesystem fs (pattern.relative () ? start : empty_dir, entry); + path_filesystem fs (start, entry); search (pattern, dir_path (), true, func, fs); } diff --git a/libbutl/filesystem.hxx b/libbutl/filesystem.hxx index b4f8d96..35bab9f 100644 --- a/libbutl/filesystem.hxx +++ b/libbutl/filesystem.hxx @@ -601,6 +601,11 @@ namespace butl // through contains only the specified entry. The start directory is used if // the first pattern component is a self-matching wildcard (see above). // + // If pattern or entry is relative, then it is assumed to be relative to the + // start directory (which, if relative itself, is assumed to be relative to + // the current directory). Note that the implementation can optimize the + // case when pattern and entry are both non-empty and relative. + // LIBBUTL_EXPORT void path_search (const path& pattern, const path& entry, diff --git a/libbutl/path.hxx b/libbutl/path.hxx index 56e43cf..b1ba31a 100644 --- a/libbutl/path.hxx +++ b/libbutl/path.hxx @@ -756,6 +756,14 @@ namespace butl iterator (const data_type* p, size_type b, size_type e) : p_ (p), b_ (b), e_ (e) {} + // Create an iterator by "rebasing" an old iterator onto a new path + // object. Can, for example, be used to "move" an iterator when moving + // the path object. Note: potentially dangerous if the old iterator used + // to point to a different path. + // + iterator (const basic_path& p, const iterator& i) + : p_ (&p), b_ (i.b_), e_ (i.e_) {} + iterator& operator++ () { diff --git a/tests/wildcard/driver.cxx b/tests/wildcard/driver.cxx index 1e600f6..9e3d1b7 100644 --- a/tests/wildcard/driver.cxx +++ b/tests/wildcard/driver.cxx @@ -12,6 +12,7 @@ #include #include // operator<<(ostream, exception) +#include #include using namespace std; @@ -28,25 +29,37 @@ using namespace butl; int _CRT_glob = 0; #endif -// Usage: argv[0] (-m | -s [-n] []) +// Usages: // -// Execute actions specified by -m or -s options. Exit with code 0 if succeed, +// argv[0] -mn +// argv[0] -sd [-n] [] +// argv[0] -sp [-n] [] +// +// Execute actions specified by the first option. Exit with code 0 if succeed, // 1 if fail, 2 on the underlying OS error (print error description to STDERR). // -// -m +// -mn // Match a name against the pattern. // -// -s +// -sd // Search for paths matching the pattern in the directory specified (absent // directory means the current one). Print the matching canonicalized paths // to STDOUT in the ascending order. Succeed if at least one matching path -// is found. Note that this option must go first in the command line. +// is found. For each matching path we will assert that it is also get +// matched being searched in the directory tree represented by this path +// itself. +// +// Note that the driver excludes from search file system entries which names +// start from dot, unless the pattern explicitly matches them. // -// Also note that the driver excludes from search file system entries which -// names start from dot, unless the pattern explicitly matches them. +// -sp +// Same as above, but behaves as if the directory tree being searched +// through contains only the specified entry. The start directory is used if +// the first pattern component is a self-matching wildcard. // // -n -// Do not sort paths found. +// Do not sort paths found. Meaningful in combination with -sd or -sp +// options and must follow it, if specified in the command line. // int main (int argc, const char* argv[]) @@ -55,10 +68,8 @@ try assert (argc >= 2); string op (argv[1]); - bool match (op == "-m"); - assert (match || op == "-s"); - if (match) + if (op == "-mn") { assert (argc == 4); @@ -66,9 +77,9 @@ try string name (argv[3]); return path_match (pattern, name) ? 0 : 1; } - else + else if (op == "-sd" || op == "-sp") { - assert (argc >= 3); + assert (argc >= (op == "-sd" ? 3 : 4)); bool sort (true); int i (2); @@ -84,6 +95,11 @@ try assert (i != argc); // Still need pattern. path pattern (argv[i++]); + optional entry; + + if (op == "-sp") + entry = path (argv[i++]); + dir_path start; if (i != argc) start = dir_path (argv[i++]); @@ -133,7 +149,10 @@ try return true; }; - path_search (pattern, add, start); + if (!entry) + path_search (pattern, add, start); + else + path_search (pattern, *entry, add, start); // Test search in the directory tree represented by the path. // @@ -178,6 +197,8 @@ try return paths.empty () ? 1 : 0; } + else + assert (false); } catch (const invalid_path& e) { diff --git a/tests/wildcard/testscript b/tests/wildcard/testscript index 48159e2..489f724 100644 --- a/tests/wildcard/testscript +++ b/tests/wildcard/testscript @@ -2,10 +2,10 @@ # copyright : Copyright (c) 2014-2017 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -: path-match +: path-name-match : { - test.options = -m + test.options = -mn $* foo/ foo == 1 : dir-vs-file $* foo foo/ == 1 : file-vs-dir @@ -89,10 +89,10 @@ } } -: path-search +: path-directory-search : { - test.options = -s + test.options = -sd : start : @@ -163,17 +163,17 @@ : self-recursive : - { - : start - : + { + : start + : $* f*** ../../foo >>/EOO bar/fox EOO : current : - mkdir -p bar/fox; - touch bar/fox/cox; + mkdir -p bar/fox; + touch bar/fox/cox; $* c*** >>/EOO bar/fox/cox EOO @@ -201,12 +201,12 @@ : self-recursive : - { - : start - : - : Note that the start dir is represented as an empty path being - : found. - : + { + : start + : + : Note that the start dir is represented as an empty path being + : found. + : $* f***/ ../../foo >>/EOO bar/fox/ @@ -214,7 +214,7 @@ : current : - mkdir -p bar/cox/box/; + mkdir -p bar/cox/box/; $* c***/ >>/EOO bar/cox/ @@ -249,9 +249,9 @@ : self-recursive : - { - : pattern - : + { + : pattern + : $* foo/f*** ../.. >>/EOO foo/fox EOO @@ -264,12 +264,12 @@ : current : - mkdir -p bar; - touch bar/cox; + mkdir -p bar; + touch bar/cox; $* c*** >>/EOO bar/cox EOO - } + } } : dir @@ -297,9 +297,9 @@ : self-recursive : - { - : pattern - : + { + : pattern + : $* foo/f***/b**/ ../.. >>/EOO foo/bar/ foo/fox/box/ @@ -316,13 +316,13 @@ : current : - mkdir -p bar/cox/box/; + mkdir -p bar/cox/box/; $* c***/b**/ >>/EOO bar/ bar/cox/box/ bar/cox/box/ EOO - } + } } } @@ -567,3 +567,573 @@ } } } + +: path-entry-search +: +{ + test.options = -sp + + : match + : + { + : fast-forward + : + { + : partial + : + $* foo/f** foo/fox >>/EOO + foo/fox + EOO + + : reduce + : + { + : file + : + $* foo foo >>EOO + foo + EOO + + : dir + : + $* foo/ foo/ >>/EOO + foo/ + EOO + } + } + + : iterating + : + { + : simple + : + { + : immediate + : + { + : file + : + $* f* foo >>EOO + foo + EOO + + : dir + : + $* f*/ foo/ >>/EOO + foo/ + EOO + } + + : distant + : + { + : file + : + $* f*/b* foo/bar >>/EOO + foo/bar + EOO + + : dir + : + $* f*/b*/ foo/bar/ >>/EOO + foo/bar/ + EOO + } + } + + : recursive + : + { + : immediate + : + { + : file + : + $* f** foo >>EOO + foo + EOO + + : dir + : + $* f**/ foo/ >>/EOO + foo/ + EOO + } + + : distant + : + { + : file + : + $* f** foo/fox >>/EOO + foo/fox + EOO + + : dir + : + $* f**/ foo/fox/ >>/EOO + foo/ + foo/fox/ + EOO + } + } + + : self + : + { + : immediate + : + { + : file + : + $* f*** foo fox/ >>EOO + foo + EOO + + : dir + : + $* f***/ foo/ fox/ >>/EOO + + foo/ + EOO + } + + : distant + : + { + : file + : + $* f*** foo/fox >>/EOO + foo/fox + EOO + + : dir + : + $* f***/ foo/fox/ foo/ >>/EOO + + foo/ + foo/fox/ + EOO + } + } + } + + : absolute + : + : When cross-testing we can't guarantee that host absolute paths are + : recognized by the target process. + : + if ($test.target == $build.host) + { + wd = $~ + + : both + : + { + : reduce + : + $* $wd/foo $wd/foo >>/"EOO" + $wd/foo + EOO + + : iterate + : + $* $wd/f**/ $wd/foo/fox/ >>/"EOO" + $wd/foo/ + $wd/foo/fox/ + EOO + } + + : pattern + : + { + : reduce + : + $* $wd/foo foo $wd >>/"EOO" + $wd/foo + EOO + + : iterate + : + $* $wd/f**/ foo/fox/ >>/"EOO" + $wd/pattern/iterate/foo/ + $wd/pattern/iterate/foo/fox/ + EOO + } + + : entry + : + { + : reduce + : + $* foo $wd/foo $wd >>/"EOO" + foo + EOO + + : iterate + : + $* f**/ $wd/entry/iterate/foo/fox/ >>/"EOO" + foo/ + foo/fox/ + EOO + } + } + } + + : no-match + : + { + : fast-forward + : + { + : partial + : + $* foo/f** fox/bar/baz/fix == 1 + + : reduce + : + { + : file + : + { + : not-exists + : + $* foo fox == 1 + + : not-file + : + $* foo foo/ == 1 + + : empy + : + { + : both + : + $* '' '' == 1 + + : pattern + : + $* '' foo == 1 + + : path + : + $* foo '' == 1 + } + } + + : dir + : + { + : not-exists + : + $* foo/ fox/ == 1 + + : not-dir + : + $* foo/ foo == 1 + + : empy + : + { + : pattern + : + $* '' foo/ == 1 + + : path + : + $* foo/ '' == 1 + } + } + } + } + + : iterating + : + { + : simple + : + { + : immediate + : + { + : file + : + { + : not-exists + : + $* f* bar == 1 + + : not-file + : + $* f* foo/ == 1 + + : empty + : + $* f* '' == 1 + } + + : dir + : + { + : not-exists + : + $* f*/ bar/ == 1 + + : not-dir + : + $* f*/ foo == 1 + + : empty + : + $* f*/ '' == 1 + } + } + + : distant + : + { + : file + : + { + : not-exists-other + : + $* f*/b* foo/fox == 1 + + : not-exists-none + : + $* f*/b* foo/ == 1 + + : not-file + : + $* f*/b* foo/bar/ == 1 + } + + : dir + : + { + : not-exists-other + : + $* f*/b*/ foo/fox/ == 1 + + : not-exists-none + : + $* f*/b*/ foo/ == 1 + + : not-dir + : + $* f*/b*/ foo/bar == 1 + } + } + } + + : recursive + : + { + : immediate + : + { + : file + : + { + : not-exists + : + $* f** bar == 1 + + : not-file + : + $* f** foo/ == 1 + } + + : dir + : + { + : not-exists + : + $* f**/ bar/ == 1 + + : not-dir + : + $* f**/ foo == 1 + } + } + + : distant + : + { + : file + : + { + : not-exists-other + : + $* f** foo/bar == 1 + + : not-exists-none + : + $* f** foo/ == 1 + + : not-file + : + $* f** foo/fox/ == 1 + } + + : dir + : + { + : not-exists-other + : + $* f*/b*/ foo/fox/ == 1 + + : not-exists-none + : + $* f*/b*/ foo/ == 1 + + : not-dir + : + $* f*/b*/ foo/bar == 1 + } + } + } + + + : self + : + { + : immediate + : + { + : file + : + { + : not-exists + : + $* f*** bar baz/ == 1 + + : not-file + : + $* f*** foo/ fox/ == 1 + } + + : dir + : + { + : not-exists + : + $* f***/ bar/ baz/ == 1 + + : not-dir + : + $* f***/ foo == 1 + } + } + + : distant + : + { + : file + : + { + : not-exists-other + : + $* f*** foo/bar == 1 + + : not-exists-none + : + $* f*** foo/ == 1 + + : not-file + : + $* f*** foo/fox/ fix/ == 1 + } + + : dir + : + { + : not-exists-other + : + $* f***/ bar/baz/ bak/ == 1 + + : not-exists-none + : + $* f***/ bar/ == 1 + + : not-dir + : + $* f***/ bar/foo == 1 + } + } + } + } + + : absolute + : + : When cross-testing we can't guarantee that host absolute paths are + : recognized by the target process. + : + if ($test.target == $build.host) + { + : reduce + : + { + : file + : + { + : not-exists + : + $* foo $~/fox == 1 + + : not-file + : + $* $~/foo foo/ == 1 + } + + : dir + : + { + : not-exists + : + $* $~/foo/ fox/ == 1 + + : not-dir + : + $* foo/ $~/foo == 1 + } + } + + : iterating + : + { + : file + : + { + : not-exists-other + : + $* $~/f** foo/bar == 1 + + : not-exists-none + : + $* f** $~/foo/ == 1 + + : not-file + : + $* $~/f** $~/foo/fox/ == 1 + } + + : dir + : + { + : not-exists-other + : + $* $~/f*/b*/ $~/foo/fox/ == 1 + + : not-exists-none + : + $* $~/f*/b*/ foo/ == 1 + + : not-dir + : + $* f*/b*/ $~/foo/bar == 1 + } + } + } + } +} -- cgit v1.1