From dc2c25e3f3182e8181a15487de4befca74a1ffec Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 10 Mar 2017 00:14:00 +0300 Subject: Add flexibility to path search callback function --- butl/filesystem | 29 +++++--- butl/filesystem.cxx | 115 +++++++++++++++++++++++------- tests/wildcard/driver.cxx | 32 ++++++++- tests/wildcard/testscript | 178 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 315 insertions(+), 39 deletions(-) diff --git a/butl/filesystem b/butl/filesystem index 5a8eb91..336233c 100644 --- a/butl/filesystem +++ b/butl/filesystem @@ -460,21 +460,30 @@ namespace butl // As an example, consider pattern f*/bar/b*/*.txt and path // foo/bar/baz/x.txt. The sequence of calls in this case will be: // - // (foo/, f*/, true) - // (foo/bar/baz/, b*/, true) + // (foo/, f*/, true) + // (foo/bar/baz/, b*/, true) // (foo/bar/baz/x.txt, *.txt, false) // + // If the pattern contains a recursive wildcard, then the callback function + // can be called for the same directory twice: the first time as an intermediate + // match with */ pattern to decide whether to recursively traverse the directory, + // and the second time if the directory matches the pattern component (either + // as an intermediate or a final match). As an example, consider pattern + // b**/c* and directory tree a/b/c/.
The sequence of calls in this case will + // be: + // + // (a/, */, true) + // (a/b/, */, true) + // (a/b/c/, */, true) + // (a/b/, b*/, true) + // (a/b/c/, c*/, false) + // LIBBUTL_EXPORT void path_search (const path& pattern, - const std::function&, + const std::function&, const dir_path& start = dir_path ()); - /* - LIBBUTL_EXPORT void - path_search ( - const path& pattern, - const std::function&, - const dir_path& start = dir_path ()); - */ } #include diff --git a/butl/filesystem.cxx b/butl/filesystem.cxx index 5135281..da832a9 100644 --- a/butl/filesystem.cxx +++ b/butl/filesystem.cxx @@ -931,16 +931,28 @@ namespace butl // sense for the cleanup use cases (@@ maybe this should be controllable // since for directory creation it won't make sense). // + // Prior to recursively opening a directory for iterating the preopen + // callback function is called. If false is returned, then the directory is + // not traversed but still returned by the next() call. + // // Note that iterating over non-existent directory is not en error. The // subsequent next() call returns false for such a directory. // + using preopen = std::function; + class recursive_dir_iterator { public: - recursive_dir_iterator (dir_path p, bool recursive, bool self) - : recursive_ (recursive), self_ (self), start_ (p) + recursive_dir_iterator (dir_path p, + bool recursive, + bool self, + preopen po) + : start_ (move (p)), + recursive_ (recursive), + self_ (self), + preopen_ (move (po)) { - open (dir_path ()); + open (dir_path (), self_); } // Non-copyable, non-movable type. @@ -993,7 +1005,7 @@ namespace butl if (recursive_ && pe.to_directory ()) { - open (path_cast (move (pe))); + open (path_cast (move (pe)), true); return next (p); } @@ -1003,7 +1015,7 @@ namespace butl private: void - open (dir_path p) + open (dir_path p, bool preopen) { // We should consider a racing condition here. The directory can be // removed before we create an iterator for it.
In this case we just do @@ -1011,8 +1023,17 @@ namespace butl // try { - dir_path d (start_ / p); - dir_iterator i (!d.empty () ? d : dir_path (".")); + // If preopen_() returns false, then the directory will not be + // traversed (as we leave iterator with end semantics) but still be + // returned by the next() call as a sub-entry. + // + dir_iterator i; + if (!preopen || preopen_ (p)) + { + dir_path d (start_ / p); + i = dir_iterator (!d.empty () ? d : dir_path (".")); + } + iters_.emplace_back (move (i), move (p)); } catch (const system_error& e) @@ -1028,9 +1049,10 @@ namespace butl } private: + dir_path start_; bool recursive_; bool self_; - dir_path start_; + preopen preopen_; small_vector, 1> iters_; }; @@ -1038,9 +1060,14 @@ namespace butl // each matching path. Return false if the underlying func() call returns // false. Otherwise the function conforms to the path_search() description. // + static const string any_dir ("*/"); + static bool - search (path pattern, dir_path pattern_dir, const dir_path start_dir, - const function& func) + search ( + path pattern, + dir_path pattern_dir, + const dir_path start_dir, + const function& func) { // Fast-forward the leftmost pattern non-wildcard components. So, for // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. @@ -1062,7 +1089,7 @@ namespace butl if (pe.first && ((pe.second == entry_type::directory) == p.to_directory ())) - return func (move (p)); + return func (move (p), string (), false); return true; } @@ -1088,11 +1115,37 @@ namespace butl // bool simple (pattern.simple ()); + // Note that we rely on "small function object" optimization here. + // recursive_dir_iterator i ( start_dir / pattern_dir, - pcr.find ("**") != string::npos, // Recursive. - pcr.find ("***") != string::npos); // Self-inclusive. + pcr.find ("**") != string::npos, // Recursive. + pcr.find ("***") != string::npos, // Self-inclusive. + [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. 
+ { + return func (pattern_dir / p, any_dir, true); + }); + // Canonicalize the pattern component collapsing consecutive stars (used to + // express that it is recursive) into a single one. + // + size_t j (0); + size_t n (pcr.size ()); + for (size_t i (0); i < n; ++i) + { + char c (pcr[i]); + if (!(c == '*' && i > 0 && pcr[i - 1] == '*')) + pcr[j++] = c; + } + + if (j != n) + pcr.resize (j); + + // Note that the callback function can be called for the same directory + // twice: first time as intermediate match from iterator's preopen() call, + // and then, if the first call succeeds, from the iterating loop (possibly + // as the final match). + // path p; while (i.next (p)) { @@ -1122,17 +1175,26 @@ namespace butl if (!path_match (pcr, se.leaf ().representation ())) continue; - // If the pattern is a simple path then call func() for the sub-entry. - // Otherwise the sub-entry is a directory (read above), and we search in - // it using the trailing part of the pattern. + // If the callback function returns false, then we stop the entire search + // for the final match, or do not search below the path for the + // intermediate one. + // + if (!func (pattern_dir / p, pcr, !simple)) + { + if (simple) // Final match. + return false; + else + continue; + } + + // If the pattern is not a simple one, and its leftmost component + // matches the sub-entry, then the sub-entry is a directory (see the note + // above), and we search in it using the trailing part of the pattern. // - if (!( - simple - ?
func (pattern_dir / p) - : search (pattern.leaf (pc), - pattern_dir / path_cast (move (p)), - start_dir, - func))) + if (!simple && !search (pattern.leaf (pc), + pattern_dir / path_cast (move (p)), + start_dir, + func)) return false; } @@ -1140,9 +1202,10 @@ namespace butl } void - path_search (const path& pattern, - const function& func, - const dir_path& start) + path_search ( + const path& pattern, + const function& func, + const dir_path& start) { search (pattern, dir_path (), diff --git a/tests/wildcard/driver.cxx b/tests/wildcard/driver.cxx index b3aae62..5744969 100644 --- a/tests/wildcard/driver.cxx +++ b/tests/wildcard/driver.cxx @@ -39,7 +39,10 @@ int _CRT_glob = 0; // Search for paths matching the pattern in the directory specified (absent // directory means the current one). Print the matching canonicalized paths // to STDOUT in the ascending order. Succeed if at least one matching path -// is found. Note that this option must go first in the command line, +// is found. Note that this option must go first in the command line. +// +// Also note that the driver excludes from search file system entries whose +// names start with a dot, unless the pattern explicitly matches them. // // -n // Do not sort paths found. @@ -87,9 +90,32 @@ try assert (i == argc); // All args parsed, vector paths; - auto add = [&paths] (path&& p) -> bool + auto add = + [&paths, &start] (path&& p, const std::string& pt, bool interim) -> bool { - paths.emplace_back (move (p.canonicalize ())); + bool pd (!pt.empty () && pt[0] == '.'); // Dot-started pattern. + + const path& fp (!p.empty () + ? p + : path_cast (!start.empty () + ? start + : path::current_directory ())); + + const string& s (fp.leaf ().string ()); + assert (!s.empty ()); + + bool ld (s[0] == '.'); // Dot-started leaf. + + // Skip dot-started names if pattern is not dot-started.
+ // + bool skip (ld && !pd); + + if (interim) + return !skip; + + if (!skip) + paths.emplace_back (move (p.canonicalize ())); + return true; }; diff --git a/tests/wildcard/testscript b/tests/wildcard/testscript index 4a6c532..48159e2 100644 --- a/tests/wildcard/testscript +++ b/tests/wildcard/testscript @@ -388,4 +388,182 @@ } } } + + : dot-started + : + { + +mkdir -p z/.z/.z z/z a/.z .a/.z + +touch z/.z.cxx z/z.cxx z/.z/.z.cxx z/.z/z.cxx z/z/.z.cxx z/z/z.cxx \ + a/z.cxx a/.z.cxx .a/z.cxx .a/.z.cxx + + wd=../../.. + + : recursive + : + { + : simple + : + { + : file + : + $* *z**.cxx $wd >>/EOO + a/z.cxx + z/z.cxx + z/z/z.cxx + EOO + + : dot-leading-file + : + $* .z**.cxx $wd >>/EOO + a/.z.cxx + z/.z.cxx + z/z/.z.cxx + EOO + + : dir + : + $* **z/ $wd >>/EOO + z/ + z/z/ + EOO + + : dot-leading-dir + : + $* .**z/ $wd >>/EOO + a/.z/ + z/.z/ + EOO + } + + : z-compound + : + { + : not-dot-leading + : + $* **z/*z.cxx $wd >>/EOO + z/z.cxx + z/z/z.cxx + EOO + + : dot-leading + : + $* .z**/*z.cxx $wd >>/EOO + z/.z/z.cxx + EOO + } + + : compound + : + { + : not-dot-leading + : + $* **/*z.cxx $wd >>/EOO + a/z.cxx + z/z.cxx + z/z/z.cxx + EOO + + : dot-leading + : + $* .**/*z.cxx $wd >>/EOO + .a/z.cxx + z/.z/z.cxx + EOO + } + + : self + : + { + : not-dot-leading + : + $* *z***/*z.cxx $wd/z >>/EOO + z.cxx + z/z.cxx + EOO + + : dot-leading + : + $* .z***/*z.cxx $wd/z >>/EOO + .z/z.cxx + EOO + } + } + + : immediate + : + { + : simple + : + { + : file + : + $* *z*.cxx $wd/z >>/EOO + z.cxx + EOO + + : dot-leading-file + : + $* .z*.cxx $wd/z >>/EOO + .z.cxx + EOO + + : file-dot-leading-start + : + $* *z*.cxx $wd/z/.z >>/EOO + z.cxx + EOO + + : dot-leading-file-dot-leading-start + : + $* .z*.cxx $wd/z/.z >>/EOO + .z.cxx + EOO + + : dir + : + $* *z/ $wd/ >>/EOO + z/ + EOO + + : dot-leading-dir + : + $* .*z/ $wd/z >>/EOO + .z/ + EOO + } + + : z-compound + : + { + : not-dot-leading + : + $* *z/*z.cxx $wd/z >>/EOO + z/z.cxx + EOO + + : dot-leading + : + $* .z*/*z.cxx $wd/z >>/EOO + 
.z/z.cxx + EOO + } + + : compound + : + { + : not-dot-leading + : + $* */*z.cxx $wd >>/EOO + a/z.cxx + z/z.cxx + EOO + + : dot-leading + : + $* .*/*z.cxx $wd >>/EOO + .a/z.cxx + EOO + } + } + } } -- cgit v1.1