From dc2c25e3f3182e8181a15487de4befca74a1ffec Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 10 Mar 2017 00:14:00 +0300 Subject: Add flexibility to path search callback function --- butl/filesystem | 29 ++++++++----- butl/filesystem.cxx | 115 ++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 108 insertions(+), 36 deletions(-) (limited to 'butl') diff --git a/butl/filesystem b/butl/filesystem index 5a8eb91..336233c 100644 --- a/butl/filesystem +++ b/butl/filesystem @@ -460,21 +460,30 @@ namespace butl // As an example, consider pattern f*/bar/b*/*.txt and path // foo/bar/baz/x.txt. The sequence of calls in this case will be: // - // (foo/, f*/, true) - // (foo/bar/baz/, b*/, true) + // (foo/, f*/, true) + // (foo/bar/baz/, b*/, true) // (foo/bar/baz/x.txt, *.txt, false) // + // If the pattern contains a recursive wildcard, then the callback function + // can be called for the same directory twice: first time as an intermediate + // match with */ pattern to decide if to recursively traverse the directory, + // and the second time if the directory matches the pattern component (either + // as an intermediate or a final match). As an example, consider pattern + // b**/c* and directory tree a/b/c/. The sequence of calls in this case will + // be: + // + // (a/, */, true) + // (a/b/, */ true) + // (a/b/c/, */, true) + // (a/b/, b*/, true) + // (a/b/c/, c*/, false) + // LIBBUTL_EXPORT void path_search (const path& pattern, - const std::function&, + const std::function&, const dir_path& start = dir_path ()); - /* - LIBBUTL_EXPORT void - path_search ( - const path& pattern, - const std::function&, - const dir_path& start = dir_path ()); - */ } #include diff --git a/butl/filesystem.cxx b/butl/filesystem.cxx index 5135281..da832a9 100644 --- a/butl/filesystem.cxx +++ b/butl/filesystem.cxx @@ -931,16 +931,28 @@ namespace butl // sense for the cleanup use cases (@@ maybe this should be controllable // since for directory creation it won't make sense). // + // Prior to recursively opening a directory for iterating the preopen + // callback function is called. If false is returned, then the directory is + // not traversed but still returned by the next() call. + // // Note that iterating over non-existent directory is not en error. The // subsequent next() call returns false for such a directory. // + using preopen = std::function; + class recursive_dir_iterator { public: - recursive_dir_iterator (dir_path p, bool recursive, bool self) - : recursive_ (recursive), self_ (self), start_ (p) + recursive_dir_iterator (dir_path p, + bool recursive, + bool self, + preopen po) + : start_ (move (p)), + recursive_ (recursive), + self_ (self), + preopen_ (move (po)) { - open (dir_path ()); + open (dir_path (), self_); } // Non-copyable, non-movable type. @@ -993,7 +1005,7 @@ namespace butl if (recursive_ && pe.to_directory ()) { - open (path_cast (move (pe))); + open (path_cast (move (pe)), true); return next (p); } @@ -1003,7 +1015,7 @@ namespace butl private: void - open (dir_path p) + open (dir_path p, bool preopen) { // We should consider a racing condition here. The directory can be // removed before we create an iterator for it. In this case we just do @@ -1011,8 +1023,17 @@ namespace butl // try { - dir_path d (start_ / p); - dir_iterator i (!d.empty () ? d : dir_path (".")); + // If preopen_() returns false, then the directory will not be + // traversed (as we leave iterator with end semantics) but still be + // returned by the next() call as a sub-entry. + // + dir_iterator i; + if (!preopen || preopen_ (p)) + { + dir_path d (start_ / p); + i = dir_iterator (!d.empty () ? d : dir_path (".")); + } + iters_.emplace_back (move (i), move (p)); } catch (const system_error& e) @@ -1028,9 +1049,10 @@ namespace butl } private: + dir_path start_; bool recursive_; bool self_; - dir_path start_; + preopen preopen_; small_vector, 1> iters_; }; @@ -1038,9 +1060,14 @@ namespace butl // each matching path. Return false if the underlying func() call returns // false. Otherwise the function conforms to the path_search() description. // + static const string any_dir ("*/"); + static bool - search (path pattern, dir_path pattern_dir, const dir_path start_dir, - const function& func) + search ( + path pattern, + dir_path pattern_dir, + const dir_path start_dir, + const function& func) { // Fast-forward the leftmost pattern non-wildcard components. So, for // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. @@ -1062,7 +1089,7 @@ namespace butl if (pe.first && ((pe.second == entry_type::directory) == p.to_directory ())) - return func (move (p)); + return func (move (p), string (), false); return true; } @@ -1088,11 +1115,37 @@ namespace butl // bool simple (pattern.simple ()); + // Note that we rely on "small function object" optimization here. + // recursive_dir_iterator i ( start_dir / pattern_dir, - pcr.find ("**") != string::npos, // Recursive. - pcr.find ("***") != string::npos); // Self-inclusive. + pcr.find ("**") != string::npos, // Recursive. + pcr.find ("***") != string::npos, // Self-inclusive. + [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. + { + return func (pattern_dir / p, any_dir, true); + }); + // Canonicalize the pattern component collapsing consecutive stars (used to + // express that it is recursive) into a single one. + // + size_t j (0); + size_t n (pcr.size ()); + for (size_t i (0); i < n; ++i) + { + char c (pcr[i]); + if (!(c == '*' && i > 0 && pcr[i - 1] == '*')) + pcr[j++] = c; + } + + if (j != n) + pcr.resize (j); + + // Note that the callback function can be called for the same directory + // twice: first time as intermediate match from iterator's preopen() call, + // and then, if the first call succeed, from the iterating loop (possibly + // as the final match). + // path p; while (i.next (p)) { @@ -1122,17 +1175,26 @@ namespace butl if (!path_match (pcr, se.leaf ().representation ())) continue; - // If the pattern is a simple path then call func() for the sub-entry. - // Otherwise the sub-entry is a directory (read above), and we search in - // it using the trailing part of the pattern. + // If the callback function returns false, then we stop the entire search + // for the final match, or do not search below the path for the + // intermediate one. + // + if (!func (pattern_dir / p, pcr, !simple)) + { + if (simple) // Final match. + return false; + else + continue; + } + + // If the pattern is not a simple one, and it's leftmost component + // matches the sub-entry, then the sub-entry is a directory (see the note + // above), and we search in it using the trailing part of the pattern. // - if (!( - simple - ? func (pattern_dir / p) - : search (pattern.leaf (pc), - pattern_dir / path_cast (move (p)), - start_dir, - func))) + if (!simple && !search (pattern.leaf (pc), + pattern_dir / path_cast (move (p)), + start_dir, + func)) return false; } @@ -1140,9 +1202,10 @@ namespace butl } void - path_search (const path& pattern, - const function& func, - const dir_path& start) + path_search ( + const path& pattern, + const function& func, + const dir_path& start) { search (pattern, dir_path (), -- cgit v1.1