From 04f4bb3e2492bb6e4b0769b4c7f020493cfa5cc4 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 7 Jun 2017 17:18:54 +0300 Subject: Add path_match() and path_search() overloads --- libbutl/filesystem.cxx | 491 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 359 insertions(+), 132 deletions(-) (limited to 'libbutl/filesystem.cxx') diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx index 18b4d22..3002c93 100644 --- a/libbutl/filesystem.cxx +++ b/libbutl/filesystem.cxx @@ -1097,6 +1097,192 @@ namespace butl return match (pattern.begin (), pi.base (), name.begin (), ni.base ()); } + // Search for paths matching the pattern and call the specified function for + // each matching path. Return false if the underlying func() call returns + // false. Otherwise the function conforms to the path_search() description. + // + // Note that the access to the traversed directory tree (real or virtual) is + // performed through the provided filesystem object. + // + static const string any_dir ("*/"); + + template + static bool + search ( + path pattern, + dir_path pattern_dir, + bool follow_symlinks, + const function& func, + FS& filesystem) + { + // Fast-forward the leftmost pattern non-wildcard components. So, for + // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. + // + { + auto b (pattern.begin ()); + auto e (pattern.end ()); + auto i (b); + for (; i != e && (*i).find_first_of ("*?") == string::npos; ++i) ; + + // If the pattern has no wildcards then we reduce to checking for the + // filesystem entry existence. It matches if exists and is of the proper + // type. + // + if (i == e) + { + path p (pattern_dir / pattern); + auto pe (filesystem.path_entry (p, follow_symlinks)); + + if (pe.first && + ((pe.second.type == entry_type::directory) == p.to_directory ())) + return func (move (p), string (), false); + + return true; + } + else if (i != b) // There are non-wildcard components, so fast-forward. + { + path p (b, i); + pattern = pattern.leaf (p); + pattern_dir /= path_cast (move (p)); + } + } + + assert (!pattern.empty ()); + + // The pattern leftmost component. Will use it to match the start directory + // sub-entries. + // + path pc (pattern.begin (), ++pattern.begin ()); + string pcr (pc.representation ()); + + // Note that if the pattern has multiple components (is not a simple path), + // then the leftmost one has a trailing separator, and so will match + // sub-directories only. + // + bool simple (pattern.simple ()); + + // Note that we rely on "small function object" optimization here. + // + typename FS::iterator_type i (filesystem.iterator ( + pattern_dir, + pcr.find ("**") != string::npos, // Recursive. + pcr.find ("***") != string::npos, // Self-inclusive. + follow_symlinks || !simple, + [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. + { + return func (pattern_dir / p, any_dir, true); + })); + + // Canonicalize the pattern component collapsing consecutive stars (used to + // express that it is recursive) into a single one. + // + size_t j (0); + size_t n (pcr.size ()); + for (size_t i (0); i < n; ++i) + { + char c (pcr[i]); + if (!(c == '*' && i > 0 && pcr[i - 1] == '*')) + pcr[j++] = c; + } + + if (j != n) + pcr.resize (j); + + // Note that the callback function can be called for the same directory + // twice: first time as intermediate match from iterator's preopen() call, + // and then, if the first call succeed, from the iterating loop (possibly + // as the final match). + // + path p; + while (i.next (p)) + { + // Skip sub-entry if its name doesn't match the pattern leftmost + // component. + // + // Matching the directory we are iterating through (as for a pattern + // component containing ***) is a bit tricky. This directory is + // represented by the iterator as an empty path, and so we need to + // compute it (the leaf would actually be enough) for matching. This + // leaf can be acquired from the start_dir / pattern_dir. We don't expect + // this path to be empty, as the filesystem object must replace an empty + // start directory with the current one. This is the case when we search + // in the current directory (start_dir is empty) with a pattern that + // starts with *** wildcard (for example f***/bar). Note that this will + // be the only case per path_search() as the next time pattern_dir will + // not be empty. + // + const path& se (!p.empty () + ? p + : path_cast (!pattern_dir.empty () + ? pattern_dir + : filesystem.start_dir ())); + + if (!path_match (pcr, se.leaf ().representation ())) + continue; + + // If the callback function returns false, then we stop the entire search + // for the final match, or do not search below the path for the + // intermediate one. + // + if (!func (pattern_dir / p, pcr, !simple)) + { + if (simple) // Final match. + return false; + else + continue; + } + + // If the pattern is not a simple one, and it's leftmost component + // matches the sub-entry, then the sub-entry is a directory (see the note + // above), and we search in it using the trailing part of the pattern. + // + if (!simple && !search (pattern.leaf (pc), + pattern_dir / path_cast (move (p)), + follow_symlinks, + func, + filesystem)) + return false; + } + + return true; + } + + // Path search implementations. + // + static const dir_path empty_dir; + + using preopen = std::function; + + // Base for filesystem (see above) implementations. + // + // Don't copy start directory. It is expected to exist till the end of the + // object lifetime. + // + class filesystem_base + { + protected: + filesystem_base (const dir_path& start): start_ (start) {} + + public: + const dir_path& + start_dir () + { + if (!start_.empty ()) + return start_; + + if (current_.empty ()) + current_ = dir_path::current_directory (); + + return current_; + } + + protected: + const dir_path& start_; + dir_path current_; + }; + + // Search path in the real filesystem. + // // Iterate over directory sub-entries, recursively and including itself if // requested. Note that recursive iterating goes depth-first which make // sense for the cleanup use cases (@@ maybe this should be controllable @@ -1109,8 +1295,6 @@ namespace butl // Note that iterating over non-existent directory is not en error. The // subsequent next() call returns false for such a directory. // - using preopen = std::function; - class recursive_dir_iterator { public: @@ -1128,10 +1312,11 @@ namespace butl open (dir_path (), self_); } - // Non-copyable, non-movable type. + // Move constructible-only, non-assignable type. // recursive_dir_iterator (const recursive_dir_iterator&) = delete; recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete; + recursive_dir_iterator (recursive_dir_iterator&&) = default; // Return false if no more entries left. Otherwise save the next entry path // and return true. The path is relative against the directory being @@ -1235,165 +1420,207 @@ namespace butl small_vector, 1> iters_; }; - // Search for paths matching the pattern and call the specified function for - // each matching path. Return false if the underlying func() call returns - // false. Otherwise the function conforms to the path_search() description. + // Provide an access to the real filesystem. // - static const string any_dir ("*/"); - - static bool - search ( - path pattern, - dir_path pattern_dir, - const dir_path start_dir, - bool follow_symlinks, - const function& func) + class real_filesystem: public filesystem_base { - // Fast-forward the leftmost pattern non-wildcard components. So, for - // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. - // - { - auto b (pattern.begin ()); - auto e (pattern.end ()); - auto i (b); - for (; i != e && (*i).find_first_of ("*?") == string::npos; ++i) ; + public: + using iterator_type = recursive_dir_iterator; - // If the pattern has no wildcards then we reduce to checking for the - // filesystem entry existence. It matches if exists and is of the proper - // type. - // - if (i == e) - { - path p (pattern_dir / pattern); - auto pe (path_entry (start_dir / p, true)); + real_filesystem (const dir_path& start): filesystem_base (start) {} - if (pe.first && - ((pe.second.type == entry_type::directory) == p.to_directory ())) - return func (move (p), string (), false); + pair + path_entry (const path& p, bool follow_symlinks) const + { + return butl::path_entry (start_ / p, follow_symlinks); + } - return true; - } - else if (i != b) // There are non-wildcard components, so fast-forward. - { - path p (b, i); - pattern = pattern.leaf (p); - pattern_dir /= path_cast (move (p)); - } + iterator_type + iterator (const dir_path& p, + bool recursive, + bool self, + bool follow_symlinks, + preopen po) const + { + return iterator_type (start_ / p, recursive, self, follow_symlinks, po); } + }; - assert (!pattern.empty ()); + void + path_search ( + const path& pattern, + const function& func, + const dir_path& start, + bool follow_symlinks) + { + real_filesystem fs (pattern.relative () ? start : empty_dir); + search (pattern, dir_path (), follow_symlinks, func, fs); + } - // The pattern leftmost component. Will use it to match the start directory - // sub-entries. - // - path pc (pattern.begin (), ++pattern.begin ()); - string pcr (pc.representation ()); + // Search path in the directory tree represented by a path. + // + // Iterate over path prefixes, as recursive_dir_iterator (see above) would + // iterate over the real directory tree. + // + class path_iterator + { + public: + path_iterator (path p, bool recursive, bool self, preopen po) + : path_ (move (p)), + recursive_ (recursive), + self_ (self), + preopen_ (move (po)), + iter_ (path_.begin ()) + { + open (dir_path (), self_); + } - // Note that if the pattern has multiple components (is not a simple path), - // then the leftmost one has a trailing separator, and so will match - // sub-directories only. + // Move constructible-only, non-assignable type. // - bool simple (pattern.simple ()); + path_iterator (const path_iterator&) = delete; + path_iterator& operator= (const path_iterator&) = delete; + path_iterator (path_iterator&&) = default; - // Note that we rely on "small function object" optimization here. + // Return false if no more entries left. Otherwise save the next entry path + // and return true. // - recursive_dir_iterator i ( - start_dir / pattern_dir, - pcr.find ("**") != string::npos, // Recursive. - pcr.find ("***") != string::npos, // Self-inclusive. - follow_symlinks, - [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. + bool + next (path& p) + { + if (iter_ == path_.begin ()) { - return func (pattern_dir / p, any_dir, true); - }); + if (!self_) + return false; - // Canonicalize the pattern component collapsing consecutive stars (used to - // express that it is recursive) into a single one. - // - size_t j (0); - size_t n (pcr.size ()); - for (size_t i (0); i < n; ++i) - { - char c (pcr[i]); - if (!(c == '*' && i > 0 && pcr[i - 1] == '*')) - pcr[j++] = c; - } + p = path (); + self_ = false; // To bail out the next time. + return true; + } - if (j != n) - pcr.resize (j); + path pe (path_.begin (), iter_); + if (recursive_ && pe.to_directory ()) + { + open (path_cast (pe), true); + return next (p); + } - // Note that the callback function can be called for the same directory - // twice: first time as intermediate match from iterator's preopen() call, - // and then, if the first call succeed, from the iterating loop (possibly - // as the final match). - // - path p; - while (i.next (p)) + --iter_; // Return one level up. + + p = move (pe); + return true; + } + + private: + void + open (const dir_path& p, bool preopen) { - // Skip sub-entry if its name doesn't match the pattern leftmost - // component. + // If preopen_() returns false, then the directory will not be + // traversed (as we reset the recursive flag) but still be returned by + // the next() call as a sub-entry. // - // Matching the directory we are iterating through (as for a pattern - // component containing ***) is a bit tricky. This directory is - // represented by the iterator as an empty path, and so we need to - // compute it (the leaf would actually be enough) for matching. This - // leaf can be aquired from the pattern_dir / start_dir path except the - // case when both directories are empty. This is the case when we search - // in the current directory (start_dir is empty) with a pattern that - // starts with *** wildcard (for example f***/bar). All we can do here is - // to fallback to path::current_directory() call. Note that this will be - // the only call per path_search() as the next time pattern_dir will not - // be empty. + if (preopen && !preopen_ (p)) + recursive_ = false; + else if (iter_ != path_.end ()) + ++iter_; + + // If the rightmost component is reached, then all the directories were + // traversed, so we reset the recursive flag. // - const path& se (!p.empty () - ? p - : path_cast (!pattern_dir.empty () - ? pattern_dir - : !start_dir.empty () - ? start_dir - : path::current_directory ())); + if (iter_ == path_.end ()) + recursive_ = false; + } - if (!path_match (pcr, se.leaf ().representation ())) - continue; + private: + path path_; + bool recursive_; + bool self_; + preopen preopen_; + path::iterator iter_; + }; - // If the callback function returns false, then we stop the entire search - // for the final match, or do not search below the path for the - // intermediate one. - // - if (!func (pattern_dir / p, pcr, !simple)) - { - if (simple) // Final match. - return false; - else - continue; - } + // Provide an access to a directory tree, that is represented by the path. + // + // Note that symlinks are meaningless for this filesystem. + // + class path_filesystem: public filesystem_base + { + public: + using iterator_type = path_iterator; - // If the pattern is not a simple one, and it's leftmost component - // matches the sub-entry, then the sub-entry is a directory (see the note - // above), and we search in it using the trailing part of the pattern. + path_filesystem (const dir_path& start, const path& p) + : filesystem_base (start), + path_ (p) {} + + pair + path_entry (const path& p, bool /*follow_symlinks*/) const + { + // Note that paths are not required to be normalized, so we just check + // that one path is a literal prefix of the other one. // - if (!simple && !search (pattern.leaf (pc), - pattern_dir / path_cast (move (p)), - start_dir, - follow_symlinks, - func)) - return false; + if (!path_.sub (p)) + return make_pair (false, entry_stat {entry_type::unknown, 0}); + + entry_type t (p == path_ && !p.to_directory () + ? entry_type::regular + : entry_type::directory); + + return make_pair (true, entry_stat {t, 0}); } - return true; - } + iterator_type + iterator (const dir_path& p, + bool recursive, + bool self, + bool /*follow_symlinks*/, + preopen po) const + { + assert (path_.sub (p)); + return iterator_type (path_.leaf (p), recursive, self, po); + } + + private: + const path& path_; + }; void path_search ( const path& pattern, + const path& entry, const function& func, - const dir_path& start, - bool follow_symlinks) + const dir_path& start) + { + path_filesystem fs (pattern.relative () ? start : empty_dir, entry); + search (pattern, dir_path (), true, func, fs); + } + + bool + path_match (const path& pattern, const path& entry, const dir_path& start) { - search (pattern, - dir_path (), - pattern.relative () ? start : dir_path (), - follow_symlinks, - func); + bool r (false); + + auto match = [&entry, &r] (path&& p, const std::string&, bool interim) + { + if (p == entry) + { + // If we found the entry (possibly through one of the recursive + // components) no need to search further. + // + if (!interim) + { + r = true; + return false; + } + else + // For self-matching the callback is first called in the interim + // mode (through the preopen function) with an empty path. + // + assert (p.empty ()); + } + + return true; + }; + + path_search (pattern, entry, match, start); + return r; } } -- cgit v1.1