aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-03-10 00:14:00 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-03-13 14:45:32 +0300
commitdc2c25e3f3182e8181a15487de4befca74a1ffec (patch)
treeb83cfb83188abd2c361f9eb839cc3600fb1f8f44
parent870da718e38555352343a46ce02fb46d5eb3a365 (diff)
Add flexibility to path search callback function
-rw-r--r--butl/filesystem29
-rw-r--r--butl/filesystem.cxx115
-rw-r--r--tests/wildcard/driver.cxx32
-rw-r--r--tests/wildcard/testscript178
4 files changed, 315 insertions, 39 deletions
diff --git a/butl/filesystem b/butl/filesystem
index 5a8eb91..336233c 100644
--- a/butl/filesystem
+++ b/butl/filesystem
@@ -460,21 +460,30 @@ namespace butl
// As an example, consider pattern f*/bar/b*/*.txt and path
// foo/bar/baz/x.txt. The sequence of calls in this case will be:
//
- // (foo/, f*/, true)
- // (foo/bar/baz/, b*/, true)
+ // (foo/, f*/, true)
+ // (foo/bar/baz/, b*/, true)
// (foo/bar/baz/x.txt, *.txt, false)
//
+ // If the pattern contains a recursive wildcard, then the callback function
+ // can be called for the same directory twice: first time as an intermediate
+ // match with */ pattern to decide whether to recursively traverse the directory,
+ // and the second time if the directory matches the pattern component (either
+ // as an intermediate or a final match). As an example, consider pattern
+ // b**/c* and directory tree a/b/c/. The sequence of calls in this case will
+ // be:
+ //
+ // (a/, */, true)
+ // (a/b/, */, true)
+ // (a/b/c/, */, true)
+ // (a/b/, b*/, true)
+ // (a/b/c/, c*/, false)
+ //
LIBBUTL_EXPORT void
path_search (const path& pattern,
- const std::function<bool (path&&)>&,
+ const std::function<bool (path&&,
+ const std::string& pattern,
+ bool interm)>&,
const dir_path& start = dir_path ());
- /*
- LIBBUTL_EXPORT void
- path_search (
- const path& pattern,
- const std::function<bool (path&&, const string& pattern, bool interm)>&,
- const dir_path& start = dir_path ());
- */
}
#include <butl/filesystem.ixx>
diff --git a/butl/filesystem.cxx b/butl/filesystem.cxx
index 5135281..da832a9 100644
--- a/butl/filesystem.cxx
+++ b/butl/filesystem.cxx
@@ -931,16 +931,28 @@ namespace butl
// sense for the cleanup use cases (@@ maybe this should be controllable
// since for directory creation it won't make sense).
//
+ // Prior to recursively opening a directory for iteration, the preopen
+ // callback function is called. If false is returned, then the directory is
+ // not traversed but still returned by the next() call.
+ //
// Note that iterating over non-existent directory is not an error. The
// subsequent next() call returns false for such a directory.
//
+ using preopen = std::function<bool (const dir_path&)>;
+
class recursive_dir_iterator
{
public:
- recursive_dir_iterator (dir_path p, bool recursive, bool self)
- : recursive_ (recursive), self_ (self), start_ (p)
+ recursive_dir_iterator (dir_path p,
+ bool recursive,
+ bool self,
+ preopen po)
+ : start_ (move (p)),
+ recursive_ (recursive),
+ self_ (self),
+ preopen_ (move (po))
{
- open (dir_path ());
+ open (dir_path (), self_);
}
// Non-copyable, non-movable type.
@@ -993,7 +1005,7 @@ namespace butl
if (recursive_ && pe.to_directory ())
{
- open (path_cast<dir_path> (move (pe)));
+ open (path_cast<dir_path> (move (pe)), true);
return next (p);
}
@@ -1003,7 +1015,7 @@ namespace butl
private:
void
- open (dir_path p)
+ open (dir_path p, bool preopen)
{
// We should consider a racing condition here. The directory can be
// removed before we create an iterator for it. In this case we just do
@@ -1011,8 +1023,17 @@ namespace butl
//
try
{
- dir_path d (start_ / p);
- dir_iterator i (!d.empty () ? d : dir_path ("."));
+ // If preopen_() returns false, then the directory will not be
+ // traversed (as we leave iterator with end semantics) but still be
+ // returned by the next() call as a sub-entry.
+ //
+ dir_iterator i;
+ if (!preopen || preopen_ (p))
+ {
+ dir_path d (start_ / p);
+ i = dir_iterator (!d.empty () ? d : dir_path ("."));
+ }
+
iters_.emplace_back (move (i), move (p));
}
catch (const system_error& e)
@@ -1028,9 +1049,10 @@ namespace butl
}
private:
+ dir_path start_;
bool recursive_;
bool self_;
- dir_path start_;
+ preopen preopen_;
small_vector<pair<dir_iterator, dir_path>, 1> iters_;
};
@@ -1038,9 +1060,14 @@ namespace butl
// each matching path. Return false if the underlying func() call returns
// false. Otherwise the function conforms to the path_search() description.
//
+ static const string any_dir ("*/");
+
static bool
- search (path pattern, dir_path pattern_dir, const dir_path start_dir,
- const function<bool (path&&)>& func)
+ search (
+ path pattern,
+ dir_path pattern_dir,
+ const dir_path start_dir,
+ const function<bool (path&&, const string& pattern, bool interm)>& func)
{
// Fast-forward the leftmost pattern non-wildcard components. So, for
// example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/.
@@ -1062,7 +1089,7 @@ namespace butl
if (pe.first &&
((pe.second == entry_type::directory) == p.to_directory ()))
- return func (move (p));
+ return func (move (p), string (), false);
return true;
}
@@ -1088,11 +1115,37 @@ namespace butl
//
bool simple (pattern.simple ());
+ // Note that we rely on "small function object" optimization here.
+ //
recursive_dir_iterator i (
start_dir / pattern_dir,
- pcr.find ("**") != string::npos, // Recursive.
- pcr.find ("***") != string::npos); // Self-inclusive.
+ pcr.find ("**") != string::npos, // Recursive.
+ pcr.find ("***") != string::npos, // Self-inclusive.
+ [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen.
+ {
+ return func (pattern_dir / p, any_dir, true);
+ });
+ // Canonicalize the pattern component collapsing consecutive stars (used to
+ // express that it is recursive) into a single one.
+ //
+ size_t j (0);
+ size_t n (pcr.size ());
+ for (size_t i (0); i < n; ++i)
+ {
+ char c (pcr[i]);
+ if (!(c == '*' && i > 0 && pcr[i - 1] == '*'))
+ pcr[j++] = c;
+ }
+
+ if (j != n)
+ pcr.resize (j);
+
+ // Note that the callback function can be called for the same directory
+ // twice: first time as intermediate match from iterator's preopen() call,
+ // and then, if the first call succeeds, from the iterating loop (possibly
+ // as the final match).
+ //
path p;
while (i.next (p))
{
@@ -1122,17 +1175,26 @@ namespace butl
if (!path_match (pcr, se.leaf ().representation ()))
continue;
- // If the pattern is a simple path then call func() for the sub-entry.
- // Otherwise the sub-entry is a directory (read above), and we search in
- // it using the trailing part of the pattern.
+ // If the callback function returns false, then we stop the entire search
+ // for the final match, or do not search below the path for the
+ // intermediate one.
+ //
+ if (!func (pattern_dir / p, pcr, !simple))
+ {
+ if (simple) // Final match.
+ return false;
+ else
+ continue;
+ }
+
+ // If the pattern is not a simple one, and its leftmost component
+ // matches the sub-entry, then the sub-entry is a directory (see the note
+ // above), and we search in it using the trailing part of the pattern.
//
- if (!(
- simple
- ? func (pattern_dir / p)
- : search (pattern.leaf (pc),
- pattern_dir / path_cast<dir_path> (move (p)),
- start_dir,
- func)))
+ if (!simple && !search (pattern.leaf (pc),
+ pattern_dir / path_cast<dir_path> (move (p)),
+ start_dir,
+ func))
return false;
}
@@ -1140,9 +1202,10 @@ namespace butl
}
void
- path_search (const path& pattern,
- const function<bool (path&&)>& func,
- const dir_path& start)
+ path_search (
+ const path& pattern,
+ const function<bool (path&&, const string& pattern, bool interm)>& func,
+ const dir_path& start)
{
search (pattern,
dir_path (),
diff --git a/tests/wildcard/driver.cxx b/tests/wildcard/driver.cxx
index b3aae62..5744969 100644
--- a/tests/wildcard/driver.cxx
+++ b/tests/wildcard/driver.cxx
@@ -39,7 +39,10 @@ int _CRT_glob = 0;
// Search for paths matching the pattern in the directory specified (absent
// directory means the current one). Print the matching canonicalized paths
// to STDOUT in the ascending order. Succeed if at least one matching path
-// is found. Note that this option must go first in the command line,
+// is found. Note that this option must go first in the command line.
+//
+// Also note that the driver excludes from search file system entries whose
+// names start with a dot, unless the pattern explicitly matches them.
//
// -n
// Do not sort paths found.
@@ -87,9 +90,32 @@ try
assert (i == argc); // All args parsed,
vector<path> paths;
- auto add = [&paths] (path&& p) -> bool
+ auto add =
+ [&paths, &start] (path&& p, const std::string& pt, bool interim) -> bool
{
- paths.emplace_back (move (p.canonicalize ()));
+ bool pd (!pt.empty () && pt[0] == '.'); // Dot-started pattern.
+
+ const path& fp (!p.empty ()
+ ? p
+ : path_cast<path> (!start.empty ()
+ ? start
+ : path::current_directory ()));
+
+ const string& s (fp.leaf ().string ());
+ assert (!s.empty ());
+
+ bool ld (s[0] == '.'); // Dot-started leaf.
+
+ // Skip dot-started names if pattern is not dot-started.
+ //
+ bool skip (ld && !pd);
+
+ if (interim)
+ return !skip;
+
+ if (!skip)
+ paths.emplace_back (move (p.canonicalize ()));
+
return true;
};
diff --git a/tests/wildcard/testscript b/tests/wildcard/testscript
index 4a6c532..48159e2 100644
--- a/tests/wildcard/testscript
+++ b/tests/wildcard/testscript
@@ -388,4 +388,182 @@
}
}
}
+
+ : dot-started
+ :
+ {
+ +mkdir -p z/.z/.z z/z a/.z .a/.z
+ +touch z/.z.cxx z/z.cxx z/.z/.z.cxx z/.z/z.cxx z/z/.z.cxx z/z/z.cxx \
+ a/z.cxx a/.z.cxx .a/z.cxx .a/.z.cxx
+
+ wd=../../..
+
+ : recursive
+ :
+ {
+ : simple
+ :
+ {
+ : file
+ :
+ $* *z**.cxx $wd >>/EOO
+ a/z.cxx
+ z/z.cxx
+ z/z/z.cxx
+ EOO
+
+ : dot-leading-file
+ :
+ $* .z**.cxx $wd >>/EOO
+ a/.z.cxx
+ z/.z.cxx
+ z/z/.z.cxx
+ EOO
+
+ : dir
+ :
+ $* **z/ $wd >>/EOO
+ z/
+ z/z/
+ EOO
+
+ : dot-leading-dir
+ :
+ $* .**z/ $wd >>/EOO
+ a/.z/
+ z/.z/
+ EOO
+ }
+
+ : z-compound
+ :
+ {
+ : not-dot-leading
+ :
+ $* **z/*z.cxx $wd >>/EOO
+ z/z.cxx
+ z/z/z.cxx
+ EOO
+
+ : dot-leading
+ :
+ $* .z**/*z.cxx $wd >>/EOO
+ z/.z/z.cxx
+ EOO
+ }
+
+ : compound
+ :
+ {
+ : not-dot-leading
+ :
+ $* **/*z.cxx $wd >>/EOO
+ a/z.cxx
+ z/z.cxx
+ z/z/z.cxx
+ EOO
+
+ : dot-leading
+ :
+ $* .**/*z.cxx $wd >>/EOO
+ .a/z.cxx
+ z/.z/z.cxx
+ EOO
+ }
+
+ : self
+ :
+ {
+ : not-dot-leading
+ :
+ $* *z***/*z.cxx $wd/z >>/EOO
+ z.cxx
+ z/z.cxx
+ EOO
+
+ : dot-leading
+ :
+ $* .z***/*z.cxx $wd/z >>/EOO
+ .z/z.cxx
+ EOO
+ }
+ }
+
+ : immediate
+ :
+ {
+ : simple
+ :
+ {
+ : file
+ :
+ $* *z*.cxx $wd/z >>/EOO
+ z.cxx
+ EOO
+
+ : dot-leading-file
+ :
+ $* .z*.cxx $wd/z >>/EOO
+ .z.cxx
+ EOO
+
+ : file-dot-leading-start
+ :
+ $* *z*.cxx $wd/z/.z >>/EOO
+ z.cxx
+ EOO
+
+ : dot-leading-file-dot-leading-start
+ :
+ $* .z*.cxx $wd/z/.z >>/EOO
+ .z.cxx
+ EOO
+
+ : dir
+ :
+ $* *z/ $wd/ >>/EOO
+ z/
+ EOO
+
+ : dot-leading-dir
+ :
+ $* .*z/ $wd/z >>/EOO
+ .z/
+ EOO
+ }
+
+ : z-compound
+ :
+ {
+ : not-dot-leading
+ :
+ $* *z/*z.cxx $wd/z >>/EOO
+ z/z.cxx
+ EOO
+
+ : dot-leading
+ :
+ $* .z*/*z.cxx $wd/z >>/EOO
+ .z/z.cxx
+ EOO
+ }
+
+ : compound
+ :
+ {
+ : not-dot-leading
+ :
+ $* */*z.cxx $wd >>/EOO
+ a/z.cxx
+ z/z.cxx
+ EOO
+
+ : dot-leading
+ :
+ $* .*/*z.cxx $wd >>/EOO
+ .a/z.cxx
+ EOO
+ }
+ }
+ }
}