aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2018-11-23 00:23:23 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2018-11-30 17:13:11 +0300
commitabfee51c362cb1ed2e8eb62fec12b3eb5ca03fb0 (patch)
tree3f08e73ecc447a9d05d340eeab169affb9e517b6
parentb55143ecaa986aa3ba93dc6a078ed7d9cf495b1c (diff)
Add match_absent flag for path_{search,match}() functions
-rw-r--r--libbutl/filesystem.cxx54
-rw-r--r--libbutl/filesystem.ixx26
-rw-r--r--libbutl/filesystem.mxx61
-rw-r--r--tests/wildcard/driver.cxx29
-rw-r--r--tests/wildcard/testscript81
5 files changed, 217 insertions, 34 deletions
diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx
index e87669d..736426b 100644
--- a/libbutl/filesystem.cxx
+++ b/libbutl/filesystem.cxx
@@ -1594,10 +1594,13 @@ namespace butl
search (
path pattern,
dir_path pattern_dir,
- bool follow_symlinks,
+ path_match_flags fl,
const function<bool (path&&, const string& pattern, bool interm)>& func,
FS& filesystem)
{
+ bool follow_symlinks ((fl & path_match_flags::follow_symlinks) !=
+ path_match_flags::none);
+
// Fast-forward the leftmost pattern non-wildcard components. So, for
// example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/.
//
@@ -1724,12 +1727,43 @@ namespace butl
//
if (!simple && !search (pattern.leaf (pc),
pattern_dir / path_cast<dir_path> (move (p)),
- follow_symlinks,
+ fl,
func,
filesystem))
return false;
}
+ // If requested, also search with the absent-matching pattern path
+ // component omitted, unless this is the only pattern component.
+ //
+ if ((fl & path_match_flags::match_absent) != path_match_flags::none &&
+ pc.string ().find_first_not_of ('*') == string::npos &&
+ (!pattern_dir.empty () || !simple))
+ {
+ // Stripping the (leading) absent-matching pattern component and calling
+ // search() with the resulting pattern and the same pattern dir works in
+ // most cases, except for a simple pattern. In the latter case, the
+ // pattern becomes empty and its type information gets lost. In other
+ // words, the patterns a/b/*/ and a/b/* become indistinguishable. Thus,
+ // for such a corner case we will strip the leaf from the pattern dir
+ // and use it as a pattern, stripping the trailing separator, if
+ // required. So for the above examples the search() calls will be as
+ // follows:
+ //
+ // search(b/, a/)
+ // search(b, a/)
+ //
+ const dir_path& d (!simple ? pattern_dir : pattern_dir.directory ());
+
+ const path& p (
+ !simple ? pattern.leaf (pc) :
+ pattern.to_directory () ? pattern_dir.leaf () :
+ path (pattern_dir.leaf ().string ())); // Strip the trailing separator.
+
+ if (!search (p, d, fl, func, filesystem))
+ return false;
+ }
+
return true;
}
@@ -1945,10 +1979,10 @@ namespace butl
const path& pattern,
const function<bool (path&&, const string& pattern, bool interm)>& func,
const dir_path& start,
- bool follow_symlinks)
+ path_match_flags flags)
{
real_filesystem fs (pattern.relative () ? start : empty_dir);
- search (pattern, dir_path (), follow_symlinks, func, fs);
+ search (pattern, dir_path (), flags, func, fs);
}
// Search path in the directory tree represented by a path.
@@ -2161,14 +2195,18 @@ namespace butl
const path& pattern,
const path& entry,
const function<bool (path&&, const string& pattern, bool interm)>& func,
- const dir_path& start)
+ const dir_path& start,
+ path_match_flags flags)
{
path_filesystem fs (start, entry);
- search (pattern, dir_path (), true, func, fs);
+ search (pattern, dir_path (), flags, func, fs);
}
bool
- path_match (const path& pattern, const path& entry, const dir_path& start)
+ path_match (const path& pattern,
+ const path& entry,
+ const dir_path& start,
+ path_match_flags flags)
{
bool r (false);
@@ -2186,7 +2224,7 @@ namespace butl
return true;
};
- path_search (pattern, entry, match, start);
+ path_search (pattern, entry, match, start, flags);
return r;
}
}
diff --git a/libbutl/filesystem.ixx b/libbutl/filesystem.ixx
index 97891d5..866d603 100644
--- a/libbutl/filesystem.ixx
+++ b/libbutl/filesystem.ixx
@@ -105,6 +105,32 @@ namespace butl
static_cast<std::uint16_t> (y));
}
+ // path_match_flags bitwise operators (computed on the std::uint16_t values;
+ // the compound forms assign the result to x and return it by value).
+ inline path_match_flags operator& (path_match_flags x, path_match_flags y)
+ {
+ return x &= y;
+ }
+
+ inline path_match_flags operator| (path_match_flags x, path_match_flags y)
+ {
+ return x |= y;
+ }
+
+ inline path_match_flags operator&= (path_match_flags& x, path_match_flags y)
+ {
+ return x = static_cast<path_match_flags> (
+ static_cast<std::uint16_t> (x) &
+ static_cast<std::uint16_t> (y));
+ }
+
+ inline path_match_flags operator|= (path_match_flags& x, path_match_flags y)
+ {
+ return x = static_cast<path_match_flags> (
+ static_cast<std::uint16_t> (x) |
+ static_cast<std::uint16_t> (y));
+ }
+
// dir_entry
//
inline entry_type dir_entry::
diff --git a/libbutl/filesystem.mxx b/libbutl/filesystem.mxx
index 2a02c3a..f05e569 100644
--- a/libbutl/filesystem.mxx
+++ b/libbutl/filesystem.mxx
@@ -698,6 +698,35 @@ LIBBUTL_MODEXPORT namespace butl
// * - match any number of characters (including zero)
// ? - match any single character
+ // Path match/search flags.
+ //
+ enum class path_match_flags: std::uint16_t
+ {
+ // Follow symlinks. This only applies to symlinks that are matched against
+ // the rightmost component of the pattern. In particular, this means that
+ // without this flag such symlinks will never match a directory pattern
+ // and some results can be missing for the recursive rightmost component.
+ //
+ follow_symlinks = 0x1,
+
+ // Make a wildcard-only pattern component (e.g., `*/...`, `.../*/...`, or
+ // `.../*`) match an absent path component. For example, with this flag
+ // set, the `a/*/b` pattern matches not only `a/x/b` but also `a/b`.
+ //
+ // Note that this does not apply to single-component patterns and the
+ // pattern type is always preserved. In particular, the `a/b/*` pattern
+ // matches `a/b` but not `a/b/`.
+ //
+ match_absent = 0x2,
+
+ none = 0
+ };
+
+ inline path_match_flags operator& (path_match_flags, path_match_flags);
+ inline path_match_flags operator| (path_match_flags, path_match_flags);
+ inline path_match_flags operator&= (path_match_flags&, path_match_flags);
+ inline path_match_flags operator|= (path_match_flags&, path_match_flags);
+
// Return true if name matches pattern. Both must be single path components,
// possibly with a trailing directory separator to indicate a directory.
//
@@ -714,10 +743,14 @@ LIBBUTL_MODEXPORT namespace butl
// start directory is used if the first pattern component is a self-matching
// wildcard (see below for the start directory and wildcard semantics).
//
+ // In addition to the wildcard characters, it also recognizes the ** and ***
+ // wildcard sequences (see path_search() for details).
+ //
LIBBUTL_SYMEXPORT bool
path_match (const path& pattern,
const path& entry,
- const dir_path& start = dir_path ());
+ const dir_path& start = dir_path (),
+ path_match_flags = path_match_flags::none);
// Search for paths matching the pattern calling the specified function for
// each matching path (see below for details).
@@ -730,14 +763,13 @@ LIBBUTL_MODEXPORT namespace butl
// The pattern may contain multiple components that include wildcards. On
// Windows the drive letter may not be a wildcard.
//
- // In addition to the wildcard characters listed in path_match(),
- // path_search() also recognizes the ** and *** wildcard sequences. If a
- // path component contains **, then it is matched just like * but in all the
- // subdirectories, recursively. The *** wildcard behaves like ** but also
- // matches the start directory itself. Note that if the first pattern
- // component contains ***, then the start directory must be empty or be
- // terminated with a "meaningful" component (e.g., probably not '.' or
- // '..').
+ // In addition to the wildcard characters, path_search() also recognizes the
+ // ** and *** wildcard sequences. If a path component contains **, then it
+ // is matched just like * but in all the subdirectories, recursively. The
+ // *** wildcard behaves like ** but also matches the start directory itself.
+ // Note that if the first pattern component contains ***, then the start
+ // directory must be empty or be terminated with a "meaningful" component
+ // (e.g., probably not '.' or '..').
//
// So, for example, foo/bar-**.txt will return all the files matching the
// bar-*.txt pattern in all the subdirectoris of foo/. And foo/f***/ will
@@ -783,12 +815,6 @@ LIBBUTL_MODEXPORT namespace butl
// (a/b/, b*/, true)
// (a/b/c/, c*/, false)
//
- // Symlinks are not followed if the follow_symlinks argument is false. This
- // rule is only applied for symlinks that are matched against the rightmost
- // component of the pattern. In particular, this mean that such symlinks will
- // never match a directory pattern, and some results can be missing for the
- // recursive rightmost component.
- //
// Note that recursive iterating through directories currently goes
// depth-first which make sense for the cleanup use cases. In future we may
// want to make it controllable.
@@ -799,7 +825,7 @@ LIBBUTL_MODEXPORT namespace butl
const std::string& pattern,
bool interm)>&,
const dir_path& start = dir_path (),
- bool follow_symlinks = true);
+ path_match_flags = path_match_flags::follow_symlinks);
// Same as above, but behaves as if the directory tree being searched
// through contains only the specified entry. The start directory is used if
@@ -816,7 +842,8 @@ LIBBUTL_MODEXPORT namespace butl
const std::function<bool (path&&,
const std::string& pattern,
bool interm)>&,
- const dir_path& start = dir_path ());
+ const dir_path& start = dir_path (),
+ path_match_flags = path_match_flags::none);
}
#include <libbutl/filesystem.ixx>
diff --git a/tests/wildcard/driver.cxx b/tests/wildcard/driver.cxx
index cecee07..f5fb346 100644
--- a/tests/wildcard/driver.cxx
+++ b/tests/wildcard/driver.cxx
@@ -48,8 +48,8 @@ int _CRT_glob = 0;
// Usages:
//
// argv[0] -mn <pattern> <name>
-// argv[0] -sd [-n] <pattern> [<dir>]
-// argv[0] -sp [-n] <pattern> <path> [<dir>]
+// argv[0] -sd [-i] [-n] <pattern> [<dir>]
+// argv[0] -sp [-i] [-n] <pattern> <path> [<dir>]
//
// Execute actions specified by the first option. Exit with code 0 if succeed,
// 1 if fail, 2 on the underlying OS error (print error description to STDERR).
@@ -73,6 +73,11 @@ int _CRT_glob = 0;
// through contains only the specified entry. The start directory is used if
// the first pattern component is a self-matching wildcard.
//
+// -i
+// Pass path_match_flags::match_absent to the match/search functions.
+// Meaningful in combination with -sd or -sp options and must follow it, if
+// specified in the command line.
+//
// -n
// Do not sort paths found. Meaningful in combination with -sd or -sp
// options and must follow it, if specified in the command line.
@@ -100,12 +105,16 @@ try
assert (argc >= (op == "-sd" ? 3 : 4));
bool sort (true);
+ path_match_flags flags (path_match_flags::follow_symlinks);
+
int i (2);
for (; i != argc; ++i)
{
string o (argv[i]);
if (o == "-n")
sort = false;
+ else if (o == "-i")
+ flags |= path_match_flags::match_absent;
else
break; // End of options.
}
@@ -168,9 +177,9 @@ try
};
if (!entry)
- path_search (pattern, add, start);
+ path_search (pattern, add, start, flags);
else
- path_search (pattern, *entry, add, start);
+ path_search (pattern, *entry, add, start, flags);
// It the search succeeds, then test search in the directory tree
// represented by each matched path. Otherwise, if the directory tree is
@@ -185,13 +194,15 @@ try
//
size_t match_count (0);
- auto check = [&p, &match_count] (path&& pe, const string&, bool inter)
+ auto check = [&p, &match_count, flags]
+ (path&& pe, const string&, bool inter)
{
if (pe == p.first)
{
if (!inter)
++match_count;
- else
+ else if ((flags & path_match_flags::match_absent) ==
+ path_match_flags::none)
// For self-matching the callback is first called in the interim
// mode (through the preopen function) with an empty path.
//
@@ -201,16 +212,16 @@ try
return true;
};
- path_search (pattern, p.first, check, start);
+ path_search (pattern, p.first, check, start, flags);
assert (match_count == p.second);
// Test path match.
//
- assert (path_match (pattern, p.first, start));
+ assert (path_match (pattern, p.first, start, flags));
}
}
else if (entry)
- assert (!path_match (pattern, *entry, start));
+ assert (!path_match (pattern, *entry, start, flags));
// Print the found paths.
//
diff --git a/tests/wildcard/testscript b/tests/wildcard/testscript
index 0129809..885a7d5 100644
--- a/tests/wildcard/testscript
+++ b/tests/wildcard/testscript
@@ -1150,4 +1150,85 @@
}
}
}
+
+ : ignorable-components
+ :
+ {
+ test.options += -i
+
+ : middle
+ :
+ {
+ $* a/*/b a/b >/ a/b
+ $* a/*/b a/x/b >/ a/x/b
+ $* a/**/b a/b >/ a/b
+ $* a/**/b a/x/b >/ a/x/b
+
+ $* a/***/b a/b >>/EOE
+ a/b
+ a/b
+ EOE
+ }
+
+ : top-level
+ :
+ if ($cxx.target.class != 'windows')
+ {
+ $* -n /*/a /a > /a
+ $* -n /*/a /b/a > /b/a
+ }
+
+ : leading
+ :
+ {
+ $* -n */a a >/ a
+ $* -n */a b/a >/ b/a
+ }
+
+ : trailing
+ :
+ {
+ : file
+ :
+ {
+ $* -n a/* a >/ a
+ $* -n a/* a/b >/ a/b
+
+ $* -n a/* a/ == 1
+ $* -n a/* a/b/ == 1
+ }
+
+ : dir
+ :
+ {
+ $* -n a/*/ a/ >/ a/
+ $* -n a/*/ a/b >/ a/
+
+ $* -n a/*/ a/b/ >>/EOE
+ a/b/
+ a/
+ EOE
+
+ $* -n a/*/ a == 1
+ }
+ }
+
+ : leading-trailing
+ {
+ $* -n */* a >/ a
+ $* -n */* a/b >/ a/b
+
+ $* -n */a/* a >/ a
+ $* -n */a/* a/b >/ a/b
+ $* -n */a/* b/a >/ b/a
+ $* -n */a/* c/a/b >/ c/a/b
+
+ $* -n **/a/** a >/ a
+ $* -n **/a/** a/b/c/d >/ a/b/c/d
+ $* -n **/a/** d/c/b/a >/ d/c/b/a
+ $* -n **/a/** d/c/b/a/b/c/d >/ d/c/b/a/b/c/d
+ }
+
+
+ }
}