From abfee51c362cb1ed2e8eb62fec12b3eb5ca03fb0 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 23 Nov 2018 00:23:23 +0300 Subject: Add match_absent flag for path_{search,match}() functions --- libbutl/filesystem.cxx | 54 ++++++++++++++++++++++++++----- libbutl/filesystem.ixx | 26 +++++++++++++++ libbutl/filesystem.mxx | 61 +++++++++++++++++++++++++---------- tests/wildcard/driver.cxx | 29 +++++++++++------ tests/wildcard/testscript | 81 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 217 insertions(+), 34 deletions(-) diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx index e87669d..736426b 100644 --- a/libbutl/filesystem.cxx +++ b/libbutl/filesystem.cxx @@ -1594,10 +1594,13 @@ namespace butl search ( path pattern, dir_path pattern_dir, - bool follow_symlinks, + path_match_flags fl, const function& func, FS& filesystem) { + bool follow_symlinks ((fl & path_match_flags::follow_symlinks) != + path_match_flags::none); + // Fast-forward the leftmost pattern non-wildcard components. So, for // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. // @@ -1724,12 +1727,43 @@ namespace butl // if (!simple && !search (pattern.leaf (pc), pattern_dir / path_cast (move (p)), - follow_symlinks, + fl, func, filesystem)) return false; } + // If requested, also search with the absent-matching pattern path + // component omitted, unless this is the only pattern component. + // + if ((fl & path_match_flags::match_absent) != path_match_flags::none && + pc.string ().find_first_not_of ('*') == string::npos && + (!pattern_dir.empty () || !simple)) + { + // Stripping the (leading) absent-matching pattern component and calling + // search() with the resulting pattern and the same pattern dir works in + // most cases, except for a simple pattern. In the latter case, the + // pattern becomes empty and its type information gets lost. In other + // words, the patterns a/b/*/ and a/b/* become indistinguishable. 
Thus, + // for such a corner case we will strip the leaf from the pattern dir + // and use it as a pattern, stripping the trailing separator, if + // required. So for the above examples the search() calls will be as + // follows: + // + // search(b/, a/) + // search(b, a/) + // + const dir_path& d (!simple ? pattern_dir : pattern_dir.directory ()); + + const path& p ( + !simple ? pattern.leaf (pc) : + pattern.to_directory () ? pattern_dir.leaf () : + path (pattern_dir.leaf ().string ())); // Strip the trailing separator. + + if (!search (p, d, fl, func, filesystem)) + return false; + } + return true; } @@ -1945,10 +1979,10 @@ namespace butl const path& pattern, const function& func, const dir_path& start, - bool follow_symlinks) + path_match_flags flags) { real_filesystem fs (pattern.relative () ? start : empty_dir); - search (pattern, dir_path (), follow_symlinks, func, fs); + search (pattern, dir_path (), flags, func, fs); } // Search path in the directory tree represented by a path. @@ -2161,14 +2195,18 @@ namespace butl const path& pattern, const path& entry, const function& func, - const dir_path& start) + const dir_path& start, + path_match_flags flags) { path_filesystem fs (start, entry); - search (pattern, dir_path (), true, func, fs); + search (pattern, dir_path (), flags, func, fs); } bool - path_match (const path& pattern, const path& entry, const dir_path& start) + path_match (const path& pattern, + const path& entry, + const dir_path& start, + path_match_flags flags) { bool r (false); @@ -2186,7 +2224,7 @@ namespace butl return true; }; - path_search (pattern, entry, match, start); + path_search (pattern, entry, match, start, flags); return r; } } diff --git a/libbutl/filesystem.ixx b/libbutl/filesystem.ixx index 97891d5..866d603 100644 --- a/libbutl/filesystem.ixx +++ b/libbutl/filesystem.ixx @@ -105,6 +105,32 @@ namespace butl static_cast (y)); } + // path_match_flags + // + inline path_match_flags operator& (path_match_flags x, path_match_flags y) + 
{ + return x &= y; + } + + inline path_match_flags operator| (path_match_flags x, path_match_flags y) + { + return x |= y; + } + + inline path_match_flags operator&= (path_match_flags& x, path_match_flags y) + { + return x = static_cast ( + static_cast (x) & + static_cast (y)); + } + + inline path_match_flags operator|= (path_match_flags& x, path_match_flags y) + { + return x = static_cast ( + static_cast (x) | + static_cast (y)); + } + // dir_entry // inline entry_type dir_entry:: diff --git a/libbutl/filesystem.mxx b/libbutl/filesystem.mxx index 2a02c3a..f05e569 100644 --- a/libbutl/filesystem.mxx +++ b/libbutl/filesystem.mxx @@ -698,6 +698,35 @@ LIBBUTL_MODEXPORT namespace butl // * - match any number of characters (including zero) // ? - match any single character + // Path match/search flags. + // + enum class path_match_flags: std::uint16_t + { + // Follow symlinks. This only applies to symlinks that are matched against + // the rightmost component of the pattern. In particular, this means that + // such symlinks will never match a directory pattern and some results can + // be missing for the recursive rightmost component. + // + follow_symlinks = 0x1, + + // Make wildcard-only pattern component (e.g., `*/...`, `.../*/...`, or + // `.../*`) match absent path component. For example, with this flag + // set, the `a/*/b` pattern matches not only `a/x/b` path, but also `a/b`. + // + // Note that this does not apply to single-component patterns and the + // pattern type is always preserved. In particular, the `a/b/*` pattern + // matches `a/b` but not `a/b/`. + // + match_absent = 0x2, + + none = 0 + }; + + inline path_match_flags operator& (path_match_flags, path_match_flags); + inline path_match_flags operator| (path_match_flags, path_match_flags); + inline path_match_flags operator&= (path_match_flags&, path_match_flags); + inline path_match_flags operator|= (path_match_flags&, path_match_flags); + // Return true if name matches pattern.
Both must be single path components, // possibly with a trailing directory separator to indicate a directory. // @@ -714,10 +743,14 @@ LIBBUTL_MODEXPORT namespace butl // start directory is used if the first pattern component is a self-matching // wildcard (see below for the start directory and wildcard semantics). // + // In addition to the wildcard characters, it also recognizes the ** and *** + // wildcard sequences (see path_search() for details). + // LIBBUTL_SYMEXPORT bool path_match (const path& pattern, const path& entry, - const dir_path& start = dir_path ()); + const dir_path& start = dir_path (), + path_match_flags = path_match_flags::none); // Search for paths matching the pattern calling the specified function for // each matching path (see below for details). @@ -730,14 +763,13 @@ LIBBUTL_MODEXPORT namespace butl // The pattern may contain multiple components that include wildcards. On // Windows the drive letter may not be a wildcard. // - // In addition to the wildcard characters listed in path_match(), - // path_search() also recognizes the ** and *** wildcard sequences. If a - // path component contains **, then it is matched just like * but in all the - // subdirectories, recursively. The *** wildcard behaves like ** but also - // matches the start directory itself. Note that if the first pattern - // component contains ***, then the start directory must be empty or be - // terminated with a "meaningful" component (e.g., probably not '.' or - // '..'). + // In addition to the wildcard characters, path_search() also recognizes the + // ** and *** wildcard sequences. If a path component contains **, then it + // is matched just like * but in all the subdirectories, recursively. The + // *** wildcard behaves like ** but also matches the start directory itself. + // Note that if the first pattern component contains ***, then the start + // directory must be empty or be terminated with a "meaningful" component + // (e.g., probably not '.' or '..'). 
// // So, for example, foo/bar-**.txt will return all the files matching the // bar-*.txt pattern in all the subdirectoris of foo/. And foo/f***/ will @@ -783,12 +815,6 @@ LIBBUTL_MODEXPORT namespace butl // (a/b/, b*/, true) // (a/b/c/, c*/, false) // - // Symlinks are not followed if the follow_symlinks argument is false. This - // rule is only applied for symlinks that are matched against the rightmost - // component of the pattern. In particular, this mean that such symlinks will - // never match a directory pattern, and some results can be missing for the - // recursive rightmost component. - // // Note that recursive iterating through directories currently goes // depth-first which make sense for the cleanup use cases. In future we may // want to make it controllable. @@ -799,7 +825,7 @@ LIBBUTL_MODEXPORT namespace butl const std::string& pattern, bool interm)>&, const dir_path& start = dir_path (), - bool follow_symlinks = true); + path_match_flags = path_match_flags::follow_symlinks); // Same as above, but behaves as if the directory tree being searched // through contains only the specified entry. The start directory is used if @@ -816,7 +842,8 @@ LIBBUTL_MODEXPORT namespace butl const std::function&, - const dir_path& start = dir_path ()); + const dir_path& start = dir_path (), + path_match_flags = path_match_flags::none); } #include diff --git a/tests/wildcard/driver.cxx b/tests/wildcard/driver.cxx index cecee07..f5fb346 100644 --- a/tests/wildcard/driver.cxx +++ b/tests/wildcard/driver.cxx @@ -48,8 +48,8 @@ int _CRT_glob = 0; // Usages: // // argv[0] -mn -// argv[0] -sd [-n] [] -// argv[0] -sp [-n] [] +// argv[0] -sd [-i] [-n] [] +// argv[0] -sp [-i] [-n] [] // // Execute actions specified by the first option. Exit with code 0 if succeed, // 1 if fail, 2 on the underlying OS error (print error description to STDERR). @@ -73,6 +73,11 @@ int _CRT_glob = 0; // through contains only the specified entry. 
The start directory is used if // the first pattern component is a self-matching wildcard. // +// -i +// Pass path_match_flags::match_absent to the match/search functions. +// Meaningful in combination with -sd or -sp options and must follow it, if +// specified in the command line. +// // -n // Do not sort paths found. Meaningful in combination with -sd or -sp // options and must follow it, if specified in the command line. @@ -100,12 +105,16 @@ try assert (argc >= (op == "-sd" ? 3 : 4)); bool sort (true); + path_match_flags flags (path_match_flags::follow_symlinks); + int i (2); for (; i != argc; ++i) { string o (argv[i]); if (o == "-n") sort = false; + else if (o == "-i") + flags |= path_match_flags::match_absent; else break; // End of options. } @@ -168,9 +177,9 @@ try }; if (!entry) - path_search (pattern, add, start); + path_search (pattern, add, start, flags); else - path_search (pattern, *entry, add, start); + path_search (pattern, *entry, add, start, flags); // It the search succeeds, then test search in the directory tree // represented by each matched path. Otherwise, if the directory tree is @@ -185,13 +194,15 @@ try // size_t match_count (0); - auto check = [&p, &match_count] (path&& pe, const string&, bool inter) + auto check = [&p, &match_count, flags] + (path&& pe, const string&, bool inter) { if (pe == p.first) { if (!inter) ++match_count; - else + else if ((flags & path_match_flags::match_absent) == + path_match_flags::none) // For self-matching the callback is first called in the interim // mode (through the preopen function) with an empty path. // @@ -201,16 +212,16 @@ try return true; }; - path_search (pattern, p.first, check, start); + path_search (pattern, p.first, check, start, flags); assert (match_count == p.second); // Test path match.
// - assert (path_match (pattern, p.first, start)); + assert (path_match (pattern, p.first, start, flags)); } } else if (entry) - assert (!path_match (pattern, *entry, start)); + assert (!path_match (pattern, *entry, start, flags)); // Print the found paths. // diff --git a/tests/wildcard/testscript b/tests/wildcard/testscript index 0129809..885a7d5 100644 --- a/tests/wildcard/testscript +++ b/tests/wildcard/testscript @@ -1150,4 +1150,85 @@ } } } + + : ignorable-components + : + { + test.options += -i + + : middle + : + { + $* a/*/b a/b >/ a/b + $* a/*/b a/x/b >/ a/x/b + $* a/**/b a/b >/ a/b + $* a/**/b a/x/b >/ a/x/b + + $* a/***/b a/b >>/EOE + a/b + a/b + EOE + } + + : top-level + : + if ($cxx.target.class != 'windows') + { + $* -n /*/a /a > /a + $* -n /*/a /b/a > /b/a + } + + : leading + : + { + $* -n */a a >/ a + $* -n */a b/a >/ b/a + } + + : trailing + : + { + : file + : + { + $* -n a/* a >/ a + $* -n a/* a/b >/ a/b + + $* -n a/* a/ == 1 + $* -n a/* a/b/ == 1 + } + + : dir + : + { + $* -n a/*/ a/ >/ a/ + $* -n a/*/ a/b >/ a/ + + $* -n a/*/ a/b/ >>/EOE + a/b/ + a/ + EOE + + $* -n a/*/ a == 1 + } + } + + : leading-trailing + { + $* -n */* a >/ a + $* -n */* a/b >/ a/b + + $* -n */a/* a >/ a + $* -n */a/* a/b >/ a/b + $* -n */a/* b/a >/ b/a + $* -n */a/* c/a/b >/ c/a/b + + $* -n **/a/** a >/ a + $* -n **/a/** a/b/c/d >/ a/b/c/d + $* -n **/a/** d/c/b/a >/ d/c/b/a + $* -n **/a/** d/c/b/a/b/c/d >/ d/c/b/a/b/c/d + } + + + } } -- cgit v1.1