From e6cee3c2f9b03852ed4837f9be05e0a2fa4542a8 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 30 Sep 2019 13:48:28 +0300 Subject: Move path match to path-pattern.?xx --- libbutl/filesystem.cxx | 410 +------------------------------------------------ 1 file changed, 2 insertions(+), 408 deletions(-) (limited to 'libbutl/filesystem.cxx') diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx index 832e1b6..b66dcfe 100644 --- a/libbutl/filesystem.cxx +++ b/libbutl/filesystem.cxx @@ -62,9 +62,10 @@ import std.core; #endif import butl.path; import butl.timestamp; +import butl.path_pattern; #endif -import butl.utility; // throw_generic_error(), lcase()[_WIN32] +import butl.utility; // throw_generic_error() import butl.fdstream; import butl.small_vector; #else @@ -1557,303 +1558,6 @@ namespace butl } #endif - // patterns - // - static inline bool - match (char c, char pc) - { -#ifndef _WIN32 - return c == pc; -#else - return lcase (c) == lcase (pc); -#endif - } - - bool - match_bracket (char c, const path_pattern_term& pt) - { - using iterator = string::const_iterator; - - assert (pt.bracket ()); - - iterator i (pt.begin + 1); // Position after '['. - iterator e (pt.end - 1); // Position at ']'. - - bool invert (*i == '!'); - if (invert) - ++i; - - bool r (false); - for (iterator b (i); i != e && !r; ++i) - { - char bc (*i); - - // If '-' is a first or last character in the bracket expression then - // match it literally and match the range otherwise. - // - if (bc == '-' && i != b && i + 1 != e) // Match the range? - { - // Note that we have already matched the range left endpoint character - // unsuccessfully (otherwise we wouldn't be here), so now we test if - // the character belongs to the (min-char, max-char] range. - // - // Also note that on Windows we match case insensitively and so can't - // just compare the character with the range endpoints. Thus, we - // fallback to matching each range character individually. - // -#ifndef _WIN32 - r = c > *(i - 1) && c <= *(i + 1); -#else - for (char bc (*(i - 1) + 1), mx (*(i + 1)); bc <= mx && !r; ++bc) - r = match (c, bc); -#endif - - ++i; // Position to the range max character. - } - else // Match against the expression character literally. - r = match (c, bc); - } - - return r != invert; - } - - // Match the name [ni, ne) to the pattern [pi, pe) that may not contain - // bracket expressions. Ranges can be empty. - // - static bool - match_no_brackets (string::const_iterator pi, string::const_iterator pe, - string::const_iterator ni, string::const_iterator ne) - { - using reverse_iterator = std::reverse_iterator; - - reverse_iterator rpi (pe); - reverse_iterator rpe (pi); - - reverse_iterator rni (ne); - reverse_iterator rne (ni); - - // Match the pattern suffix (follows the last *) to the name trailing - // characters. - // - char pc ('\0'); - for (; rpi != rpe && (pc = *rpi) != '*' && rni != rne; ++rpi, ++rni) - { - if (!match (*rni, pc) && pc != '?') - return false; - } - - // If we got to the (reversed) end of the pattern (no * is encountered) - // than we are done. The success depends on if we got to the (reversed) end - // of the name as well. - // - if (rpi == rpe) - return rni == rne; - - // If we didn't reach * in the pattern then we reached the (reversed) end - // of the name. That means we have unmatched non-star terms in the - // pattern, and so match failed. - // - if (pc != '*') - { - assert (rni == rne); - return false; - } - - // Match the pattern prefix (ends with the first *) to the name leading - // characters. If they mismatch we failed. Otherwise if this is an only * - // in the pattern (matches whatever is left in the name) then we succeed, - // otherwise we perform backtracking (recursively). - // - pe = rpi.base (); - ne = rni.base (); - - // Compare the pattern and the name term by char until the name suffix or - // * is encountered in the pattern (whichever happens first). Fail if a - // char mismatches. - // - for (; (pc = *pi) != '*' && ni != ne; ++pi, ++ni) - { - if (!match (*ni, pc) && pc != '?') - return false; - } - - // If we didn't get to * in the pattern then we got to the name suffix. - // That means that the pattern has unmatched non-star terms, and so match - // failed. - // - if (pc != '*') - { - assert (ni == ne); - return false; - } - - // If * that we have reached is the last one, then it matches whatever is - // left in the name (including an empty range). - // - if (++pi == pe) - return true; - - // Perform backtracking. - // - // From now on, we will call the pattern not-yet-matched part (starting - // the leftmost * and ending the rightmost one inclusively) as pattern, and - // the name not-yet-matched part as name. - // - // Here we sequentially assume that * that starts the pattern matches the - // name leading part (staring from an empty one and iterating till the full - // name). So if, at some iteration, the pattern trailing part (that follows - // the leftmost *) matches the name trailing part, then the pattern matches - // the name. - // - bool r; - for (; !(r = match_no_brackets (pi, pe, ni, ne)) && ni != ne; ++ni) ; - return r; - } - - // Match a character against the pattern term. - // - static inline bool - match (char c, const path_pattern_term& pt) - { - switch (pt.type) - { - // Matches any character. - // - case path_pattern_term_type::star: - case path_pattern_term_type::question: return true; - - case path_pattern_term_type::bracket: - { - return match_bracket (c, pt); - } - - case path_pattern_term_type::literal: - { - return match (c, get_literal (pt)); - } - } - - assert (false); // Can't be here. - return false; - } - - // Match the name [ni, ne) to the pattern [pi, pe). Ranges can be empty. - // - static bool - match (string::const_iterator pi, string::const_iterator pe, - string::const_iterator ni, string::const_iterator ne) - { - // If the pattern doesn't contain the bracket expressions then reduce to - // the "clever" approach (see the implementation notes below for details). - // - if (find (pi, pe, '[') == pe) - return match_no_brackets (pi, pe, ni, ne); - - // Match the pattern prefix (precedes the first *) to the name leading - // characters. - // - path_pattern_iterator ppi (pi, pe); - path_pattern_iterator ppe; - path_pattern_term pt; - - for (; ppi != ppe && !(pt = *ppi).star () && ni != ne; ++ppi, ++ni) - { - if (!match (*ni, pt)) - return false; - } - - // If we got to the end of the pattern (no * is encountered) than we are - // done. The success depends on if we got to the end of the name as well. - // - if (ppi == ppe) - return ni == ne; - - // If we didn't reach * in the pattern then we reached the end of the - // name. That means we have unmatched non-star terms in the pattern, and - // so match failed. - // - if (!pt.star ()) - { - assert (ni == ne); - return false; - } - - // If * that we have reached is the last term, then it matches whatever is - // left in the name (including an empty range). - // - if (++ppi == ppe) - return true; - - // Switch back to the string iterator and perform backtracking. - // - // From now on, we will call the pattern not-yet-matched part (starting - // the leftmost *) as pattern, and the name not-yet-matched part as name. - // - // Here we sequentially assume that * that starts the pattern matches the - // name leading part (see match_no_brackets() for details). - // - bool r; - for (pi = ppi->begin; !(r = match (pi, pe, ni, ne)) && ni != ne; ++ni) ; - return r; - } - - bool - path_match (const string& name, const string& pattern) - { - // Implementation notes: - // - // - This has a good potential of becoming hairy quickly so need to strive - // for an elegant way to implement this. - // - // - Most patterns will contains a single * wildcard with a prefix and/or - // suffix (e.g., *.txt, foo*, f*.txt). Something like this is not very - // common: *foo*. - // - // So it would be nice to have a clever implementation that first - // "anchors" itself with a literal prefix and/or suffix and only then - // continue with backtracking. In other words, reduce: - // - // *.txt vs foo.txt -> * vs foo - // foo* vs foo.txt -> * vs .txt - // f*.txt vs foo.txt -> * vs oo - // - // Note that this approach fails if the pattern may contain bracket - // expressions. You can't easily recognize a suffix scanning backwards - // since * semantics depends on the characters to the left: - // - // f[o*]o - * is not a wildcard - // fo*]o - * is a wildcard - // - // That's why we will start with the straightforward left-to-right - // matching and reduce to the "clever" approach when the remaining part - // of the pattern doesn't contain bracket expressions. - - auto pi (pattern.rbegin ()); - auto pe (pattern.rend ()); - - auto ni (name.rbegin ()); - auto ne (name.rend ()); - - // The name doesn't match the pattern if it is of a different type than the - // pattern is. - // - bool pd (pi != pe && path::traits_type::is_separator (*pi)); - bool nd (ni != ne && path::traits_type::is_separator (*ni)); - - if (pd != nd) - return false; - - // Skip trailing separators if present. - // - if (pd) - { - ++pi; - ++ni; - } - - return match (pattern.begin (), pi.base (), name.begin (), ni.base ()); - } - // Search for paths matching the pattern and call the specified function for // each matching path. Return false if the underlying func() call returns // false. Otherwise the function conforms to the path_search() description. @@ -2467,114 +2171,4 @@ namespace butl path_filesystem fs (start, entry); search (pattern, dir_path (), flags, func, fs); } - - bool - path_match (const path& entry, - const path& pattern, - const dir_path& start, - path_match_flags flags) - { - bool r (false); - - auto match = [&entry, &r] (path&& p, const string&, bool interim) - { - // If we found the entry (possibly through one of the recursive - // components) no need to search further. - // - if (p == entry && !interim) - { - r = true; - return false; - } - - return true; - }; - - path_search (pattern, entry, match, start, flags); - return r; - } - - // path_pattern_iterator - // - void path_pattern_iterator:: - next () - { - if (i_ == e_) - { - t_ = nullopt; // Convert the object into the end iterator. - return; - } - - auto next = [this] (path_pattern_term_type t) - { - assert (t != path_pattern_term_type::bracket); - - t_ = path_pattern_term {t, i_, i_ + 1}; - ++i_; - }; - - switch (*i_) - { - case '?': - { - next (path_pattern_term_type::question); - break; - } - case '*': - { - next (path_pattern_term_type::star); - break; - } - case '[': - { - // Try to find the bracket expression end. - // - // Note that '[' doesn't necessarily starts the bracket expression (no - // closing bracket, empty, etc). If that's the case, then we end up - // with the '[' literal terminal. - // - bool expr (false); - for (;;) // Breakout loop. - { - string::const_iterator i (i_ + 1); // Position after '['. - - if (i == e_) // Is '[' the pattern last character? - break; - - bool invert (*i == '!'); - if (invert && ++i == e_) // Is '!' the pattern last character? - break; - - // Find the bracket expression end. - // - // Note that the bracket expression may not be empty and ']' is a - // literal if it is the first expression character. - // - for (++i; i != e_ && *i != ']'; ++i) ; - - if (i == e_) // The closing bracket is not found? - break; - - expr = true; - - ++i; // Position after ']'. - - t_ = path_pattern_term {path_pattern_term_type::bracket, i_, i}; - - i_ = i; - break; - } - - // Fallback to '[' literal if it is not a bracket expression. - // - if (expr) - break; - } - // Fall through. - default: - { - next (path_pattern_term_type::literal); - } - } - } } -- cgit v1.1