// file : libbutl/filesystem.cxx -*- C++ -*- // copyright : Copyright (c) 2014-2017 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file #include #ifndef _WIN32 # include // rename() # include // struct dirent, *dir() # include // symlink(), link(), stat(), rmdir(), unlink() # include // utimes() # include // stat # include // stat(), lstat(), S_I*, mkdir(), chmod() #else # include # include // _find*(), _unlink(), _chmod() # include // _mkdir(), _rmdir() # include // _stat # include // _stat(), S_I* # ifdef _MSC_VER // Unlikely to be fixed in newer versions. # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) # define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) # endif # include // lcase() #endif #include // errno, E* #include #include #include // unique_ptr #include // pair #include // reverse_iterator #include #include #include // throw_generic_error() #include #include using namespace std; namespace butl { bool file_exists (const char* p, bool fl) { auto pe (path_entry (p, fl)); return pe.first && (pe.second == entry_type::regular || (!fl && pe.second == entry_type::symlink)); } bool entry_exists (const char* p, bool fl) { return path_entry (p, fl).first; } bool dir_exists (const char* p) { auto pe (path_entry (p, true)); return pe.first && pe.second == entry_type::directory; } #ifndef _WIN32 pair path_entry (const char* p, bool fl) { struct stat s; if ((fl ? stat (p, &s) : lstat (p, &s)) != 0) { if (errno == ENOENT || errno == ENOTDIR) return make_pair (false, entry_type::unknown); else throw_generic_error (errno); } auto m (s.st_mode); entry_type t (entry_type::unknown); if (S_ISREG (m)) t = entry_type::regular; else if (S_ISDIR (m)) t = entry_type::directory; else if (S_ISLNK (m)) t = entry_type::symlink; else if (S_ISBLK (m) || S_ISCHR (m) || S_ISFIFO (m) || S_ISSOCK (m)) t = entry_type::other; return make_pair (true, t); } #else pair path_entry (const char* p, bool) { // A path like 'C:', while being a root path in our terminology, is not as // such for Windows, that maintains current directory for each drive, and // so C: means the current directory on the drive C. This is not what we // mean here, so need to append the trailing directory separator in such a // case. // string d; if (path::traits::root (p, string::traits_type::length (p))) { d = p; d += path::traits::directory_separator; p = d.c_str (); } DWORD attr (GetFileAttributesA (p)); if (attr == INVALID_FILE_ATTRIBUTES) // Presumably not exists. return make_pair (false, entry_type::unknown); entry_type t (entry_type::unknown); // S_ISLNK/S_IFDIR are not defined for Win32 but it does have symlinks. // We will consider symlink entry to be of the unknown type. Note that // S_ISREG() and S_ISDIR() return as they would do for a symlink target. // if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) == 0) { struct _stat s; if (_stat (p, &s) != 0) { if (errno == ENOENT || errno == ENOTDIR) return make_pair (false, entry_type::unknown); else throw_generic_error (errno); } auto m (s.st_mode); if (S_ISREG (m)) t = entry_type::regular; else if (S_ISDIR (m)) t = entry_type::directory; // //else if (S_ISLNK (m)) // t = entry_type::symlink; } return make_pair (true, t); } #endif mkdir_status #ifndef _WIN32 try_mkdir (const dir_path& p, mode_t m) { if (mkdir (p.string ().c_str (), m) != 0) #else try_mkdir (const dir_path& p, mode_t) { if (_mkdir (p.string ().c_str ()) != 0) #endif { int e (errno); // EEXIST means the path already exists but not necessarily as // a directory. // if (e == EEXIST && dir_exists (p)) return mkdir_status::already_exists; else throw_generic_error (e); } return mkdir_status::success; } mkdir_status try_mkdir_p (const dir_path& p, mode_t m) { if (!p.root ()) { dir_path d (p.directory ()); if (!d.empty () && !dir_exists (d)) try_mkdir_p (d, m); } return try_mkdir (p, m); } rmdir_status try_rmdir (const dir_path& p, bool ignore_error) { rmdir_status r (rmdir_status::success); #ifndef _WIN32 if (rmdir (p.string ().c_str ()) != 0) #else if (_rmdir (p.string ().c_str ()) != 0) #endif { if (errno == ENOENT) r = rmdir_status::not_exist; else if (errno == ENOTEMPTY || errno == EEXIST) r = rmdir_status::not_empty; else if (!ignore_error) throw_generic_error (errno); } return r; } void rmdir_r (const dir_path& p, bool dir, bool ignore_error) { // An nftw()-based implementation (for platforms that support it) // might be a faster way. // for (const dir_entry& de: dir_iterator (p)) { path ep (p / de.path ()); //@@ Would be good to reuse the buffer. if (de.ltype () == entry_type::directory) rmdir_r (path_cast (move (ep)), true, ignore_error); else try_rmfile (ep, ignore_error); } if (dir) { rmdir_status r (try_rmdir (p)); if (r != rmdir_status::success && !ignore_error) throw_generic_error (r == rmdir_status::not_empty ? ENOTEMPTY : ENOENT); } } rmfile_status try_rmfile (const path& p, bool ignore_error) { rmfile_status r (rmfile_status::success); #ifndef _WIN32 if (unlink (p.string ().c_str ()) != 0) #else if (_unlink (p.string ().c_str ()) != 0) #endif { // Strangely on Linux unlink() removes a dangling symlink but returns // ENOENT. // if (errno == ENOENT || errno == ENOTDIR) r = rmfile_status::not_exist; else if (!ignore_error) throw_generic_error (errno); } return r; } #ifndef _WIN32 void mksymlink (const path& target, const path& link, bool) { if (symlink (target.string ().c_str (), link.string ().c_str ()) == -1) throw_generic_error (errno); } void mkhardlink (const path& target, const path& link, bool) { if (::link (target.string ().c_str (), link.string ().c_str ()) == -1) throw_generic_error (errno); } #else void mksymlink (const path&, const path&, bool) { throw_generic_error (ENOSYS, "symlinks not supported"); } void mkhardlink (const path& target, const path& link, bool dir) { if (!dir) { if (!CreateHardLinkA (link.string ().c_str (), target.string ().c_str (), nullptr)) throw_system_error (GetLastError ()); } else throw_generic_error (ENOSYS, "directory hard links not supported"); } #endif // For I/O operations cpfile() can throw ios_base::failure exception that is // not derived from system_error for old versions of g++ (as of 4.9). From // the other hand cpfile() must throw system_error only. Let's catch // ios_base::failure and rethrow as system_error in such a case. // template static inline typename enable_if::type cpfile (const path& from, const path& to, cpflags fl, permissions perm, auto_rmfile& rm) { ifdstream ifs (from, fdopen_mode::binary); fdopen_mode om (fdopen_mode::out | fdopen_mode::truncate | fdopen_mode::create | fdopen_mode::binary); if ((fl & cpflags::overwrite_content) != cpflags::overwrite_content) om |= fdopen_mode::exclusive; ofdstream ofs (fdopen (to, om, perm)); rm = auto_rmfile (to); // Throws ios::failure on fdbuf read/write failures. // // Note that the eof check is important: if the stream is at eof (empty // file) then this write will fail. // if (ifs.peek () != ifdstream::traits_type::eof ()) ofs << ifs.rdbuf (); ifs.close (); // Throws ios::failure on failure. ofs.close (); // Throws ios::failure on flush/close failure. } template static inline typename enable_if::type cpfile (const path& from, const path& to, cpflags fl, permissions perm, auto_rmfile& rm) { try { cpfile (from, to, fl, perm, rm); } catch (const ios_base::failure& e) { // While we try to preserve the original error information, we can not // make the description to be exactly the same, for example // // Is a directory // // becomes // // Is a directory: Input/output error // // Note that our custom operator<<(ostream, exception) doesn't strip this // suffix. This is a temporary code after all. // throw_generic_error (EIO, e.what ()); } } void cpfile (const path& from, const path& to, cpflags fl) { permissions perm (path_permissions (from)); auto_rmfile rm; cpfile::value> ( from, to, fl, perm, rm); if ((fl & cpflags::overwrite_permissions) == cpflags::overwrite_permissions) path_permissions (to, perm); rm.cancel (); } // Figuring out whether we have the nanoseconds in struct stat. Some // platforms (e.g., FreeBSD), may provide some "compatibility" #define's, // so use the second argument to not end up with the same signatures. // template inline constexpr auto mnsec (const S* s, bool) -> decltype(s->st_mtim.tv_nsec) { return s->st_mtim.tv_nsec; // POSIX (GNU/Linux, Solaris). } template inline constexpr auto mnsec (const S* s, int) -> decltype(s->st_mtimespec.tv_nsec) { return s->st_mtimespec.tv_nsec; // *BSD, MacOS. } template inline constexpr auto mnsec (const S* s, float) -> decltype(s->st_mtime_n) { return s->st_mtime_n; // AIX 5.2 and later. } // Things are not going to end up well with only seconds resolution so // let's make it a compile error. // // template // inline constexpr int // mnsec (...) {return 0;} template inline constexpr auto ansec (const S* s, bool) -> decltype(s->st_atim.tv_nsec) { return s->st_atim.tv_nsec; // POSIX (GNU/Linux, Solaris). } template inline constexpr auto ansec (const S* s, int) -> decltype(s->st_atimespec.tv_nsec) { return s->st_atimespec.tv_nsec; // *BSD, MacOS. } template inline constexpr auto ansec (const S* s, float) -> decltype(s->st_atime_n) { return s->st_atime_n; // AIX 5.2 and later. } // template // inline constexpr int // ansec (...) {return 0;} void mventry (const path& from, const path& to, cpflags fl) { assert ((fl & cpflags::overwrite_permissions) == cpflags::overwrite_permissions); bool ovr ((fl & cpflags::overwrite_content) == cpflags::overwrite_content); const char* f (from.string ().c_str ()); const char* t (to.string ().c_str ()); #ifndef _WIN32 if (!ovr && path_entry (to).first) throw_generic_error (EEXIST); if (::rename (f, t) == 0) // POSIX implementation. return; // If source and destination paths are on different file systems we need to // move the file ourselves. // if (errno != EXDEV) throw_generic_error (errno); // Note that cpfile() follows symlinks, so we need to remove destination if // exists. // try_rmfile (to); // Note that permissions are copied unconditionally to a new file. // cpfile (from, to, cpflags::none); // Copy file access and modification times. // struct stat s; if (stat (f, &s) != 0) throw_generic_error (errno); timeval times[2]; times[0].tv_sec = s.st_atime; times[0].tv_usec = ansec (&s, true) / 1000; times[1].tv_sec = s.st_mtime; times[1].tv_usec = mnsec (&s, true) / 1000; if (utimes (t, times) != 0) throw_generic_error (errno); // Finally, remove the source file. // try_rmfile (from); #else // While ::rename() is present on Windows, it is not POSIX but ISO C // implementation, that doesn't fit our needs well. // auto te (path_entry (to)); if (!ovr && te.first) throw_generic_error (EEXIST); bool td (te.first && te.second == entry_type::directory); auto fe (path_entry (from)); bool fd (fe.first && fe.second == entry_type::directory); // If source and destination filesystem entries exist, they both must be // either directories or not directories. // if (fe.first && te.first && fd != td) throw_generic_error (ENOTDIR); DWORD mfl (fd ? 0 : (MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)); if (MoveFileExA (f, t, mfl)) return; // If the destination already exists, then MoveFileExA() succeeds only if // it is a regular file or a symlink. Lets also support an empty directory // special case to comply with POSIX. If the destination is an empty // directory we will just remove it and retry the move operation. // // Note that under Wine we endup with ERROR_ACCESS_DENIED error code in // that case, and with ERROR_ALREADY_EXISTS when run natively. // DWORD ec (GetLastError ()); if ((ec == ERROR_ALREADY_EXISTS || ec == ERROR_ACCESS_DENIED) && td && try_rmdir (path_cast (to)) != rmdir_status::not_empty && MoveFileExA (f, t, mfl)) return; throw_system_error (ec); #endif } timestamp file_mtime (const char* p) { #ifndef _WIN32 struct stat s; if (stat (p, &s) != 0) { if (errno == ENOENT || errno == ENOTDIR) return timestamp_nonexistent; else throw_generic_error (errno); } if (!S_ISREG (s.st_mode)) return timestamp_nonexistent; return system_clock::from_time_t (s.st_mtime) + chrono::duration_cast ( chrono::nanoseconds (mnsec (&s, true))); #else WIN32_FILE_ATTRIBUTE_DATA s; if (!GetFileAttributesExA (p, GetFileExInfoStandard, &s)) { DWORD ec (GetLastError ()); if (ec == ERROR_FILE_NOT_FOUND || ec == ERROR_PATH_NOT_FOUND || ec == ERROR_INVALID_NAME || ec == ERROR_INVALID_DRIVE || ec == ERROR_BAD_PATHNAME || ec == ERROR_BAD_NETPATH) return timestamp_nonexistent; throw_system_error (ec); } if ((s.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) return timestamp_nonexistent; // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch" // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds. // const FILETIME& t (s.ftLastWriteTime); uint64_t ns ((static_cast (t.dwHighDateTime) << 32) | t.dwLowDateTime); ns -= 11644473600ULL * 10000000; // Now in UNIX epoch. ns *= 100; // Now in nanoseconds. return timestamp ( chrono::duration_cast ( chrono::nanoseconds (ns))); #endif } permissions path_permissions (const path& p) { #ifndef _WIN32 struct stat s; if (stat (p.string ().c_str (), &s) != 0) #else struct _stat s; if (_stat (p.string ().c_str (), &s) != 0) #endif throw_generic_error (errno); // VC++ has no S_IRWXU defined. MINGW GCC <= 4.9 has no S_IRWXG, S_IRWXO // defined. // // We could extrapolate user permissions to group/other permissions if // S_IRWXG/S_IRWXO are undefined. That is, we could consider their absence // as meaning that the platform does not distinguish between permissions // for different kinds of users. Let's wait for a use-case first. // mode_t f (S_IREAD | S_IWRITE | S_IEXEC); #ifdef S_IRWXG f |= S_IRWXG; #endif #ifdef S_IRWXO f |= S_IRWXO; #endif return static_cast (s.st_mode & f); } void path_permissions (const path& p, permissions f) { mode_t m (S_IREAD | S_IWRITE | S_IEXEC); #ifdef S_IRWXG m |= S_IRWXG; #endif #ifdef S_IRWXO m |= S_IRWXO; #endif m &= static_cast (f); #ifndef _WIN32 if (chmod (p.string ().c_str (), m) == -1) #else if (_chmod (p.string ().c_str (), m) == -1) #endif throw_generic_error (errno); } // dir_{entry,iterator} // #ifndef _WIN32 // dir_entry // dir_iterator:: ~dir_iterator () { if (h_ != nullptr) closedir (h_); // Ignore any errors. } dir_iterator& dir_iterator:: operator= (dir_iterator&& x) { if (this != &x) { e_ = move (x.e_); if (h_ != nullptr && closedir (h_) == -1) throw_generic_error (errno); h_ = x.h_; x.h_ = nullptr; } return *this; } entry_type dir_entry:: type (bool link) const { path_type p (b_ / p_); struct stat s; if ((link ? stat (p.string ().c_str (), &s) : lstat (p.string ().c_str (), &s)) != 0) { throw_generic_error (errno); } entry_type r; if (S_ISREG (s.st_mode)) r = entry_type::regular; else if (S_ISDIR (s.st_mode)) r = entry_type::directory; else if (S_ISLNK (s.st_mode)) r = entry_type::symlink; else r = entry_type::other; return r; } // dir_iterator // struct dir_deleter { void operator() (DIR* p) const {if (p != nullptr) closedir (p);} }; dir_iterator:: dir_iterator (const dir_path& d) { unique_ptr h (opendir (d.string ().c_str ())); h_ = h.get (); if (h_ == nullptr) throw_generic_error (errno); next (); if (h_ != nullptr) e_.b_ = d; h.release (); } template inline /*constexpr*/ entry_type d_type (const D* d, decltype(d->d_type)*) { switch (d->d_type) { #ifdef DT_DIR case DT_DIR: return entry_type::directory; #endif #ifdef DT_REG case DT_REG: return entry_type::regular; #endif #ifdef DT_LNK case DT_LNK: return entry_type::symlink; #endif #ifdef DT_BLK case DT_BLK: #endif #ifdef DT_CHR case DT_CHR: #endif #ifdef DT_FIFO case DT_FIFO: #endif #ifdef DT_SOCK case DT_SOCK: #endif return entry_type::other; default: return entry_type::unknown; } } template inline constexpr entry_type d_type (...) {return entry_type::unknown;} void dir_iterator:: next () { for (;;) { errno = 0; if (struct dirent* de = readdir (h_)) { // We can accept some overhead for '.' and '..' (relying on short // string optimization) in favor of a more compact code. // path p (de->d_name); // Skip '.' and '..'. // if (p.current () || p.parent ()) continue; e_.p_ = move (p); e_.t_ = d_type (de, nullptr); e_.lt_ = entry_type::unknown; } else if (errno == 0) { // End of stream. // closedir (h_); h_ = nullptr; } else throw_generic_error (errno); break; } } #else // dir_entry // dir_iterator:: ~dir_iterator () { if (h_ != -1) _findclose (h_); // Ignore any errors. } dir_iterator& dir_iterator:: operator= (dir_iterator&& x) { if (this != &x) { e_ = move (x.e_); if (h_ != -1 && _findclose (h_) == -1) throw_generic_error (errno); h_ = x.h_; x.h_ = -1; } return *this; } entry_type dir_entry:: type (bool) const { // Note that we currently do not support symlinks (yes, there is symlink // support since Vista). // path_type p (b_ / p_); struct _stat s; if (_stat (p.string ().c_str (), &s) != 0) throw_generic_error (errno); entry_type r; if (S_ISREG (s.st_mode)) r = entry_type::regular; else if (S_ISDIR (s.st_mode)) r = entry_type::directory; else r = entry_type::other; return r; } // dir_iterator // struct auto_dir { explicit auto_dir (intptr_t& h): h_ (&h) {} auto_dir (const auto_dir&) = delete; auto_dir& operator= (const auto_dir&) = delete; ~auto_dir () { if (h_ != nullptr && *h_ != -1) _findclose (*h_); } void release () {h_ = nullptr;} private: intptr_t* h_; }; dir_iterator:: dir_iterator (const dir_path& d) { auto_dir h (h_); e_.b_ = d; // Used by next() to call _findfirst(). next (); h.release (); } void dir_iterator:: next () { for (;;) { bool r; _finddata_t fi; if (h_ == -1) { // The call is made from the constructor. Any other call with h_ == -1 // is illegal. // // Check to distinguish non-existent vs empty directories. // if (!dir_exists (e_.b_)) throw_generic_error (ENOENT); h_ = _findfirst ((e_.b_ / path ("*")).string ().c_str (), &fi); r = h_ != -1; } else r = _findnext (h_, &fi) == 0; if (r) { // We can accept some overhead for '.' and '..' (relying on short // string optimization) in favor of a more compact code. // path p (fi.name); // Skip '.' and '..'. // if (p.current () || p.parent ()) continue; e_.p_ = move (p); // We do not support symlinks at the moment. // e_.t_ = fi.attrib & _A_SUBDIR ? entry_type::directory : entry_type::regular; e_.lt_ = entry_type::unknown; } else if (errno == ENOENT) { // End of stream. // if (h_ != -1) { _findclose (h_); h_ = -1; } } else throw_generic_error (errno); break; } } #endif // Match the name [ni, ne) to the pattern [pi, pe). Ranges can be empty. // static bool match (string::const_iterator pi, string::const_iterator pe, string::const_iterator ni, string::const_iterator ne) { using reverse_iterator = std::reverse_iterator; reverse_iterator rpi (pe); reverse_iterator rpe (pi); reverse_iterator rni (ne); reverse_iterator rne (ni); // Match the pattern suffix (follows the last *) to the name trailing // characters. // char pc; for (; rpi != rpe && (pc = *rpi) != '*' && rni != rne; ++rpi, ++rni) { #ifndef _WIN32 if (*rni != pc && pc != '?') #else if (lcase (*rni) != lcase (pc) && pc != '?') #endif return false; } // If we got to the (reversed) end of the pattern (no * is encountered) // than we are done. The success depends on if we got to the (reversed) end // of the name as well. // if (rpi == rpe) return rni == rne; // If we didn't reach * in the pattern then we reached the (reversed) end // of the name. That means we have unmatched non-star characters in the // pattern, and so match failed. // if (pc != '*') { assert (rni == rne); return false; } // Match the pattern prefix (ends with the first *) to the name leading // characters. If they mismatch we failed. Otherwise if this is an only * // in the pattern (matches whatever is left in the name) then we succeed, // otherwise we perform backtracking (recursively). // pe = rpi.base (); ne = rni.base (); // Compare the pattern and the name char by char until the name suffix or // * is encountered in the pattern (whichever happens first). Fail if a // char mismatches. // for (; (pc = *pi) != '*' && ni != ne; ++pi, ++ni) { #ifndef _WIN32 if (*ni != pc && pc != '?') #else if (lcase (*ni) != lcase (pc) && pc != '?') #endif return false; } // If we didn't get to * in the pattern then we got to the name suffix. // That means that the pattern has unmatched non-star characters, and so // match failed. // if (pc != '*') { assert (ni == ne); return false; } // If * that we have reached is the last one, then it matches whatever is // left in the name (including an empty range). // if (++pi == pe) return true; // Perform backtracking. // // From now on, we will call the pattern not-yet-matched part (starting // the leftmost * and ending the rightmost one inclusively) as pattern, and // the name not-yet-matched part as name. // // Here we sequentially assume that * that starts the pattern matches the // name leading part (staring from an empty one and iterating till the full // name). So if, at some iteration, the pattern trailing part (that follows // the leftmost *) matches the name trailing part, then the pattern matches // the name. // bool r; for (; !(r = match (pi, pe, ni, ne)) && ni != ne; ++ni) ; return r; } bool path_match (const string& pattern, const string& name) { // Implementation notes: // // - This has a good potential of becoming hairy quickly so need to strive // for an elegant way to implement this. // // - Most patterns will contains a single * wildcard with a prefix and/or // suffix (e.g., *.txt, foo*, f*.txt). Something like this is not very // common: *foo*. // // So it would be nice to have a clever implementation that first // "anchors" itself with a literal prefix and/or suffix and only then // continue with backtracking. In other words, reduce: // // *.txt vs foo.txt -> * vs foo // foo* vs foo.txt -> * vs .txt // f*.txt vs foo.txt -> * vs oo // auto pi (pattern.rbegin ()); auto pe (pattern.rend ()); auto ni (name.rbegin ()); auto ne (name.rend ()); // The name doesn't match the pattern if it is of a different type than the // pattern is. // bool pd (pi != pe && path::traits::is_separator (*pi)); bool nd (ni != ne && path::traits::is_separator (*ni)); if (pd != nd) return false; // Skip trailing separators if present. // if (pd) { ++pi; ++ni; } return match (pattern.begin (), pi.base (), name.begin (), ni.base ()); } // Iterate over directory sub-entries, recursively and including itself if // requested. Note that recursive iterating goes depth-first which make // sense for the cleanup use cases (@@ maybe this should be controllable // since for directory creation it won't make sense). // // Prior to recursively opening a directory for iterating the preopen // callback function is called. If false is returned, then the directory is // not traversed but still returned by the next() call. // // Note that iterating over non-existent directory is not en error. The // subsequent next() call returns false for such a directory. // using preopen = std::function; class recursive_dir_iterator { public: recursive_dir_iterator (dir_path p, bool recursive, bool self, bool fs, preopen po) : start_ (move (p)), recursive_ (recursive), self_ (self), follow_symlinks_ (fs), preopen_ (move (po)) { open (dir_path (), self_); } // Non-copyable, non-movable type. // recursive_dir_iterator (const recursive_dir_iterator&) = delete; recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete; // Return false if no more entries left. Otherwise save the next entry path // and return true. The path is relative against the directory being // traversed and contains a trailing separator for sub-directories. Throw // std::system_error in case of a failure (insufficient permissions, // dangling symlink encountered, etc). // bool next (path& p) { if (iters_.empty ()) return false; auto& i (iters_.back ()); // If we got to the end of directory sub-entries, then go one level up // and return this directory path. // if (i.first == dir_iterator ()) { path d (move (i.second)); iters_.pop_back (); // Return the path unless it is the last one (the directory we started // to iterate from) and the self flag is not set. // if (iters_.empty () && !self_) return false; p = move (d); return true; } const dir_entry& de (*i.first); // Append separator if a directory. Note that dir_entry::type() can // throw. // entry_type et (follow_symlinks_ ? de.type () : de.ltype ()); path pe (et == entry_type::directory ? path_cast (i.second / de.path ()) : i.second / de.path ()); ++i.first; if (recursive_ && pe.to_directory ()) { open (path_cast (move (pe)), true); return next (p); } p = move (pe); return true; } private: void open (dir_path p, bool preopen) { // We should consider a racing condition here. The directory can be // removed before we create an iterator for it. In this case we just do // nothing, so the directory is silently skipped. // try { // If preopen_() returns false, then the directory will not be // traversed (as we leave iterator with end semantics) but still be // returned by the next() call as a sub-entry. // dir_iterator i; if (!preopen || preopen_ (p)) { dir_path d (start_ / p); i = dir_iterator (!d.empty () ? d : dir_path (".")); } iters_.emplace_back (move (i), move (p)); } catch (const system_error& e) { // Ignore non-existent directory (ENOENT or ENOTDIR). Rethrow for any // other error. We consider ENOTDIR as a variety of removal, with a // new filesystem entry being created afterwards. // // Make sure that the error denotes errno portable code. // assert (e.code ().category () == generic_category ()); int ec (e.code ().value ()); if (ec != ENOENT && ec != ENOTDIR) throw; } } private: dir_path start_; bool recursive_; bool self_; bool follow_symlinks_; preopen preopen_; small_vector, 1> iters_; }; // Search for paths matching the pattern and call the specified function for // each matching path. Return false if the underlying func() call returns // false. Otherwise the function conforms to the path_search() description. // static const string any_dir ("*/"); static bool search ( path pattern, dir_path pattern_dir, const dir_path start_dir, bool follow_symlinks, const function& func) { // Fast-forward the leftmost pattern non-wildcard components. So, for // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. // { auto b (pattern.begin ()); auto e (pattern.end ()); auto i (b); for (; i != e && (*i).find_first_of ("*?") == string::npos; ++i) ; // If the pattern has no wildcards then we reduce to checking for the // filesystem entry existence. It matches if exists and is of the proper // type. // if (i == e) { path p (pattern_dir / pattern); auto pe (path_entry (start_dir / p, true)); if (pe.first && ((pe.second == entry_type::directory) == p.to_directory ())) return func (move (p), string (), false); return true; } else if (i != b) // There are non-wildcard components, so fast-forward. { path p (b, i); pattern = pattern.leaf (p); pattern_dir /= path_cast (move (p)); } } assert (!pattern.empty ()); // The pattern leftmost component. Will use it to match the start directory // sub-entries. // path pc (pattern.begin (), ++pattern.begin ()); string pcr (pc.representation ()); // Note that if the pattern has multiple components (is not a simple path), // then the leftmost one has a trailing separator, and so will match // sub-directories only. // bool simple (pattern.simple ()); // Note that we rely on "small function object" optimization here. // recursive_dir_iterator i ( start_dir / pattern_dir, pcr.find ("**") != string::npos, // Recursive. pcr.find ("***") != string::npos, // Self-inclusive. follow_symlinks, [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. { return func (pattern_dir / p, any_dir, true); }); // Canonicalize the pattern component collapsing consecutive stars (used to // express that it is recursive) into a single one. // size_t j (0); size_t n (pcr.size ()); for (size_t i (0); i < n; ++i) { char c (pcr[i]); if (!(c == '*' && i > 0 && pcr[i - 1] == '*')) pcr[j++] = c; } if (j != n) pcr.resize (j); // Note that the callback function can be called for the same directory // twice: first time as intermediate match from iterator's preopen() call, // and then, if the first call succeed, from the iterating loop (possibly // as the final match). // path p; while (i.next (p)) { // Skip sub-entry if its name doesn't match the pattern leftmost // component. // // Matching the directory we are iterating through (as for a pattern // component containing ***) is a bit tricky. This directory is // represented by the iterator as an empty path, and so we need to // compute it (the leaf would actually be enough) for matching. This // leaf can be aquired from the pattern_dir / start_dir path except the // case when both directories are empty. This is the case when we search // in the current directory (start_dir is empty) with a pattern that // starts with *** wildcard (for example f***/bar). All we can do here is // to fallback to path::current_directory() call. Note that this will be // the only call per path_search() as the next time pattern_dir will not // be empty. // const path& se (!p.empty () ? p : path_cast (!pattern_dir.empty () ? pattern_dir : !start_dir.empty () ? start_dir : path::current_directory ())); if (!path_match (pcr, se.leaf ().representation ())) continue; // If the callback function returns false, then we stop the entire search // for the final match, or do not search below the path for the // intermediate one. // if (!func (pattern_dir / p, pcr, !simple)) { if (simple) // Final match. return false; else continue; } // If the pattern is not a simple one, and it's leftmost component // matches the sub-entry, then the sub-entry is a directory (see the note // above), and we search in it using the trailing part of the pattern. // if (!simple && !search (pattern.leaf (pc), pattern_dir / path_cast (move (p)), start_dir, follow_symlinks, func)) return false; } return true; } void path_search ( const path& pattern, const function& func, const dir_path& start, bool follow_symlinks) { search (pattern, dir_path (), pattern.relative () ? start : dir_path (), follow_symlinks, func); } }