diff options
Diffstat (limited to 'libbutl/filesystem.cxx')
-rw-r--r-- | libbutl/filesystem.cxx | 880 |
1 files changed, 599 insertions, 281 deletions
diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx index acf2e09..28a0de8 100644 --- a/libbutl/filesystem.cxx +++ b/libbutl/filesystem.cxx @@ -16,7 +16,7 @@ #else # include <libbutl/win32-utility.hxx> -# include <io.h> // _find*(), _unlink(), _chmod() +# include <io.h> // _unlink(), _chmod() # include <direct.h> // _mkdir(), _rmdir() # include <winioctl.h> // FSCTL_SET_REPARSE_POINT # include <sys/types.h> // _stat @@ -28,8 +28,9 @@ # define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) # endif -# include <cwchar> // mbsrtowcs(), wcsrtombs(), mbstate_t -# include <cstring> // strncmp() +# include <cwchar> // mbsrtowcs(), wcsrtombs(), mbstate_t +# include <cstring> // strncmp() +# include <type_traits> // is_same #endif #include <chrono> @@ -183,6 +184,19 @@ namespace butl // static inline constexpr int // ansec (...) {return 0;} + static inline entry_time + entry_tm (const struct stat& s) noexcept + { + auto tm = [] (time_t sec, auto nsec) -> timestamp + { + return system_clock::from_time_t (sec) + + chrono::duration_cast<duration> (chrono::nanoseconds (nsec)); + }; + + return {tm (s.st_mtime, mnsec<struct stat> (&s, true)), + tm (s.st_atime, ansec<struct stat> (&s, true))}; + } + // Return the modification and access times of a regular file or directory. // static entry_time @@ -200,14 +214,7 @@ namespace butl if (dir ? !S_ISDIR (s.st_mode) : !S_ISREG (s.st_mode)) return {timestamp_nonexistent, timestamp_nonexistent}; - auto tm = [] (time_t sec, auto nsec) -> timestamp - { - return system_clock::from_time_t (sec) + - chrono::duration_cast<duration> (chrono::nanoseconds (nsec)); - }; - - return {tm (s.st_mtime, mnsec<struct stat> (&s, true)), - tm (s.st_atime, ansec<struct stat> (&s, true))}; + return entry_tm (s); } // Set the modification and access times for a regular file or directory. @@ -309,16 +316,15 @@ namespace butl // Open a filesystem entry for reading and optionally writing its // meta-information and return the entry handle and meta-information if the - // path refers to an existing entry and nullhandle otherwise. Follow reparse - // points by default. Underlying OS errors are reported by throwing - // std::system_error, unless ignore_error is true in which case nullhandle - // is returned. In the latter case the error code can be obtained by calling - // GetLastError(). + // path refers to an existing entry and nullhandle otherwise. Underlying OS + // errors are reported by throwing std::system_error, unless ignore_error is + // true in which case nullhandle is returned. In the latter case the error + // code can be obtained by calling GetLastError(). // static inline pair<win32::auto_handle, BY_HANDLE_FILE_INFORMATION> entry_info_handle (const char* p, bool write, - bool fr = true, + bool follow_reparse_points, bool ie = false) { // Open the entry for reading/writing its meta-information. Follow reparse @@ -333,7 +339,7 @@ namespace butl nullptr, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS | // Required for a directory. - (fr ? 0 : FILE_FLAG_OPEN_REPARSE_POINT), + (follow_reparse_points ? 0 : FILE_FLAG_OPEN_REPARSE_POINT), nullptr)); if (h == nullhandle) @@ -358,13 +364,15 @@ namespace butl } // Return a flag indicating whether the path is to an existing filesystem - // entry and its meta-information if so. Follow reparse points by default. + // entry and its meta-information if so. // static inline pair<bool, BY_HANDLE_FILE_INFORMATION> - path_entry_info (const char* p, bool fr = true, bool ie = false) + path_entry_handle_info (const char* p, + bool follow_reparse_points, + bool ie = false) { pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi ( - entry_info_handle (p, false /* write */, fr, ie)); + entry_info_handle (p, false /* write */, follow_reparse_points, ie)); if (hi.first == nullhandle) return make_pair (false, BY_HANDLE_FILE_INFORMATION ()); @@ -376,9 +384,34 @@ namespace butl } static inline pair<bool, BY_HANDLE_FILE_INFORMATION> - path_entry_info (const path& p, bool fr = true, bool ie = false) + path_entry_handle_info (const path& p, bool fr, bool ie = false) { - return path_entry_info (p.string ().c_str (), fr, ie); + return path_entry_handle_info (p.string ().c_str (), fr, ie); + } + + // Return a flag indicating whether the path is to an existing filesystem + // entry and its extended attributes if so. Don't follow reparse points. + // + static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA> + path_entry_info (const char* p, bool ie = false) + { + WIN32_FILE_ATTRIBUTE_DATA r; + if (!GetFileAttributesExA (p, GetFileExInfoStandard, &r)) + { + DWORD ec; + if (ie || error_file_not_found (ec = GetLastError ())) + return make_pair (false, WIN32_FILE_ATTRIBUTE_DATA ()); + + throw_system_error (ec); + } + + return make_pair (true, r); + } + + static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA> + path_entry_info (const path& p, bool ie = false) + { + return path_entry_info (p.string ().c_str (), ie); } // Reparse point data. @@ -614,8 +647,48 @@ namespace butl return reparse_point_entry (p.string ().c_str (), ie); } - pair<bool, entry_stat> - path_entry (const char* p, bool fl, bool ie) + static inline timestamp + to_timestamp (const FILETIME& t) + { + // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" + // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch" + // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds. + // + uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) | + t.dwLowDateTime); + + nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch. + nsec *= 100; // Now in nanoseconds. + + return timestamp ( + chrono::duration_cast<duration> (chrono::nanoseconds (nsec))); + } + + static inline FILETIME + to_filetime (timestamp t) + { + // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" + // (1601-01-01T00:00:00Z). To convert "UNIX epoch" (1970-01-01T00:00:00Z) + // to it we need to add 11644473600 seconds. + // + uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> ( + t.time_since_epoch ()).count ()); + + ticks /= 100; // Now in 100 nanosecond "ticks". + ticks += 11644473600ULL * 10000000; // Now in "Windows epoch". + + FILETIME r; + r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF; + r.dwLowDateTime = ticks & 0xFFFFFFFF; + return r; + } + + // If the being returned entry type is regular or directory and et is not + // NULL, then also save the entry modification and access times into the + // referenced variable. + // + static inline pair<bool, entry_stat> + path_entry (const char* p, bool fl, bool ie, entry_time* et) { // A path like 'C:', while being a root path in our terminology, is not as // such for Windows, that maintains current directory for each drive, and @@ -633,66 +706,98 @@ namespace butl // Stat the entry not following reparse points. // - pair<bool, BY_HANDLE_FILE_INFORMATION> pi ( - path_entry_info (p, false /* follow_reparse_points */, ie)); + pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p, ie)); if (!pi.first) return make_pair (false, entry_stat {entry_type::unknown, 0}); - if (reparse_point (pi.second.dwFileAttributes)) + auto entry_info = [et] (const auto& ei) { - pair<entry_type, path> rp (reparse_point_entry (p, ie)); + if (et != nullptr) + { + et->modification = to_timestamp (ei.ftLastWriteTime); + et->access = to_timestamp (ei.ftLastAccessTime); + } + + if (directory (ei.dwFileAttributes)) + return make_pair (true, entry_stat {entry_type::directory, 0}); + else + return make_pair ( + true, + entry_stat {entry_type::regular, + ((uint64_t (ei.nFileSizeHigh) << 32) | ei.nFileSizeLow)}); + }; - if (rp.first == entry_type::symlink) + if (!reparse_point (pi.second.dwFileAttributes)) + return entry_info (pi.second); + + pair<entry_type, path> rp (reparse_point_entry (p, ie)); + + if (rp.first == entry_type::symlink) + { + // If following symlinks is requested, then follow the reparse point and + // return its target information. Otherwise, return the symlink entry + // type. + // + if (fl) { - // If following symlinks is requested, then follow the reparse point, - // overwrite its own information with the resolved target information, - // and fall through. Otherwise, return the symlink entry type. - // - if (fl) - { - pi = path_entry_info (p, true /* follow_reparse_points */, ie); + pair<bool, BY_HANDLE_FILE_INFORMATION> pi ( + path_entry_handle_info (p, true /* follow_reparse_points */, ie)); - if (!pi.first) - return make_pair (false, entry_stat {entry_type::unknown, 0}); - } - else - return make_pair (true, entry_stat {entry_type::symlink, 0}); + return pi.first + ? entry_info (pi.second) + : make_pair (false, entry_stat {entry_type::unknown, 0}); } - else if (rp.first == entry_type::unknown) - return make_pair (false, entry_stat {entry_type::unknown, 0}); - else // entry_type::other - return make_pair (true, entry_stat {entry_type::other, 0}); + else + return make_pair (true, entry_stat {entry_type::symlink, 0}); } + else if (rp.first == entry_type::unknown) + return make_pair (false, entry_stat {entry_type::unknown, 0}); + else // entry_type::other + return make_pair (true, entry_stat {entry_type::other, 0}); + } - if (directory (pi.second.dwFileAttributes)) - return make_pair (true, entry_stat {entry_type::directory, 0}); - else - return make_pair ( - true, - entry_stat {entry_type::regular, - ((uint64_t (pi.second.nFileSizeHigh) << 32) | - pi.second.nFileSizeLow)}); + static inline pair<bool, entry_stat> + path_entry (const path& p, bool fl, bool ie, entry_time* et) + { + return path_entry (p.string ().c_str (), fl, ie, et); + } + + pair<bool, entry_stat> + path_entry (const char* p, bool fl, bool ie) + { + return path_entry (p, fl, ie, nullptr /* entry_time */); } permissions path_permissions (const path& p) { - pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p)); + // Let's optimize for the common case when the entry is not a reparse + // point. + // + auto attr_to_perm = [] (const auto& pi) -> permissions + { + if (!pi.first) + throw_generic_error (ENOENT); - if (!pi.first) - throw_generic_error (ENOENT); + // On Windows a filesystem entry is always readable. Also there is no + // notion of group/other permissions at OS level, so we extrapolate user + // permissions to group/other permissions (as the _stat() function + // does). + // + permissions r (permissions::ru | permissions::rg | permissions::ro); - // On Windows a filesystem entry is always readable. Also there is no - // notion of group/other permissions at OS level, so we extrapolate user - // permissions to group/other permissions (as the _stat() function does). - // - permissions r (permissions::ru | permissions::rg | permissions::ro); + if (!readonly (pi.second.dwFileAttributes)) + r |= permissions::wu | permissions::wg | permissions::wo; - if (!readonly (pi.second.dwFileAttributes)) - r |= permissions::wu | permissions::wg | permissions::wo; + return r; + }; - return r; + pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p)); + return !pi.first || !reparse_point (pi.second.dwFileAttributes) + ? attr_to_perm (pi) + : attr_to_perm ( + path_entry_handle_info (p, true /* follow_reparse_points */)); } void @@ -718,50 +823,26 @@ namespace butl static entry_time entry_tm (const char* p, bool dir) { - pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p)); - - // If the entry is of the wrong type, then let's pretend that it doesn't - // exists. + // Let's optimize for the common case when the entry is not a reparse + // point. // - if (!pi.first || directory (pi.second.dwFileAttributes) != dir) - return {timestamp_nonexistent, timestamp_nonexistent}; - - auto tm = [] (const FILETIME& t) -> timestamp + auto attr_to_time = [dir] (const auto& pi) -> entry_time { - // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" - // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch" - // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds. + // If the entry is of the wrong type, then let's pretend that it doesn't + // exists. // - uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) | - t.dwLowDateTime); + if (!pi.first || directory (pi.second.dwFileAttributes) != dir) + return entry_time {timestamp_nonexistent, timestamp_nonexistent}; - nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch. - nsec *= 100; // Now in nanoseconds. - - return timestamp ( - chrono::duration_cast<duration> (chrono::nanoseconds (nsec))); + return entry_time {to_timestamp (pi.second.ftLastWriteTime), + to_timestamp (pi.second.ftLastAccessTime)}; }; - return {tm (pi.second.ftLastWriteTime), tm (pi.second.ftLastAccessTime)}; - } - - static inline FILETIME - to_filetime (timestamp t) - { - // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" - // (1601-01-01T00:00:00Z). To convert "UNIX epoch" - // (1970-01-01T00:00:00Z) to it we need to add 11644473600 seconds. - // - uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> ( - t.time_since_epoch ()).count ()); - - ticks /= 100; // Now in 100 nanosecond "ticks". - ticks += 11644473600ULL * 10000000; // Now in "Windows epoch". - - FILETIME r; - r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF; - r.dwLowDateTime = ticks & 0xFFFFFFFF; - return r; + pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p)); + return !pi.first || !reparse_point (pi.second.dwFileAttributes) + ? attr_to_time (pi) + : attr_to_time ( + path_entry_handle_info (p, true /* follow_reparse_points */)); } // Set the modification and access times for a regular file or directory. @@ -772,7 +853,9 @@ namespace butl // See also touch_file() below. // pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi ( - entry_info_handle (p, true /* write */)); + entry_info_handle (p, + true /* write */, + true /* follow_reparse_points */)); // If the entry is of the wrong type, then let's pretend that it doesn't // exist. @@ -857,7 +940,9 @@ namespace butl // implicitly. // pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi ( - entry_info_handle (p.string ().c_str (), true /* write */)); + entry_info_handle (p.string ().c_str (), + true /* write */, + true /* follow_reparse_points */)); if (hi.first != nullhandle) { @@ -1006,7 +1091,7 @@ namespace butl // try { - for (const dir_entry& de: dir_iterator (p, false /* ignore_dangling */)) + for (const dir_entry& de: dir_iterator (p, dir_iterator::no_follow)) { path ep (p / de.path ()); //@@ Would be good to reuse the buffer. @@ -1057,12 +1142,12 @@ namespace butl // failure (see mventry() for details). If that's the case, we will keep // trying to move the file for two seconds. // - for (size_t i (0); i < 21; ++i) + for (size_t i (0); i < 41; ++i) { - // Sleep 100 milliseconds before the removal retry. + // Sleep 50 milliseconds before the removal retry. // if (i != 0) - Sleep (100); + Sleep (50); ur = _unlink (f); @@ -1610,9 +1695,12 @@ namespace butl } void - cpfile (const path& from, const path& to, cpflags fl) + cpfile (const path& from, + const path& to, + cpflags fl, + optional<permissions> cperm) { - permissions perm (path_permissions (from)); + permissions perm (cperm ? *cperm : path_permissions (from)); auto_rmfile rm; cpfile<is_base_of<system_error, ios_base::failure>::value> ( @@ -1704,12 +1792,12 @@ namespace butl // fdopen(). // DWORD ec; - for (size_t i (0); i < 21; ++i) + for (size_t i (0); i < 41; ++i) { // Sleep 100 milliseconds before the move retry. // if (i != 0) - Sleep (100); + Sleep (50); if (MoveFileExA (f, t, mfl)) return; @@ -1811,7 +1899,7 @@ namespace butl h_ = x.h_; x.h_ = nullptr; - ignore_dangling_ = x.ignore_dangling_; + mode_ = x.mode_; } return *this; } @@ -1832,6 +1920,11 @@ namespace butl entry_type dir_entry:: type (bool follow_symlinks) const { + // Note that this function can only be used for resolving an entry type + // lazily and thus can't be used with the detect_dangling dir_iterator + // mode (see dir_iterator::next () implementation for details). Thus, we + // always throw on the stat()/lstat() failure. + // path_type p (b_ / p_); struct stat s; if ((follow_symlinks @@ -1839,7 +1932,18 @@ namespace butl : lstat (p.string ().c_str (), &s)) != 0) throw_generic_error (errno); - return butl::type (s); + entry_type r (butl::type (s)); + + // While at it, also save the entry modification and access times. + // + if (r != entry_type::symlink) + { + entry_time t (entry_tm (s)); + mtime_ = t.modification; + atime_ = t.access; + } + + return r; } // dir_iterator @@ -1850,8 +1954,8 @@ namespace butl }; dir_iterator:: - dir_iterator (const dir_path& d, bool ignore_dangling) - : ignore_dangling_ (ignore_dangling) + dir_iterator (const dir_path& d, mode m) + : mode_ (m) { unique_ptr<DIR, dir_deleter> h (opendir (d.string ().c_str ())); h_ = h.get (); @@ -1867,7 +1971,7 @@ namespace butl } template <typename D> - static inline /*constexpr*/ entry_type + static inline /*constexpr*/ optional<entry_type> d_type (const D* d, decltype(d->d_type)*) { switch (d->d_type) @@ -1895,13 +1999,13 @@ namespace butl #endif return entry_type::other; - default: return entry_type::unknown; + default: return nullopt; } } template <typename D> - static inline constexpr entry_type - d_type (...) {return entry_type::unknown;} + static inline constexpr optional<entry_type> + d_type (...) {return nullopt;} void dir_iterator:: next () @@ -1923,25 +2027,43 @@ namespace butl e_.p_ = move (p); e_.t_ = d_type<struct dirent> (de, nullptr); - e_.lt_ = entry_type::unknown; + e_.lt_ = nullopt; + + e_.mtime_ = timestamp_unknown; + e_.atime_ = timestamp_unknown; // If requested, we ignore dangling symlinks, skipping ones with - // non-existing or inaccessible targets. + // non-existing or inaccessible targets (ignore_dangling mode), or set + // the entry_type::unknown type for them (detect_dangling mode). // - if (ignore_dangling_) + if (mode_ != no_follow) { - // Note that ltype () can potentially lstat() (see d_type() for + bool dd (mode_ == detect_dangling); + + // Note that ltype () can potentially lstat() (see type() for // details) and so throw. We, however, need to skip the entry if it // is already removed (due to a race) and throw on any other error. // path fp (e_.base () / e_.path ()); const char* p (fp.string ().c_str ()); - if (e_.t_ == entry_type::unknown) + if (!e_.t_) { struct stat s; if (lstat (p, &s) != 0) { + // Given that we have already enumerated the filesystem entry, + // these error codes can only mean that the entry doesn't exist + // anymore and so we always skip it. + // + // If errno is EACCES, then the permission to search a directory + // we currently iterate over has been revoked. Throwing in this + // case sounds like the best choice. + // + // Note that according to POSIX the filesystem entry we call + // lstat() on doesn't require any specific permissions to be + // granted. + // if (errno == ENOENT || errno == ENOTDIR) continue; @@ -1949,21 +2071,53 @@ namespace butl } e_.t_ = type (s); + + if (*e_.t_ != entry_type::symlink) + { + entry_time t (entry_tm (s)); + e_.mtime_ = t.modification; + e_.atime_ = t.access; + } } - if (e_.t_ == entry_type::symlink) + // The entry type should be present and may not be + // entry_type::unknown. + // + //assert (e_.t_ && *e_.t_ != entry_type::unknown); + + // Check if the symlink target exists and is accessible and set the + // target type. + // + if (*e_.t_ == entry_type::symlink) { struct stat s; if (stat (p, &s) != 0) { if (errno == ENOENT || errno == ENOTDIR || errno == EACCES) - continue; - - throw_generic_error (errno); + { + if (dd) + e_.lt_ = entry_type::unknown; + else + continue; + } + else + throw_generic_error (errno); } + else + { + e_.lt_ = type (s); - e_.lt_ = type (s); // While at it, set the target type. + entry_time t (entry_tm (s)); + e_.mtime_ = t.modification; + e_.atime_ = t.access; + } } + + // The symlink target type should be present and in the + // ignore_dangling mode it may not be entry_type::unknown. + // + //assert (*e_.t_ != entry_type::symlink || + // (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown))); } } else if (errno == 0) @@ -1984,11 +2138,49 @@ namespace butl // dir_entry // + entry_type dir_entry:: + type (bool follow_symlinks) const + { + // Note that this function can only be used for resolving an entry type + // lazily and thus can't be used with the detect_dangling dir_iterator + // mode (see dir_iterator::next () implementation for details). Thus, we + // always throw if the entry info can't be retrieved. + // + // While at it, also save the entry modification and access times. + // + path_type p (base () / path ()); + entry_time et; + pair<bool, entry_stat> e ( + path_entry (p, follow_symlinks, false /* ignore_error */, &et)); + + if (!e.first) + throw_generic_error (ENOENT); + + if (e.second.type == entry_type::regular || + e.second.type == entry_type::directory) + { + mtime_ = et.modification; + atime_ = et.access; + } + + return e.second.type; + } + + // dir_iterator + // + static_assert(is_same<HANDLE, void*>::value, "HANDLE is not void*"); + + static inline HANDLE + to_handle (intptr_t h) + { + return reinterpret_cast<HANDLE> (h); + } + dir_iterator:: ~dir_iterator () { if (h_ != -1) - _findclose (h_); // Ignore any errors. + FindClose (to_handle (h_)); // Ignore any errors. } dir_iterator& dir_iterator:: @@ -1998,56 +2190,32 @@ namespace butl { e_ = move (x.e_); - if (h_ != -1 && _findclose (h_) == -1) - throw_generic_error (errno); + if (h_ != -1 && !FindClose (to_handle (h_))) + throw_system_error (GetLastError ()); h_ = x.h_; x.h_ = -1; - ignore_dangling_ = x.ignore_dangling_; + mode_ = x.mode_; } return *this; } - entry_type dir_entry:: - type (bool follow_symlinks) const - { - path_type p (base () / path ()); - pair<bool, entry_stat> e (path_entry (p, follow_symlinks)); - - if (!e.first) - throw_generic_error (ENOENT); - - return e.second.type; - } - - // dir_iterator - // - struct auto_dir + dir_iterator:: + dir_iterator (const dir_path& d, mode m) + : mode_ (m) { - explicit - auto_dir (intptr_t& h): h_ (&h) {} - - auto_dir (const auto_dir&) = delete; - auto_dir& operator= (const auto_dir&) = delete; - - ~auto_dir () + struct deleter { - if (h_ != nullptr && *h_ != -1) - _findclose (*h_); - } - - void release () {h_ = nullptr;} + void operator() (intptr_t* p) const + { + if (p != nullptr && *p != -1) + FindClose (to_handle (*p)); + } + }; - private: - intptr_t* h_; - }; + unique_ptr<intptr_t, deleter> h (&h_); - dir_iterator:: - dir_iterator (const dir_path& d, bool ignore_dangling) - : ignore_dangling_ (ignore_dangling) - { - auto_dir h (h_); e_.b_ = d; // Used by next(). next (); @@ -2060,31 +2228,37 @@ namespace butl for (;;) { bool r; - _finddata_t fi; + WIN32_FIND_DATA fi; if (h_ == -1) { // The call is made from the constructor. Any other call with h_ == -1 // is illegal. // - - // Check to distinguish non-existent vs empty directories. + // Note that we used to check for the directory existence before + // iterating over it. However, let's not pessimize things and only + // check for the directory existence if FindFirstFileExA() fails. // - if (!dir_exists (e_.base ())) - throw_generic_error (ENOENT); - h_ = _findfirst ((e_.base () / path ("*")).string ().c_str (), &fi); - r = h_ != -1; + h_ = reinterpret_cast<intptr_t> ( + FindFirstFileExA ((e_.base () / path ("*")).string ().c_str (), + FindExInfoBasic, + &fi, + FindExSearchNameMatch, + NULL, + 0)); + + r = (h_ != -1); } else - r = _findnext (h_, &fi) == 0; + r = FindNextFileA (to_handle (h_), &fi); if (r) { // We can accept some overhead for '.' and '..' (relying on short // string optimization) in favor of a more compact code. // - path p (fi.name); + path p (fi.cFileName); // Skip '.' and '..'. // @@ -2093,26 +2267,47 @@ namespace butl e_.p_ = move (p); - // Note that the entry type detection always requires to additionally - // query the entry information. Thus, we evaluate its type lazily. + DWORD a (fi.dwFileAttributes); + bool rp (reparse_point (a)); + + // Evaluate the entry type lazily if this is a reparse point since it + // requires to additionally query the entry information (see + // reparse_point_entry() for details). // - e_.t_ = entry_type::unknown; + e_.t_ = rp ? nullopt : + directory (a) ? optional<entry_type> (entry_type::directory) : + optional<entry_type> (entry_type::regular) ; - e_.lt_ = entry_type::unknown; + e_.lt_ = nullopt; + + e_.mtime_ = rp ? timestamp_unknown : to_timestamp (fi.ftLastWriteTime); + + // Note that according to MSDN for the FindFirstFile[Ex]() function + // "the NTFS file system delays updates to the last access time for a + // file by up to 1 hour after the last access" and "on the FAT file + // system access time has a resolution of 1 day". + // + e_.atime_ = timestamp_unknown; // If requested, we ignore dangling symlinks and junctions, skipping - // ones with non-existing or inaccessible targets. + // ones with non-existing or inaccessible targets (ignore_dangling + // mode), or set the entry_type::unknown type for them + // (detect_dangling mode). // - if (ignore_dangling_) + if (rp && mode_ != no_follow) { + bool dd (mode_ == detect_dangling); + // Check the last error code throwing for codes other than "path not - // found" and "access denied". + // found" and "access denied" and returning this error code + // otherwise. // auto verify_error = [] () { DWORD ec (GetLastError ()); if (!error_file_not_found (ec) && ec != ERROR_ACCESS_DENIED) throw_system_error (ec); + return ec; }; // Note that ltype() queries the entry information due to the type @@ -2123,48 +2318,50 @@ namespace butl path fp (e_.base () / e_.path ()); const char* p (fp.string ().c_str ()); - DWORD a (GetFileAttributesA (p)); - if (a == INVALID_FILE_ATTRIBUTES) - { - // Note that sometimes trying to obtain attributes for a - // filesystem entry that was potentially removed ends up with - // ERROR_ACCESS_DENIED. One can argue that there can be another - // reason for this error (antivirus, indexer, etc). However, given - // that the entry is seen by a _find*() function and normally you - // can retrieve attributes for a read-only entry and for an entry - // opened in the non-shared mode (see the CreateFile() function - // documentation for details) the only meaningful explanation for - // ERROR_ACCESS_DENIED is that the entry is being removed. Also - // the DeleteFile() documentation mentions such a possibility. - // - verify_error (); - continue; - } + pair<entry_type, path> rpe ( + reparse_point_entry (p, true /* ignore_error */)); - if (reparse_point (a)) + if (rpe.first == entry_type::unknown) { - pair<entry_type, path> rp ( - reparse_point_entry (p, true /* ignore_error */)); + DWORD ec (verify_error ()); - if (rp.first == entry_type::unknown) - { - verify_error (); + // Silently skip the entry if it is not found (being already + // deleted) or we are in the ignore dangling mode. Otherwise, set + // the entry type to unknown. + // + // Note that sometimes trying to obtain information for a being + // removed filesystem entry ends up with ERROR_ACCESS_DENIED (see + // DeleteFile() and CreateFile() for details). Probably getting + // this error code while trying to obtain the reparse point + // information (involves calling CreateFile(FILE_READ_EA) and + // DeviceIoControl()) can also be interpreted differently. We, + // however, always treat it as "access denied" in the detect + // dangling mode for good measure. Let's see if that won't be too + // noisy. + // + if (ec != ERROR_ACCESS_DENIED || !dd) continue; - } - e_.t_ = rp.first; + // Fall through. } - else - e_.t_ = directory (a) - ? entry_type::directory - : entry_type::regular; - if (e_.t_ == entry_type::symlink) + e_.t_ = rpe.first; + + // In this mode the entry type should be present and in the + // ignore_dangling mode it may not be entry_type::unknown. + // + //assert (e_.t_ && (dd || *e_.t_ != entry_type::unknown)); + + // Check if the symlink target exists and is accessible and set the + // target type. + // + if (*e_.t_ == entry_type::symlink) { // Query the target info. // // Note that we use entry_info_handle() rather than - // path_entry_info() to be able to verify an error on failure. + // path_entry_handle_info() to be able to verify an error on + // failure. // pair<auto_handle, BY_HANDLE_FILE_INFORMATION> ti ( entry_info_handle (p, @@ -2175,31 +2372,59 @@ namespace butl if (ti.first == nullhandle) { verify_error (); - continue; + + if (dd) + e_.lt_ = entry_type::unknown; + else + continue; } + else + { + ti.first.close (); // Checks for error. - ti.first.close (); // Checks for error. + e_.lt_ = directory (ti.second.dwFileAttributes) + ? entry_type::directory + : entry_type::regular; - // While at it, set the target type. - // - e_.lt_ = directory (ti.second.dwFileAttributes) - ? entry_type::directory - : entry_type::regular; + e_.mtime_ = to_timestamp (ti.second.ftLastWriteTime); + e_.atime_ = to_timestamp (ti.second.ftLastAccessTime); + } } + + // In this mode the symlink target type should be present and in the + // ignore_dangling mode it may not be entry_type::unknown. + // + //assert (*e_.t_ != entry_type::symlink || + // (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown))); } } - else if (errno == ENOENT) + else { - // End of stream. + DWORD ec (GetLastError ()); + bool first (h_ == -1); + + // Check to distinguish non-existent vs empty directories. // - if (h_ != -1) + // Note that dir_exists() handles not only the "filesystem entry does + // not exist" case but also the case when the entry exists but is not + // a directory. + // + if (first && !dir_exists (e_.base ())) + throw_generic_error (ENOENT); + + if (ec == (first ? ERROR_FILE_NOT_FOUND : ERROR_NO_MORE_FILES)) { - _findclose (h_); - h_ = -1; + // End of stream. + // + if (h_ != -1) + { + FindClose (to_handle (h_)); + h_ = -1; + } } + else + throw_system_error (ec); } - else - throw_generic_error (errno); break; } @@ -2207,14 +2432,27 @@ namespace butl #endif // Search for paths matching the pattern and call the specified function for - // each matching path. Return false if the underlying func() call returns - // false. Otherwise the function conforms to the path_search() description. + // each matching path. Return false if the underlying func() or + // dangling_func() call returns false. Otherwise the function conforms to + // the path_search() description. // // Note that the access to the traversed directory tree (real or virtual) is // performed through the provided filesystem object. // static const string any_dir ("*/"); + // Filesystem traversal callbacks. + // + // Called before entering a directory for the recursive traversal. If + // returns false, then the directory is not entered. + // + using preopen = function<bool (const dir_path&)>; + + // Called before skipping a dangling link. If returns false, then the + // traversal is stopped. + // + using preskip = function<bool (const dir_entry&)>; + template <typename FS> static bool search ( @@ -2222,11 +2460,14 @@ namespace butl dir_path pattern_dir, path_match_flags fl, const function<bool (path&&, const string& pattern, bool interm)>& func, + const function<bool (const dir_entry&)>& dangling_func, FS& filesystem) { bool follow_symlinks ((fl & path_match_flags::follow_symlinks) != path_match_flags::none); + assert (follow_symlinks || dangling_func == nullptr); + // Fast-forward the leftmost pattern non-wildcard components. So, for // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. // @@ -2273,17 +2514,47 @@ namespace butl // bool simple (pattern.simple ()); - // Note that we rely on "small function object" optimization here. + // If symlinks need to be followed, then pass the preskip callback for the + // filesystem iterator. + // + bool fs (follow_symlinks || !simple); + preskip ps; + bool dangling_stop (false); + + if (fs) + { + if (dangling_func != nullptr) + { + // Note that we rely on the "small function object" optimization here. + // + ps = [&dangling_func, &dangling_stop] (const dir_entry& de) -> bool + { + dangling_stop = !dangling_func (de); + return !dangling_stop; + }; + } + else + { + ps = [] (const dir_entry& de) -> bool + { + throw_generic_error ( + de.ltype () == entry_type::symlink ? ENOENT : EACCES); + }; + } + } + + // Note that we rely on the "small function object" optimization here. // typename FS::iterator_type i (filesystem.iterator ( pattern_dir, path_pattern_recursive (pcr), path_pattern_self_matching (pcr), - follow_symlinks || !simple, + fs, [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. { return func (pattern_dir / p, any_dir, true); - })); + }, + move (ps))); // Canonicalize the pattern component collapsing consecutive stars (used to // express that it is recursive) into a single one. @@ -2329,7 +2600,7 @@ namespace butl // represented by the iterator as an empty path, and so we need to // compute it (the leaf would actually be enough) for matching. This // leaf can be acquired from the pattern_dir (if not empty) or - // start_dir. We don't expect the start_dir to be empty, as the + // start_dir. We don't expect the start_dir to be empty, as the // filesystem object must replace an empty start directory with the // current one. This is the case when we search in the current directory // (start_dir is empty) with a pattern that starts with a *** wildcard @@ -2368,10 +2639,14 @@ namespace butl pattern_dir / path_cast<dir_path> (move (p)), fl, func, + dangling_func, filesystem)) return false; } + if (dangling_stop) + return false; + // If requested, also search with the absent-matching pattern path // component omitted, unless this is the only pattern component. // @@ -2379,8 +2654,15 @@ namespace butl pc.to_directory () && (!pattern_dir.empty () || !simple) && pc.string ().find_first_not_of ('*') == string::npos && - !search (pattern.leaf (pc), pattern_dir, fl, func, filesystem)) + !search (pattern.leaf (pc), + pattern_dir, + fl, + func, + dangling_func, + filesystem)) + { return false; + } return true; } @@ -2389,8 +2671,6 @@ namespace butl // static const dir_path empty_dir; - using preopen = function<bool (const dir_path&)>; - // Base for filesystem (see above) implementations. // // Don't copy start directory. It is expected to exist till the end of the @@ -2440,13 +2720,17 @@ namespace butl bool recursive, bool self, bool fs, - preopen po) + preopen po, + preskip ps) : start_ (move (p)), recursive_ (recursive), self_ (self), follow_symlinks_ (fs), - preopen_ (move (po)) + preopen_ (move (po)), + preskip_ (move (ps)) { + assert (fs || ps == nullptr); + open (dir_path (), self_); } @@ -2456,12 +2740,16 @@ namespace butl recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete; recursive_dir_iterator (recursive_dir_iterator&&) = default; - // Return false if no more entries left. Otherwise save the next entry path - // and return true. The path is relative to the directory being + // Return false if no more entries left. Otherwise save the next entry + // path and return true. The path is relative to the directory being // traversed and contains a trailing separator for sub-directories. Throw // std::system_error in case of a failure (insufficient permissions, // dangling symlink encountered, etc). // + // If symlinks need to be followed, then skip inaccessible/dangling + // entries or, if the preskip callback is specified and returns false for + // such an entry, stop the entire traversal. + // bool next (path& p) { @@ -2470,44 +2758,64 @@ namespace butl auto& i (iters_.back ()); - // If we got to the end of directory sub-entries, then go one level up - // and return this directory path. - // - if (i.first == dir_iterator ()) + for (;;) // Skip inaccessible/dangling entries. { - path d (move (i.second)); - iters_.pop_back (); + // If we got to the end of directory sub-entries, then go one level up + // and return this directory path. + // + if (i.first == dir_iterator ()) + { + path d (move (i.second)); + iters_.pop_back (); - // Return the path unless it is the last one (the directory we started - // to iterate from) and the self flag is not set. + // Return the path unless it is the last one (the directory we + // started to iterate from) and the self flag is not set. + // + if (iters_.empty () && !self_) + return false; + + p = move (d); + return true; + } + + const dir_entry& de (*i.first); + + // Append separator if a directory. Note that dir_entry::type() can + // throw. // - if (iters_.empty () && !self_) - return false; + entry_type et (follow_symlinks_ ? de.type () : de.ltype ()); - p = move (d); - return true; - } + // If the entry turned out to be inaccessible/dangling, then skip it + // if the preskip function is not specified or returns true and stop + // the entire traversal otherwise. + // + if (et == entry_type::unknown) + { + if (preskip_ != nullptr && !preskip_ (de)) + { + iters_.clear (); + return false; + } - const dir_entry& de (*i.first); + ++i.first; + continue; + } - // Append separator if a directory. Note that dir_entry::type() can - // throw. - // - entry_type et (follow_symlinks_ ? de.type () : de.ltype ()); - path pe (et == entry_type::directory - ? path_cast<dir_path> (i.second / de.path ()) - : i.second / de.path ()); + path pe (et == entry_type::directory + ? path_cast<dir_path> (i.second / de.path ()) + : i.second / de.path ()); - ++i.first; + ++i.first; - if (recursive_ && pe.to_directory ()) - { - open (path_cast<dir_path> (move (pe)), true); - return next (p); - } + if (recursive_ && pe.to_directory ()) + { + open (path_cast<dir_path> (move (pe)), true); + return next (p); + } - p = move (pe); - return true; + p = move (pe); + return true; + } } private: @@ -2529,10 +2837,15 @@ namespace butl { dir_path d (start_ / p); - // If we follow symlinks, then we ignore the dangling ones. + // If we follow symlinks, then we may need to skip the dangling + // ones. Note, however, that we will be skipping them not at the + // dir_iterator level but ourselves, after calling the preskip + // callback function (see next() for details). // i = dir_iterator (!d.empty () ? d : dir_path ("."), - follow_symlinks_); + follow_symlinks_ + ? dir_iterator::detect_dangling + : dir_iterator::no_follow); } iters_.emplace_back (move (i), move (p)); @@ -2562,6 +2875,7 @@ namespace butl bool self_; bool follow_symlinks_; preopen preopen_; + preskip preskip_; small_vector<pair<dir_iterator, dir_path>, 1> iters_; }; @@ -2585,13 +2899,15 @@ namespace butl bool recursive, bool self, bool follow_symlinks, - preopen po) const + preopen po, + preskip ps) const { return iterator_type (start_ / p, recursive, self, follow_symlinks, - move (po)); + move (po), + move (ps)); } }; @@ -2600,10 +2916,11 @@ namespace butl const path& pattern, const function<bool (path&&, const string& pattern, bool interm)>& func, const dir_path& start, - path_match_flags flags) + path_match_flags flags, + const function<bool (const dir_entry&)>& dangling_func) { real_filesystem fs (pattern.relative () ? start : empty_dir); - search (pattern, dir_path (), flags, func, fs); + search (pattern, dir_path (), flags, func, dangling_func, fs); } // Search path in the directory tree represented by a path. @@ -2761,7 +3078,8 @@ namespace butl bool recursive, bool self, bool /*follow_symlinks*/, - preopen po) + preopen po, + preskip) { // If path and sub-path are non-empty, and both are absolute or relative, // then no extra effort is required (prior to checking if one is a @@ -2820,6 +3138,6 @@ namespace butl path_match_flags flags) { path_filesystem fs (start, entry); - search (pattern, dir_path (), flags, func, fs); + search (pattern, dir_path (), flags, func, nullptr /* dangle_func */, fs); } } |