aboutsummaryrefslogtreecommitdiff
path: root/libbutl/filesystem.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbutl/filesystem.cxx')
-rw-r--r--libbutl/filesystem.cxx927
1 files changed, 607 insertions, 320 deletions
diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx
index 3427ee9..28a0de8 100644
--- a/libbutl/filesystem.cxx
+++ b/libbutl/filesystem.cxx
@@ -1,9 +1,7 @@
// file : libbutl/filesystem.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/filesystem.mxx>
-#endif
+#include <libbutl/filesystem.hxx>
#include <errno.h> // errno, E*
@@ -18,7 +16,7 @@
#else
# include <libbutl/win32-utility.hxx>
-# include <io.h> // _find*(), _unlink(), _chmod()
+# include <io.h> // _unlink(), _chmod()
# include <direct.h> // _mkdir(), _rmdir()
# include <winioctl.h> // FSCTL_SET_REPARSE_POINT
# include <sys/types.h> // _stat
@@ -30,50 +28,22 @@
# define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
# endif
-# include <cwchar> // mbsrtowcs(), wcsrtombs(), mbstate_t
-# include <cstring> // strncmp()
+# include <cwchar> // mbsrtowcs(), wcsrtombs(), mbstate_t
+# include <cstring> // strncmp()
+# include <type_traits> // is_same
#endif
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstddef>
-#include <cstdint>
-#include <utility>
-#include <iterator>
-#include <functional>
-
+#include <chrono>
#include <vector>
#include <memory> // unique_ptr
+#include <cassert>
#include <algorithm> // find(), copy()
#include <system_error>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.filesystem;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-#endif
-import butl.path;
-import butl.timestamp;
-import butl.path_pattern;
-#endif
-import butl.utility; // throw_generic_error()
-import butl.fdstream;
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/fdstream.mxx>
-#include <libbutl/small-vector.mxx>
-#endif
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx> // throw_generic_error()
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
#ifndef _WIN32
# ifndef PATH_MAX
@@ -214,6 +184,19 @@ namespace butl
// static inline constexpr int
// ansec (...) {return 0;}
+ static inline entry_time
+ entry_tm (const struct stat& s) noexcept
+ {
+ auto tm = [] (time_t sec, auto nsec) -> timestamp
+ {
+ return system_clock::from_time_t (sec) +
+ chrono::duration_cast<duration> (chrono::nanoseconds (nsec));
+ };
+
+ return {tm (s.st_mtime, mnsec<struct stat> (&s, true)),
+ tm (s.st_atime, ansec<struct stat> (&s, true))};
+ }
+
// Return the modification and access times of a regular file or directory.
//
static entry_time
@@ -231,14 +214,7 @@ namespace butl
if (dir ? !S_ISDIR (s.st_mode) : !S_ISREG (s.st_mode))
return {timestamp_nonexistent, timestamp_nonexistent};
- auto tm = [] (time_t sec, auto nsec) -> timestamp
- {
- return system_clock::from_time_t (sec) +
- chrono::duration_cast<duration> (chrono::nanoseconds (nsec));
- };
-
- return {tm (s.st_mtime, mnsec<struct stat> (&s, true)),
- tm (s.st_atime, ansec<struct stat> (&s, true))};
+ return entry_tm (s);
}
// Set the modification and access times for a regular file or directory.
@@ -340,16 +316,15 @@ namespace butl
// Open a filesystem entry for reading and optionally writing its
// meta-information and return the entry handle and meta-information if the
- // path refers to an existing entry and nullhandle otherwise. Follow reparse
- // points by default. Underlying OS errors are reported by throwing
- // std::system_error, unless ignore_error is true in which case nullhandle
- // is returned. In the latter case the error code can be obtained by calling
- // GetLastError().
+ // path refers to an existing entry and nullhandle otherwise. Underlying OS
+ // errors are reported by throwing std::system_error, unless ignore_error is
+ // true in which case nullhandle is returned. In the latter case the error
+ // code can be obtained by calling GetLastError().
//
static inline pair<win32::auto_handle, BY_HANDLE_FILE_INFORMATION>
entry_info_handle (const char* p,
bool write,
- bool fr = true,
+ bool follow_reparse_points,
bool ie = false)
{
// Open the entry for reading/writing its meta-information. Follow reparse
@@ -364,7 +339,7 @@ namespace butl
nullptr,
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS | // Required for a directory.
- (fr ? 0 : FILE_FLAG_OPEN_REPARSE_POINT),
+ (follow_reparse_points ? 0 : FILE_FLAG_OPEN_REPARSE_POINT),
nullptr));
if (h == nullhandle)
@@ -389,13 +364,15 @@ namespace butl
}
// Return a flag indicating whether the path is to an existing filesystem
- // entry and its meta-information if so. Follow reparse points by default.
+ // entry and its meta-information if so.
//
static inline pair<bool, BY_HANDLE_FILE_INFORMATION>
- path_entry_info (const char* p, bool fr = true, bool ie = false)
+ path_entry_handle_info (const char* p,
+ bool follow_reparse_points,
+ bool ie = false)
{
pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
- entry_info_handle (p, false /* write */, fr, ie));
+ entry_info_handle (p, false /* write */, follow_reparse_points, ie));
if (hi.first == nullhandle)
return make_pair (false, BY_HANDLE_FILE_INFORMATION ());
@@ -407,9 +384,34 @@ namespace butl
}
static inline pair<bool, BY_HANDLE_FILE_INFORMATION>
- path_entry_info (const path& p, bool fr = true, bool ie = false)
+ path_entry_handle_info (const path& p, bool fr, bool ie = false)
+ {
+ return path_entry_handle_info (p.string ().c_str (), fr, ie);
+ }
+
+ // Return a flag indicating whether the path is to an existing filesystem
+ // entry and its extended attributes if so. Don't follow reparse points.
+ //
+ static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA>
+ path_entry_info (const char* p, bool ie = false)
+ {
+ WIN32_FILE_ATTRIBUTE_DATA r;
+ if (!GetFileAttributesExA (p, GetFileExInfoStandard, &r))
+ {
+ DWORD ec;
+ if (ie || error_file_not_found (ec = GetLastError ()))
+ return make_pair (false, WIN32_FILE_ATTRIBUTE_DATA ());
+
+ throw_system_error (ec);
+ }
+
+ return make_pair (true, r);
+ }
+
+ static inline pair<bool, WIN32_FILE_ATTRIBUTE_DATA>
+ path_entry_info (const path& p, bool ie = false)
{
- return path_entry_info (p.string ().c_str (), fr, ie);
+ return path_entry_info (p.string ().c_str (), ie);
}
// Reparse point data.
@@ -645,8 +647,48 @@ namespace butl
return reparse_point_entry (p.string ().c_str (), ie);
}
- pair<bool, entry_stat>
- path_entry (const char* p, bool fl, bool ie)
+ static inline timestamp
+ to_timestamp (const FILETIME& t)
+ {
+ // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
+ // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch"
+ // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds.
+ //
+ uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) |
+ t.dwLowDateTime);
+
+ nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch.
+ nsec *= 100; // Now in nanoseconds.
+
+ return timestamp (
+ chrono::duration_cast<duration> (chrono::nanoseconds (nsec)));
+ }
+
+ static inline FILETIME
+ to_filetime (timestamp t)
+ {
+ // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
+ // (1601-01-01T00:00:00Z). To convert "UNIX epoch" (1970-01-01T00:00:00Z)
+ // to it we need to add 11644473600 seconds.
+ //
+ uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> (
+ t.time_since_epoch ()).count ());
+
+ ticks /= 100; // Now in 100 nanosecond "ticks".
+ ticks += 11644473600ULL * 10000000; // Now in "Windows epoch".
+
+ FILETIME r;
+ r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF;
+ r.dwLowDateTime = ticks & 0xFFFFFFFF;
+ return r;
+ }
+
+ // If the being returned entry type is regular or directory and et is not
+ // NULL, then also save the entry modification and access times into the
+ // referenced variable.
+ //
+ static inline pair<bool, entry_stat>
+ path_entry (const char* p, bool fl, bool ie, entry_time* et)
{
// A path like 'C:', while being a root path in our terminology, is not as
// such for Windows, that maintains current directory for each drive, and
@@ -657,73 +699,105 @@ namespace butl
string d;
if (path::traits_type::root (p))
{
- d = p;
+ d = string (p); // GCC bug #105329.
d += path::traits_type::directory_separator;
p = d.c_str ();
}
// Stat the entry not following reparse points.
//
- pair<bool, BY_HANDLE_FILE_INFORMATION> pi (
- path_entry_info (p, false /* follow_reparse_points */, ie));
+ pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p, ie));
if (!pi.first)
return make_pair (false, entry_stat {entry_type::unknown, 0});
- if (reparse_point (pi.second.dwFileAttributes))
+ auto entry_info = [et] (const auto& ei)
{
- pair<entry_type, path> rp (reparse_point_entry (p, ie));
+ if (et != nullptr)
+ {
+ et->modification = to_timestamp (ei.ftLastWriteTime);
+ et->access = to_timestamp (ei.ftLastAccessTime);
+ }
- if (rp.first == entry_type::symlink)
+ if (directory (ei.dwFileAttributes))
+ return make_pair (true, entry_stat {entry_type::directory, 0});
+ else
+ return make_pair (
+ true,
+ entry_stat {entry_type::regular,
+ ((uint64_t (ei.nFileSizeHigh) << 32) | ei.nFileSizeLow)});
+ };
+
+ if (!reparse_point (pi.second.dwFileAttributes))
+ return entry_info (pi.second);
+
+ pair<entry_type, path> rp (reparse_point_entry (p, ie));
+
+ if (rp.first == entry_type::symlink)
+ {
+ // If following symlinks is requested, then follow the reparse point and
+ // return its target information. Otherwise, return the symlink entry
+ // type.
+ //
+ if (fl)
{
- // If following symlinks is requested, then follow the reparse point,
- // overwrite its own information with the resolved target information,
- // and fall through. Otherwise, return the symlink entry type.
- //
- if (fl)
- {
- pi = path_entry_info (p, true /* follow_reparse_points */, ie);
+ pair<bool, BY_HANDLE_FILE_INFORMATION> pi (
+ path_entry_handle_info (p, true /* follow_reparse_points */, ie));
- if (!pi.first)
- return make_pair (false, entry_stat {entry_type::unknown, 0});
- }
- else
- return make_pair (true, entry_stat {entry_type::symlink, 0});
+ return pi.first
+ ? entry_info (pi.second)
+ : make_pair (false, entry_stat {entry_type::unknown, 0});
}
- else if (rp.first == entry_type::unknown)
- return make_pair (false, entry_stat {entry_type::unknown, 0});
- else // entry_type::other
- return make_pair (true, entry_stat {entry_type::other, 0});
+ else
+ return make_pair (true, entry_stat {entry_type::symlink, 0});
}
+ else if (rp.first == entry_type::unknown)
+ return make_pair (false, entry_stat {entry_type::unknown, 0});
+ else // entry_type::other
+ return make_pair (true, entry_stat {entry_type::other, 0});
+ }
- if (directory (pi.second.dwFileAttributes))
- return make_pair (true, entry_stat {entry_type::directory, 0});
- else
- return make_pair (
- true,
- entry_stat {entry_type::regular,
- ((uint64_t (pi.second.nFileSizeHigh) << 32) |
- pi.second.nFileSizeLow)});
+ static inline pair<bool, entry_stat>
+ path_entry (const path& p, bool fl, bool ie, entry_time* et)
+ {
+ return path_entry (p.string ().c_str (), fl, ie, et);
+ }
+
+ pair<bool, entry_stat>
+ path_entry (const char* p, bool fl, bool ie)
+ {
+ return path_entry (p, fl, ie, nullptr /* entry_time */);
}
permissions
path_permissions (const path& p)
{
- pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p));
+ // Let's optimize for the common case when the entry is not a reparse
+ // point.
+ //
+ auto attr_to_perm = [] (const auto& pi) -> permissions
+ {
+ if (!pi.first)
+ throw_generic_error (ENOENT);
- if (!pi.first)
- throw_generic_error (ENOENT);
+ // On Windows a filesystem entry is always readable. Also there is no
+ // notion of group/other permissions at OS level, so we extrapolate user
+ // permissions to group/other permissions (as the _stat() function
+ // does).
+ //
+ permissions r (permissions::ru | permissions::rg | permissions::ro);
- // On Windows a filesystem entry is always readable. Also there is no
- // notion of group/other permissions at OS level, so we extrapolate user
- // permissions to group/other permissions (as the _stat() function does).
- //
- permissions r (permissions::ru | permissions::rg | permissions::ro);
+ if (!readonly (pi.second.dwFileAttributes))
+ r |= permissions::wu | permissions::wg | permissions::wo;
- if (!readonly (pi.second.dwFileAttributes))
- r |= permissions::wu | permissions::wg | permissions::wo;
+ return r;
+ };
- return r;
+ pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p));
+ return !pi.first || !reparse_point (pi.second.dwFileAttributes)
+ ? attr_to_perm (pi)
+ : attr_to_perm (
+ path_entry_handle_info (p, true /* follow_reparse_points */));
}
void
@@ -749,50 +823,26 @@ namespace butl
static entry_time
entry_tm (const char* p, bool dir)
{
- pair<bool, BY_HANDLE_FILE_INFORMATION> pi (path_entry_info (p));
-
- // If the entry is of the wrong type, then let's pretend that it doesn't
- // exists.
+ // Let's optimize for the common case when the entry is not a reparse
+ // point.
//
- if (!pi.first || directory (pi.second.dwFileAttributes) != dir)
- return {timestamp_nonexistent, timestamp_nonexistent};
-
- auto tm = [] (const FILETIME& t) -> timestamp
+ auto attr_to_time = [dir] (const auto& pi) -> entry_time
{
- // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
- // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch"
- // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds.
+ // If the entry is of the wrong type, then let's pretend that it doesn't
+ // exists.
//
- uint64_t nsec ((static_cast<uint64_t> (t.dwHighDateTime) << 32) |
- t.dwLowDateTime);
-
- nsec -= 11644473600ULL * 10000000; // Now in UNIX epoch.
- nsec *= 100; // Now in nanoseconds.
+ if (!pi.first || directory (pi.second.dwFileAttributes) != dir)
+ return entry_time {timestamp_nonexistent, timestamp_nonexistent};
- return timestamp (
- chrono::duration_cast<duration> (chrono::nanoseconds (nsec)));
+ return entry_time {to_timestamp (pi.second.ftLastWriteTime),
+ to_timestamp (pi.second.ftLastAccessTime)};
};
- return {tm (pi.second.ftLastWriteTime), tm (pi.second.ftLastAccessTime)};
- }
-
- static inline FILETIME
- to_filetime (timestamp t)
- {
- // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
- // (1601-01-01T00:00:00Z). To convert "UNIX epoch"
- // (1970-01-01T00:00:00Z) to it we need to add 11644473600 seconds.
- //
- uint64_t ticks (chrono::duration_cast<chrono::nanoseconds> (
- t.time_since_epoch ()).count ());
-
- ticks /= 100; // Now in 100 nanosecond "ticks".
- ticks += 11644473600ULL * 10000000; // Now in "Windows epoch".
-
- FILETIME r;
- r.dwHighDateTime = (ticks >> 32) & 0xFFFFFFFF;
- r.dwLowDateTime = ticks & 0xFFFFFFFF;
- return r;
+ pair<bool, WIN32_FILE_ATTRIBUTE_DATA> pi (path_entry_info (p));
+ return !pi.first || !reparse_point (pi.second.dwFileAttributes)
+ ? attr_to_time (pi)
+ : attr_to_time (
+ path_entry_handle_info (p, true /* follow_reparse_points */));
}
// Set the modification and access times for a regular file or directory.
@@ -803,7 +853,9 @@ namespace butl
// See also touch_file() below.
//
pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
- entry_info_handle (p, true /* write */));
+ entry_info_handle (p,
+ true /* write */,
+ true /* follow_reparse_points */));
// If the entry is of the wrong type, then let's pretend that it doesn't
// exist.
@@ -888,7 +940,9 @@ namespace butl
// implicitly.
//
pair<auto_handle, BY_HANDLE_FILE_INFORMATION> hi (
- entry_info_handle (p.string ().c_str (), true /* write */));
+ entry_info_handle (p.string ().c_str (),
+ true /* write */,
+ true /* follow_reparse_points */));
if (hi.first != nullhandle)
{
@@ -1037,7 +1091,7 @@ namespace butl
//
try
{
- for (const dir_entry& de: dir_iterator (p, false /* ignore_dangling */))
+ for (const dir_entry& de: dir_iterator (p, dir_iterator::no_follow))
{
path ep (p / de.path ()); //@@ Would be good to reuse the buffer.
@@ -1088,12 +1142,12 @@ namespace butl
// failure (see mventry() for details). If that's the case, we will keep
// trying to move the file for two seconds.
//
- for (size_t i (0); i < 21; ++i)
+ for (size_t i (0); i < 41; ++i)
{
- // Sleep 100 milliseconds before the removal retry.
+ // Sleep 50 milliseconds before the removal retry.
//
if (i != 0)
- Sleep (100);
+ Sleep (50);
ur = _unlink (f);
@@ -1641,9 +1695,12 @@ namespace butl
}
void
- cpfile (const path& from, const path& to, cpflags fl)
+ cpfile (const path& from,
+ const path& to,
+ cpflags fl,
+ optional<permissions> cperm)
{
- permissions perm (path_permissions (from));
+ permissions perm (cperm ? *cperm : path_permissions (from));
auto_rmfile rm;
cpfile<is_base_of<system_error, ios_base::failure>::value> (
@@ -1735,12 +1792,12 @@ namespace butl
// fdopen().
//
DWORD ec;
- for (size_t i (0); i < 21; ++i)
+ for (size_t i (0); i < 41; ++i)
{
// Sleep 100 milliseconds before the move retry.
//
if (i != 0)
- Sleep (100);
+ Sleep (50);
if (MoveFileExA (f, t, mfl))
return;
@@ -1842,7 +1899,7 @@ namespace butl
h_ = x.h_;
x.h_ = nullptr;
- ignore_dangling_ = x.ignore_dangling_;
+ mode_ = x.mode_;
}
return *this;
}
@@ -1863,6 +1920,11 @@ namespace butl
entry_type dir_entry::
type (bool follow_symlinks) const
{
+ // Note that this function can only be used for resolving an entry type
+ // lazily and thus can't be used with the detect_dangling dir_iterator
+ // mode (see dir_iterator::next () implementation for details). Thus, we
+ // always throw on the stat()/lstat() failure.
+ //
path_type p (b_ / p_);
struct stat s;
if ((follow_symlinks
@@ -1870,7 +1932,18 @@ namespace butl
: lstat (p.string ().c_str (), &s)) != 0)
throw_generic_error (errno);
- return butl::type (s);
+ entry_type r (butl::type (s));
+
+ // While at it, also save the entry modification and access times.
+ //
+ if (r != entry_type::symlink)
+ {
+ entry_time t (entry_tm (s));
+ mtime_ = t.modification;
+ atime_ = t.access;
+ }
+
+ return r;
}
// dir_iterator
@@ -1881,8 +1954,8 @@ namespace butl
};
dir_iterator::
- dir_iterator (const dir_path& d, bool ignore_dangling)
- : ignore_dangling_ (ignore_dangling)
+ dir_iterator (const dir_path& d, mode m)
+ : mode_ (m)
{
unique_ptr<DIR, dir_deleter> h (opendir (d.string ().c_str ()));
h_ = h.get ();
@@ -1898,7 +1971,7 @@ namespace butl
}
template <typename D>
- static inline /*constexpr*/ entry_type
+ static inline /*constexpr*/ optional<entry_type>
d_type (const D* d, decltype(d->d_type)*)
{
switch (d->d_type)
@@ -1926,13 +1999,13 @@ namespace butl
#endif
return entry_type::other;
- default: return entry_type::unknown;
+ default: return nullopt;
}
}
template <typename D>
- static inline constexpr entry_type
- d_type (...) {return entry_type::unknown;}
+ static inline constexpr optional<entry_type>
+ d_type (...) {return nullopt;}
void dir_iterator::
next ()
@@ -1954,25 +2027,43 @@ namespace butl
e_.p_ = move (p);
e_.t_ = d_type<struct dirent> (de, nullptr);
- e_.lt_ = entry_type::unknown;
+ e_.lt_ = nullopt;
+
+ e_.mtime_ = timestamp_unknown;
+ e_.atime_ = timestamp_unknown;
// If requested, we ignore dangling symlinks, skipping ones with
- // non-existing or inaccessible targets.
+ // non-existing or inaccessible targets (ignore_dangling mode), or set
+ // the entry_type::unknown type for them (detect_dangling mode).
//
- if (ignore_dangling_)
+ if (mode_ != no_follow)
{
- // Note that ltype () can potentially lstat() (see d_type() for
+ bool dd (mode_ == detect_dangling);
+
+ // Note that ltype () can potentially lstat() (see type() for
// details) and so throw. We, however, need to skip the entry if it
// is already removed (due to a race) and throw on any other error.
//
path fp (e_.base () / e_.path ());
const char* p (fp.string ().c_str ());
- if (e_.t_ == entry_type::unknown)
+ if (!e_.t_)
{
struct stat s;
if (lstat (p, &s) != 0)
{
+ // Given that we have already enumerated the filesystem entry,
+ // these error codes can only mean that the entry doesn't exist
+ // anymore and so we always skip it.
+ //
+ // If errno is EACCES, then the permission to search a directory
+ // we currently iterate over has been revoked. Throwing in this
+ // case sounds like the best choice.
+ //
+ // Note that according to POSIX the filesystem entry we call
+ // lstat() on doesn't require any specific permissions to be
+ // granted.
+ //
if (errno == ENOENT || errno == ENOTDIR)
continue;
@@ -1980,21 +2071,53 @@ namespace butl
}
e_.t_ = type (s);
+
+ if (*e_.t_ != entry_type::symlink)
+ {
+ entry_time t (entry_tm (s));
+ e_.mtime_ = t.modification;
+ e_.atime_ = t.access;
+ }
}
- if (e_.t_ == entry_type::symlink)
+ // The entry type should be present and may not be
+ // entry_type::unknown.
+ //
+ //assert (e_.t_ && *e_.t_ != entry_type::unknown);
+
+ // Check if the symlink target exists and is accessible and set the
+ // target type.
+ //
+ if (*e_.t_ == entry_type::symlink)
{
struct stat s;
if (stat (p, &s) != 0)
{
if (errno == ENOENT || errno == ENOTDIR || errno == EACCES)
- continue;
-
- throw_generic_error (errno);
+ {
+ if (dd)
+ e_.lt_ = entry_type::unknown;
+ else
+ continue;
+ }
+ else
+ throw_generic_error (errno);
}
+ else
+ {
+ e_.lt_ = type (s);
- e_.lt_ = type (s); // While at it, set the target type.
+ entry_time t (entry_tm (s));
+ e_.mtime_ = t.modification;
+ e_.atime_ = t.access;
+ }
}
+
+ // The symlink target type should be present and in the
+ // ignore_dangling mode it may not be entry_type::unknown.
+ //
+ //assert (*e_.t_ != entry_type::symlink ||
+ // (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown)));
}
}
else if (errno == 0)
@@ -2015,11 +2138,49 @@ namespace butl
// dir_entry
//
+ entry_type dir_entry::
+ type (bool follow_symlinks) const
+ {
+ // Note that this function can only be used for resolving an entry type
+ // lazily and thus can't be used with the detect_dangling dir_iterator
+ // mode (see dir_iterator::next () implementation for details). Thus, we
+ // always throw if the entry info can't be retrieved.
+ //
+ // While at it, also save the entry modification and access times.
+ //
+ path_type p (base () / path ());
+ entry_time et;
+ pair<bool, entry_stat> e (
+ path_entry (p, follow_symlinks, false /* ignore_error */, &et));
+
+ if (!e.first)
+ throw_generic_error (ENOENT);
+
+ if (e.second.type == entry_type::regular ||
+ e.second.type == entry_type::directory)
+ {
+ mtime_ = et.modification;
+ atime_ = et.access;
+ }
+
+ return e.second.type;
+ }
+
+ // dir_iterator
+ //
+ static_assert(is_same<HANDLE, void*>::value, "HANDLE is not void*");
+
+ static inline HANDLE
+ to_handle (intptr_t h)
+ {
+ return reinterpret_cast<HANDLE> (h);
+ }
+
dir_iterator::
~dir_iterator ()
{
if (h_ != -1)
- _findclose (h_); // Ignore any errors.
+ FindClose (to_handle (h_)); // Ignore any errors.
}
dir_iterator& dir_iterator::
@@ -2029,56 +2190,32 @@ namespace butl
{
e_ = move (x.e_);
- if (h_ != -1 && _findclose (h_) == -1)
- throw_generic_error (errno);
+ if (h_ != -1 && !FindClose (to_handle (h_)))
+ throw_system_error (GetLastError ());
h_ = x.h_;
x.h_ = -1;
- ignore_dangling_ = x.ignore_dangling_;
+ mode_ = x.mode_;
}
return *this;
}
- entry_type dir_entry::
- type (bool follow_symlinks) const
- {
- path_type p (base () / path ());
- pair<bool, entry_stat> e (path_entry (p, follow_symlinks));
-
- if (!e.first)
- throw_generic_error (ENOENT);
-
- return e.second.type;
- }
-
- // dir_iterator
- //
- struct auto_dir
+ dir_iterator::
+ dir_iterator (const dir_path& d, mode m)
+ : mode_ (m)
{
- explicit
- auto_dir (intptr_t& h): h_ (&h) {}
-
- auto_dir (const auto_dir&) = delete;
- auto_dir& operator= (const auto_dir&) = delete;
-
- ~auto_dir ()
+ struct deleter
{
- if (h_ != nullptr && *h_ != -1)
- _findclose (*h_);
- }
-
- void release () {h_ = nullptr;}
+ void operator() (intptr_t* p) const
+ {
+ if (p != nullptr && *p != -1)
+ FindClose (to_handle (*p));
+ }
+ };
- private:
- intptr_t* h_;
- };
+ unique_ptr<intptr_t, deleter> h (&h_);
- dir_iterator::
- dir_iterator (const dir_path& d, bool ignore_dangling)
- : ignore_dangling_ (ignore_dangling)
- {
- auto_dir h (h_);
e_.b_ = d; // Used by next().
next ();
@@ -2091,31 +2228,37 @@ namespace butl
for (;;)
{
bool r;
- _finddata_t fi;
+ WIN32_FIND_DATA fi;
if (h_ == -1)
{
// The call is made from the constructor. Any other call with h_ == -1
// is illegal.
//
-
- // Check to distinguish non-existent vs empty directories.
+ // Note that we used to check for the directory existence before
+ // iterating over it. However, let's not pessimize things and only
+ // check for the directory existence if FindFirstFileExA() fails.
//
- if (!dir_exists (e_.base ()))
- throw_generic_error (ENOENT);
- h_ = _findfirst ((e_.base () / path ("*")).string ().c_str (), &fi);
- r = h_ != -1;
+ h_ = reinterpret_cast<intptr_t> (
+ FindFirstFileExA ((e_.base () / path ("*")).string ().c_str (),
+ FindExInfoBasic,
+ &fi,
+ FindExSearchNameMatch,
+ NULL,
+ 0));
+
+ r = (h_ != -1);
}
else
- r = _findnext (h_, &fi) == 0;
+ r = FindNextFileA (to_handle (h_), &fi);
if (r)
{
// We can accept some overhead for '.' and '..' (relying on short
// string optimization) in favor of a more compact code.
//
- path p (fi.name);
+ path p (fi.cFileName);
// Skip '.' and '..'.
//
@@ -2124,26 +2267,47 @@ namespace butl
e_.p_ = move (p);
- // Note that the entry type detection always requires to additionally
- // query the entry information. Thus, we evaluate its type lazily.
+ DWORD a (fi.dwFileAttributes);
+ bool rp (reparse_point (a));
+
+ // Evaluate the entry type lazily if this is a reparse point since it
+ // requires to additionally query the entry information (see
+ // reparse_point_entry() for details).
//
- e_.t_ = entry_type::unknown;
+ e_.t_ = rp ? nullopt :
+ directory (a) ? optional<entry_type> (entry_type::directory) :
+ optional<entry_type> (entry_type::regular) ;
- e_.lt_ = entry_type::unknown;
+ e_.lt_ = nullopt;
+
+ e_.mtime_ = rp ? timestamp_unknown : to_timestamp (fi.ftLastWriteTime);
+
+ // Note that according to MSDN for the FindFirstFile[Ex]() function
+ // "the NTFS file system delays updates to the last access time for a
+ // file by up to 1 hour after the last access" and "on the FAT file
+ // system access time has a resolution of 1 day".
+ //
+ e_.atime_ = timestamp_unknown;
// If requested, we ignore dangling symlinks and junctions, skipping
- // ones with non-existing or inaccessible targets.
+ // ones with non-existing or inaccessible targets (ignore_dangling
+ // mode), or set the entry_type::unknown type for them
+ // (detect_dangling mode).
//
- if (ignore_dangling_)
+ if (rp && mode_ != no_follow)
{
+ bool dd (mode_ == detect_dangling);
+
// Check the last error code throwing for codes other than "path not
- // found" and "access denied".
+ // found" and "access denied" and returning this error code
+ // otherwise.
//
auto verify_error = [] ()
{
DWORD ec (GetLastError ());
if (!error_file_not_found (ec) && ec != ERROR_ACCESS_DENIED)
throw_system_error (ec);
+ return ec;
};
// Note that ltype() queries the entry information due to the type
@@ -2154,48 +2318,50 @@ namespace butl
path fp (e_.base () / e_.path ());
const char* p (fp.string ().c_str ());
- DWORD a (GetFileAttributesA (p));
- if (a == INVALID_FILE_ATTRIBUTES)
- {
- // Note that sometimes trying to obtain attributes for a
- // filesystem entry that was potentially removed ends up with
- // ERROR_ACCESS_DENIED. One can argue that there can be another
- // reason for this error (antivirus, indexer, etc). However, given
- // that the entry is seen by a _find*() function and normally you
- // can retrieve attributes for a read-only entry and for an entry
- // opened in the non-shared mode (see the CreateFile() function
- // documentation for details) the only meaningful explanation for
- // ERROR_ACCESS_DENIED is that the entry is being removed. Also
- // the DeleteFile() documentation mentions such a possibility.
- //
- verify_error ();
- continue;
- }
+ pair<entry_type, path> rpe (
+ reparse_point_entry (p, true /* ignore_error */));
- if (reparse_point (a))
+ if (rpe.first == entry_type::unknown)
{
- pair<entry_type, path> rp (
- reparse_point_entry (p, true /* ignore_error */));
+ DWORD ec (verify_error ());
- if (rp.first == entry_type::unknown)
- {
- verify_error ();
+ // Silently skip the entry if it is not found (being already
+ // deleted) or we are in the ignore dangling mode. Otherwise, set
+ // the entry type to unknown.
+ //
+ // Note that sometimes trying to obtain information for a being
+ // removed filesystem entry ends up with ERROR_ACCESS_DENIED (see
+ // DeleteFile() and CreateFile() for details). Probably getting
+ // this error code while trying to obtain the reparse point
+ // information (involves calling CreateFile(FILE_READ_EA) and
+ // DeviceIoControl()) can also be interpreted differently. We,
+ // however, always treat it as "access denied" in the detect
+ // dangling mode for good measure. Let's see if that won't be too
+ // noisy.
+ //
+ if (ec != ERROR_ACCESS_DENIED || !dd)
continue;
- }
- e_.t_ = rp.first;
+ // Fall through.
}
- else
- e_.t_ = directory (a)
- ? entry_type::directory
- : entry_type::regular;
- if (e_.t_ == entry_type::symlink)
+ e_.t_ = rpe.first;
+
+ // In this mode the entry type should be present and in the
+ // ignore_dangling mode it may not be entry_type::unknown.
+ //
+ //assert (e_.t_ && (dd || *e_.t_ != entry_type::unknown));
+
+ // Check if the symlink target exists and is accessible and set the
+ // target type.
+ //
+ if (*e_.t_ == entry_type::symlink)
{
// Query the target info.
//
// Note that we use entry_info_handle() rather than
- // path_entry_info() to be able to verify an error on failure.
+ // path_entry_handle_info() to be able to verify an error on
+ // failure.
//
pair<auto_handle, BY_HANDLE_FILE_INFORMATION> ti (
entry_info_handle (p,
@@ -2206,31 +2372,59 @@ namespace butl
if (ti.first == nullhandle)
{
verify_error ();
- continue;
+
+ if (dd)
+ e_.lt_ = entry_type::unknown;
+ else
+ continue;
}
+ else
+ {
+ ti.first.close (); // Checks for error.
- ti.first.close (); // Checks for error.
+ e_.lt_ = directory (ti.second.dwFileAttributes)
+ ? entry_type::directory
+ : entry_type::regular;
- // While at it, set the target type.
- //
- e_.lt_ = directory (ti.second.dwFileAttributes)
- ? entry_type::directory
- : entry_type::regular;
+ e_.mtime_ = to_timestamp (ti.second.ftLastWriteTime);
+ e_.atime_ = to_timestamp (ti.second.ftLastAccessTime);
+ }
}
+
+ // In this mode the symlink target type should be present and in the
+ // ignore_dangling mode it may not be entry_type::unknown.
+ //
+ //assert (*e_.t_ != entry_type::symlink ||
+ // (e_.lt_ && (dd || *e_.lt_ != entry_type::unknown)));
}
}
- else if (errno == ENOENT)
+ else
{
- // End of stream.
+ DWORD ec (GetLastError ());
+ bool first (h_ == -1);
+
+ // Check to distinguish non-existent vs empty directories.
//
- if (h_ != -1)
+ // Note that dir_exists() handles not only the "filesystem entry does
+ // not exist" case but also the case when the entry exists but is not
+ // a directory.
+ //
+ if (first && !dir_exists (e_.base ()))
+ throw_generic_error (ENOENT);
+
+ if (ec == (first ? ERROR_FILE_NOT_FOUND : ERROR_NO_MORE_FILES))
{
- _findclose (h_);
- h_ = -1;
+ // End of stream.
+ //
+ if (h_ != -1)
+ {
+ FindClose (to_handle (h_));
+ h_ = -1;
+ }
}
+ else
+ throw_system_error (ec);
}
- else
- throw_generic_error (errno);
break;
}
@@ -2238,14 +2432,27 @@ namespace butl
#endif
// Search for paths matching the pattern and call the specified function for
- // each matching path. Return false if the underlying func() call returns
- // false. Otherwise the function conforms to the path_search() description.
+ // each matching path. Return false if the underlying func() or
+ // dangling_func() call returns false. Otherwise the function conforms to
+ // the path_search() description.
//
// Note that the access to the traversed directory tree (real or virtual) is
// performed through the provided filesystem object.
//
static const string any_dir ("*/");
+ // Filesystem traversal callbacks.
+ //
+ // Called before entering a directory for the recursive traversal. If
+ // returns false, then the directory is not entered.
+ //
+ using preopen = function<bool (const dir_path&)>;
+
+ // Called before skipping a dangling link. If returns false, then the
+ // traversal is stopped.
+ //
+ using preskip = function<bool (const dir_entry&)>;
+
template <typename FS>
static bool
search (
@@ -2253,11 +2460,14 @@ namespace butl
dir_path pattern_dir,
path_match_flags fl,
const function<bool (path&&, const string& pattern, bool interm)>& func,
+ const function<bool (const dir_entry&)>& dangling_func,
FS& filesystem)
{
bool follow_symlinks ((fl & path_match_flags::follow_symlinks) !=
path_match_flags::none);
+ assert (follow_symlinks || dangling_func == nullptr);
+
// Fast-forward the leftmost pattern non-wildcard components. So, for
// example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/.
//
@@ -2304,17 +2514,47 @@ namespace butl
//
bool simple (pattern.simple ());
- // Note that we rely on "small function object" optimization here.
+ // If symlinks need to be followed, then pass the preskip callback for the
+ // filesystem iterator.
+ //
+ bool fs (follow_symlinks || !simple);
+ preskip ps;
+ bool dangling_stop (false);
+
+ if (fs)
+ {
+ if (dangling_func != nullptr)
+ {
+ // Note that we rely on the "small function object" optimization here.
+ //
+ ps = [&dangling_func, &dangling_stop] (const dir_entry& de) -> bool
+ {
+ dangling_stop = !dangling_func (de);
+ return !dangling_stop;
+ };
+ }
+ else
+ {
+ ps = [] (const dir_entry& de) -> bool
+ {
+ throw_generic_error (
+ de.ltype () == entry_type::symlink ? ENOENT : EACCES);
+ };
+ }
+ }
+
+ // Note that we rely on the "small function object" optimization here.
//
typename FS::iterator_type i (filesystem.iterator (
pattern_dir,
path_pattern_recursive (pcr),
path_pattern_self_matching (pcr),
- follow_symlinks || !simple,
+ fs,
[&pattern_dir, &func] (const dir_path& p) -> bool // Preopen.
{
return func (pattern_dir / p, any_dir, true);
- }));
+ },
+ move (ps)));
// Canonicalize the pattern component collapsing consecutive stars (used to
// express that it is recursive) into a single one.
@@ -2360,7 +2600,7 @@ namespace butl
// represented by the iterator as an empty path, and so we need to
// compute it (the leaf would actually be enough) for matching. This
// leaf can be acquired from the pattern_dir (if not empty) or
- // start_dir. We don't expect the start_dir to be empty, as the
+ // start_dir. We don't expect the start_dir to be empty, as the
// filesystem object must replace an empty start directory with the
// current one. This is the case when we search in the current directory
// (start_dir is empty) with a pattern that starts with a *** wildcard
@@ -2399,10 +2639,14 @@ namespace butl
pattern_dir / path_cast<dir_path> (move (p)),
fl,
func,
+ dangling_func,
filesystem))
return false;
}
+ if (dangling_stop)
+ return false;
+
// If requested, also search with the absent-matching pattern path
// component omitted, unless this is the only pattern component.
//
@@ -2410,8 +2654,15 @@ namespace butl
pc.to_directory () &&
(!pattern_dir.empty () || !simple) &&
pc.string ().find_first_not_of ('*') == string::npos &&
- !search (pattern.leaf (pc), pattern_dir, fl, func, filesystem))
+ !search (pattern.leaf (pc),
+ pattern_dir,
+ fl,
+ func,
+ dangling_func,
+ filesystem))
+ {
return false;
+ }
return true;
}
@@ -2420,8 +2671,6 @@ namespace butl
//
static const dir_path empty_dir;
- using preopen = function<bool (const dir_path&)>;
-
// Base for filesystem (see above) implementations.
//
// Don't copy start directory. It is expected to exist till the end of the
@@ -2471,13 +2720,17 @@ namespace butl
bool recursive,
bool self,
bool fs,
- preopen po)
+ preopen po,
+ preskip ps)
: start_ (move (p)),
recursive_ (recursive),
self_ (self),
follow_symlinks_ (fs),
- preopen_ (move (po))
+ preopen_ (move (po)),
+ preskip_ (move (ps))
{
+ assert (fs || ps == nullptr);
+
open (dir_path (), self_);
}
@@ -2487,12 +2740,16 @@ namespace butl
recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete;
recursive_dir_iterator (recursive_dir_iterator&&) = default;
- // Return false if no more entries left. Otherwise save the next entry path
- // and return true. The path is relative to the directory being
+ // Return false if no more entries left. Otherwise save the next entry
+ // path and return true. The path is relative to the directory being
// traversed and contains a trailing separator for sub-directories. Throw
// std::system_error in case of a failure (insufficient permissions,
// dangling symlink encountered, etc).
//
+ // If symlinks need to be followed, then skip inaccessible/dangling
+ // entries or, if the preskip callback is specified and returns false for
+ // such an entry, stop the entire traversal.
+ //
bool
next (path& p)
{
@@ -2501,44 +2758,64 @@ namespace butl
auto& i (iters_.back ());
- // If we got to the end of directory sub-entries, then go one level up
- // and return this directory path.
- //
- if (i.first == dir_iterator ())
+ for (;;) // Skip inaccessible/dangling entries.
{
- path d (move (i.second));
- iters_.pop_back ();
+ // If we got to the end of directory sub-entries, then go one level up
+ // and return this directory path.
+ //
+ if (i.first == dir_iterator ())
+ {
+ path d (move (i.second));
+ iters_.pop_back ();
+
+ // Return the path unless it is the last one (the directory we
+ // started to iterate from) and the self flag is not set.
+ //
+ if (iters_.empty () && !self_)
+ return false;
+
+ p = move (d);
+ return true;
+ }
- // Return the path unless it is the last one (the directory we started
- // to iterate from) and the self flag is not set.
+ const dir_entry& de (*i.first);
+
+ // Append separator if a directory. Note that dir_entry::type() can
+ // throw.
//
- if (iters_.empty () && !self_)
- return false;
+ entry_type et (follow_symlinks_ ? de.type () : de.ltype ());
- p = move (d);
- return true;
- }
+ // If the entry turned out to be inaccessible/dangling, then skip it
+ // if the preskip function is not specified or returns true and stop
+ // the entire traversal otherwise.
+ //
+ if (et == entry_type::unknown)
+ {
+ if (preskip_ != nullptr && !preskip_ (de))
+ {
+ iters_.clear ();
+ return false;
+ }
- const dir_entry& de (*i.first);
+ ++i.first;
+ continue;
+ }
- // Append separator if a directory. Note that dir_entry::type() can
- // throw.
- //
- entry_type et (follow_symlinks_ ? de.type () : de.ltype ());
- path pe (et == entry_type::directory
- ? path_cast<dir_path> (i.second / de.path ())
- : i.second / de.path ());
+ path pe (et == entry_type::directory
+ ? path_cast<dir_path> (i.second / de.path ())
+ : i.second / de.path ());
- ++i.first;
+ ++i.first;
- if (recursive_ && pe.to_directory ())
- {
- open (path_cast<dir_path> (move (pe)), true);
- return next (p);
- }
+ if (recursive_ && pe.to_directory ())
+ {
+ open (path_cast<dir_path> (move (pe)), true);
+ return next (p);
+ }
- p = move (pe);
- return true;
+ p = move (pe);
+ return true;
+ }
}
private:
@@ -2560,10 +2837,15 @@ namespace butl
{
dir_path d (start_ / p);
- // If we follow symlinks, then we ignore the dangling ones.
+ // If we follow symlinks, then we may need to skip the dangling
+ // ones. Note, however, that we will be skipping them not at the
+ // dir_iterator level but ourselves, after calling the preskip
+ // callback function (see next() for details).
//
i = dir_iterator (!d.empty () ? d : dir_path ("."),
- follow_symlinks_);
+ follow_symlinks_
+ ? dir_iterator::detect_dangling
+ : dir_iterator::no_follow);
}
iters_.emplace_back (move (i), move (p));
@@ -2593,6 +2875,7 @@ namespace butl
bool self_;
bool follow_symlinks_;
preopen preopen_;
+ preskip preskip_;
small_vector<pair<dir_iterator, dir_path>, 1> iters_;
};
@@ -2616,13 +2899,15 @@ namespace butl
bool recursive,
bool self,
bool follow_symlinks,
- preopen po) const
+ preopen po,
+ preskip ps) const
{
return iterator_type (start_ / p,
recursive,
self,
follow_symlinks,
- move (po));
+ move (po),
+ move (ps));
}
};
@@ -2631,10 +2916,11 @@ namespace butl
const path& pattern,
const function<bool (path&&, const string& pattern, bool interm)>& func,
const dir_path& start,
- path_match_flags flags)
+ path_match_flags flags,
+ const function<bool (const dir_entry&)>& dangling_func)
{
real_filesystem fs (pattern.relative () ? start : empty_dir);
- search (pattern, dir_path (), flags, func, fs);
+ search (pattern, dir_path (), flags, func, dangling_func, fs);
}
// Search path in the directory tree represented by a path.
@@ -2792,7 +3078,8 @@ namespace butl
bool recursive,
bool self,
bool /*follow_symlinks*/,
- preopen po)
+ preopen po,
+ preskip)
{
// If path and sub-path are non-empty, and both are absolute or relative,
// then no extra effort is required (prior to checking if one is a
@@ -2851,6 +3138,6 @@ namespace butl
path_match_flags flags)
{
path_filesystem fs (start, entry);
- search (pattern, dir_path (), flags, func, fs);
+ search (pattern, dir_path (), flags, func, nullptr /* dangle_func */, fs);
}
}