aboutsummaryrefslogtreecommitdiff
path: root/libbutl/filesystem.cxx
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-05-01 16:08:43 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-05-01 16:59:24 +0300
commit61377c582e0f2675baa5f5e6e30a35d1a4164b33 (patch)
tree11cdca992834d7f7f197f72856712fbcb3020e3d /libbutl/filesystem.cxx
parent442c1a6790e52baa0c081f310d4d9e9b6f1ff638 (diff)
Add hxx extension for headers and lib prefix for library dir
Diffstat (limited to 'libbutl/filesystem.cxx')
-rw-r--r--libbutl/filesystem.cxx1396
1 files changed, 1396 insertions, 0 deletions
diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx
new file mode 100644
index 0000000..6a64073
--- /dev/null
+++ b/libbutl/filesystem.cxx
@@ -0,0 +1,1396 @@
+// file : libbutl/filesystem.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbutl/filesystem.hxx>
+
+#ifndef _WIN32
+# include <stdio.h> // rename()
+# include <dirent.h> // struct dirent, *dir()
+# include <unistd.h> // symlink(), link(), stat(), rmdir(), unlink()
+# include <sys/time.h> // utimes()
+# include <sys/types.h> // stat
+# include <sys/stat.h> // stat(), lstat(), S_I*, mkdir(), chmod()
+#else
+# include <libbutl/win32-utility.hxx>
+
+# include <io.h> // _find*(), _unlink(), _chmod()
+# include <direct.h> // _mkdir(), _rmdir()
+# include <sys/types.h> // _stat
+# include <sys/stat.h> // _stat(), S_I*
+
+# ifdef _MSC_VER // Unlikely to be fixed in newer versions.
+# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+# endif
+
+# include <libbutl/utility.hxx> // lcase()
+#endif
+
+#include <errno.h> // errno, E*
+
+#include <string>
+#include <vector>
+#include <memory> // unique_ptr
+#include <utility> // pair
+#include <iterator> // reverse_iterator
+#include <system_error>
+
+#include <libbutl/path.hxx>
+#include <libbutl/utility.hxx> // throw_generic_error()
+#include <libbutl/fdstream.hxx>
+#include <libbutl/small-vector.hxx>
+
+using namespace std;
+
+namespace butl
+{
+ bool
+ file_exists (const char* p, bool fl)
+ {
+ auto pe (path_entry (p, fl));
+ return pe.first && (pe.second == entry_type::regular ||
+ (!fl && pe.second == entry_type::symlink));
+ }
+
+ bool
+ entry_exists (const char* p, bool fl)
+ {
+ return path_entry (p, fl).first;
+ }
+
+ bool
+ dir_exists (const char* p)
+ {
+ auto pe (path_entry (p, true));
+ return pe.first && pe.second == entry_type::directory;
+ }
+
+#ifndef _WIN32
+ pair<bool, entry_type>
+ path_entry (const char* p, bool fl)
+ {
+ struct stat s;
+ if ((fl ? stat (p, &s) : lstat (p, &s)) != 0)
+ {
+ if (errno == ENOENT || errno == ENOTDIR)
+ return make_pair (false, entry_type::unknown);
+ else
+ throw_generic_error (errno);
+ }
+
+ auto m (s.st_mode);
+ entry_type t (entry_type::unknown);
+
+ if (S_ISREG (m))
+ t = entry_type::regular;
+ else if (S_ISDIR (m))
+ t = entry_type::directory;
+ else if (S_ISLNK (m))
+ t = entry_type::symlink;
+ else if (S_ISBLK (m) || S_ISCHR (m) || S_ISFIFO (m) || S_ISSOCK (m))
+ t = entry_type::other;
+
+ return make_pair (true, t);
+ }
+#else
+ pair<bool, entry_type>
+ path_entry (const char* p, bool)
+ {
+ // A path like 'C:', while being a root path in our terminology, is not as
+ // such for Windows, that maintains current directory for each drive, and
+ // so C: means the current directory on the drive C. This is not what we
+ // mean here, so need to append the trailing directory separator in such a
+ // case.
+ //
+ string d;
+ if (path::traits::root (p, string::traits_type::length (p)))
+ {
+ d = p;
+ d += path::traits::directory_separator;
+ p = d.c_str ();
+ }
+
+ DWORD attr (GetFileAttributesA (p));
+ if (attr == INVALID_FILE_ATTRIBUTES) // Presumably not exists.
+ return make_pair (false, entry_type::unknown);
+
+ entry_type t (entry_type::unknown);
+
+ // S_ISLNK/S_IFDIR are not defined for Win32 but it does have symlinks.
+ // We will consider symlink entry to be of the unknown type. Note that
+ // S_ISREG() and S_ISDIR() return as they would do for a symlink target.
+ //
+ if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) == 0)
+ {
+ struct _stat s;
+
+ if (_stat (p, &s) != 0)
+ {
+ if (errno == ENOENT || errno == ENOTDIR)
+ return make_pair (false, entry_type::unknown);
+ else
+ throw_generic_error (errno);
+ }
+
+ auto m (s.st_mode);
+
+ if (S_ISREG (m))
+ t = entry_type::regular;
+ else if (S_ISDIR (m))
+ t = entry_type::directory;
+ //
+ //else if (S_ISLNK (m))
+ // t = entry_type::symlink;
+ }
+
+ return make_pair (true, t);
+ }
+#endif
+
+ mkdir_status
+#ifndef _WIN32
+ try_mkdir (const dir_path& p, mode_t m)
+ {
+ if (mkdir (p.string ().c_str (), m) != 0)
+#else
+ try_mkdir (const dir_path& p, mode_t)
+ {
+ if (_mkdir (p.string ().c_str ()) != 0)
+#endif
+ {
+ int e (errno);
+
+ // EEXIST means the path already exists but not necessarily as
+ // a directory.
+ //
+ if (e == EEXIST && dir_exists (p))
+ return mkdir_status::already_exists;
+ else
+ throw_generic_error (e);
+ }
+
+ return mkdir_status::success;
+ }
+
+ mkdir_status
+ try_mkdir_p (const dir_path& p, mode_t m)
+ {
+ if (!p.root ())
+ {
+ dir_path d (p.directory ());
+
+ if (!d.empty () && !dir_exists (d))
+ try_mkdir_p (d, m);
+ }
+
+ return try_mkdir (p, m);
+ }
+
+ rmdir_status
+ try_rmdir (const dir_path& p, bool ignore_error)
+ {
+ rmdir_status r (rmdir_status::success);
+
+#ifndef _WIN32
+ if (rmdir (p.string ().c_str ()) != 0)
+#else
+ if (_rmdir (p.string ().c_str ()) != 0)
+#endif
+ {
+ if (errno == ENOENT)
+ r = rmdir_status::not_exist;
+ else if (errno == ENOTEMPTY || errno == EEXIST)
+ r = rmdir_status::not_empty;
+ else if (!ignore_error)
+ throw_generic_error (errno);
+ }
+
+ return r;
+ }
+
+ void
+ rmdir_r (const dir_path& p, bool dir, bool ignore_error)
+ {
+ // An nftw()-based implementation (for platforms that support it)
+ // might be a faster way.
+ //
+ for (const dir_entry& de: dir_iterator (p))
+ {
+ path ep (p / de.path ()); //@@ Would be good to reuse the buffer.
+
+ if (de.ltype () == entry_type::directory)
+ rmdir_r (path_cast<dir_path> (move (ep)), true, ignore_error);
+ else
+ try_rmfile (ep, ignore_error);
+ }
+
+ if (dir)
+ {
+ rmdir_status r (try_rmdir (p));
+
+ if (r != rmdir_status::success && !ignore_error)
+ throw_generic_error (r == rmdir_status::not_empty
+ ? ENOTEMPTY
+ : ENOENT);
+ }
+ }
+
+ rmfile_status
+ try_rmfile (const path& p, bool ignore_error)
+ {
+ rmfile_status r (rmfile_status::success);
+
+#ifndef _WIN32
+ if (unlink (p.string ().c_str ()) != 0)
+#else
+ if (_unlink (p.string ().c_str ()) != 0)
+#endif
+ {
+ // Strangely on Linux unlink() removes a dangling symlink but returns
+ // ENOENT.
+ //
+ if (errno == ENOENT || errno == ENOTDIR)
+ r = rmfile_status::not_exist;
+ else if (!ignore_error)
+ throw_generic_error (errno);
+ }
+
+ return r;
+ }
+
+#ifndef _WIN32
+ void
+ mksymlink (const path& target, const path& link, bool)
+ {
+ if (symlink (target.string ().c_str (), link.string ().c_str ()) == -1)
+ throw_generic_error (errno);
+ }
+
+ void
+ mkhardlink (const path& target, const path& link, bool)
+ {
+ if (::link (target.string ().c_str (), link.string ().c_str ()) == -1)
+ throw_generic_error (errno);
+ }
+
+#else
+
+ void
+ mksymlink (const path&, const path&, bool)
+ {
+ throw_generic_error (ENOSYS, "symlinks not supported");
+ }
+
+ void
+ mkhardlink (const path& target, const path& link, bool dir)
+ {
+ if (!dir)
+ {
+ if (!CreateHardLinkA (link.string ().c_str (),
+ target.string ().c_str (),
+ nullptr))
+ throw_system_error (GetLastError ());
+ }
+ else
+ throw_generic_error (ENOSYS, "directory hard links not supported");
+ }
+#endif
+
+ // For I/O operations cpfile() can throw ios_base::failure exception that is
+ // not derived from system_error for old versions of g++ (as of 4.9). From
+ // the other hand cpfile() must throw system_error only. Let's catch
+ // ios_base::failure and rethrow as system_error in such a case.
+ //
+ template <bool v>
+ static inline typename enable_if<v>::type
+ cpfile (const path& from, const path& to,
+ cpflags fl,
+ permissions perm,
+ auto_rmfile& rm)
+ {
+ ifdstream ifs (from, fdopen_mode::binary);
+
+ fdopen_mode om (fdopen_mode::out |
+ fdopen_mode::truncate |
+ fdopen_mode::create |
+ fdopen_mode::binary);
+
+ if ((fl & cpflags::overwrite_content) != cpflags::overwrite_content)
+ om |= fdopen_mode::exclusive;
+
+ ofdstream ofs (fdopen (to, om, perm));
+
+ rm = auto_rmfile (to);
+
+ // Throws ios::failure on fdbuf read/write failures.
+ //
+ // Note that the eof check is important: if the stream is at eof (empty
+ // file) then this write will fail.
+ //
+ if (ifs.peek () != ifdstream::traits_type::eof ())
+ ofs << ifs.rdbuf ();
+
+ ifs.close (); // Throws ios::failure on failure.
+ ofs.close (); // Throws ios::failure on flush/close failure.
+ }
+
+ template <bool v>
+ static inline typename enable_if<!v>::type
+ cpfile (const path& from, const path& to,
+ cpflags fl,
+ permissions perm,
+ auto_rmfile& rm)
+ {
+ try
+ {
+ cpfile<true> (from, to, fl, perm, rm);
+ }
+ catch (const ios_base::failure& e)
+ {
+ // While we try to preserve the original error information, we can not
+ // make the description to be exactly the same, for example
+ //
+ // Is a directory
+ //
+ // becomes
+ //
+ // Is a directory: Input/output error
+ //
+ // Note that our custom operator<<(ostream, exception) doesn't strip this
+ // suffix. This is a temporary code after all.
+ //
+ throw_generic_error (EIO, e.what ());
+ }
+ }
+
+ void
+ cpfile (const path& from, const path& to, cpflags fl)
+ {
+ permissions perm (path_permissions (from));
+ auto_rmfile rm;
+
+ cpfile<is_base_of<system_error, ios_base::failure>::value> (
+ from, to, fl, perm, rm);
+
+ if ((fl & cpflags::overwrite_permissions) ==
+ cpflags::overwrite_permissions)
+ path_permissions (to, perm);
+
+ rm.cancel ();
+ }
+
+ // Figuring out whether we have the nanoseconds in struct stat. Some
+ // platforms (e.g., FreeBSD), may provide some "compatibility" #define's,
+ // so use the second argument to not end up with the same signatures.
+ //
+ template <typename S>
+ inline constexpr auto
+ mnsec (const S* s, bool) -> decltype(s->st_mtim.tv_nsec)
+ {
+ return s->st_mtim.tv_nsec; // POSIX (GNU/Linux, Solaris).
+ }
+
+ template <typename S>
+ inline constexpr auto
+ mnsec (const S* s, int) -> decltype(s->st_mtimespec.tv_nsec)
+ {
+ return s->st_mtimespec.tv_nsec; // *BSD, MacOS.
+ }
+
+ template <typename S>
+ inline constexpr auto
+ mnsec (const S* s, float) -> decltype(s->st_mtime_n)
+ {
+ return s->st_mtime_n; // AIX 5.2 and later.
+ }
+
+ // Things are not going to end up well with only seconds resolution so
+ // let's make it a compile error.
+ //
+ // template <typename S>
+ // inline constexpr int
+ // mnsec (...) {return 0;}
+
+ template <typename S>
+ inline constexpr auto
+ ansec (const S* s, bool) -> decltype(s->st_atim.tv_nsec)
+ {
+ return s->st_atim.tv_nsec; // POSIX (GNU/Linux, Solaris).
+ }
+
+ template <typename S>
+ inline constexpr auto
+ ansec (const S* s, int) -> decltype(s->st_atimespec.tv_nsec)
+ {
+ return s->st_atimespec.tv_nsec; // *BSD, MacOS.
+ }
+
+ template <typename S>
+ inline constexpr auto
+ ansec (const S* s, float) -> decltype(s->st_atime_n)
+ {
+ return s->st_atime_n; // AIX 5.2 and later.
+ }
+
+ // template <typename S>
+ // inline constexpr int
+ // ansec (...) {return 0;}
+
+ void
+ mventry (const path& from, const path& to, cpflags fl)
+ {
+ assert ((fl & cpflags::overwrite_permissions) ==
+ cpflags::overwrite_permissions);
+
+ bool ovr ((fl & cpflags::overwrite_content) == cpflags::overwrite_content);
+
+ const char* f (from.string ().c_str ());
+ const char* t (to.string ().c_str ());
+
+#ifndef _WIN32
+
+ if (!ovr && path_entry (to).first)
+ throw_generic_error (EEXIST);
+
+ if (::rename (f, t) == 0) // POSIX implementation.
+ return;
+
+ // If source and destination paths are on different file systems we need to
+ // move the file ourselves.
+ //
+ if (errno != EXDEV)
+ throw_generic_error (errno);
+
+ // Note that cpfile() follows symlinks, so we need to remove destination if
+ // exists.
+ //
+ try_rmfile (to);
+
+ // Note that permissions are copied unconditionally to a new file.
+ //
+ cpfile (from, to, cpflags::none);
+
+ // Copy file access and modification times.
+ //
+ struct stat s;
+ if (stat (f, &s) != 0)
+ throw_generic_error (errno);
+
+ timeval times[2];
+ times[0].tv_sec = s.st_atime;
+ times[0].tv_usec = ansec<struct stat> (&s, true) / 1000;
+ times[1].tv_sec = s.st_mtime;
+ times[1].tv_usec = mnsec<struct stat> (&s, true) / 1000;
+
+ if (utimes (t, times) != 0)
+ throw_generic_error (errno);
+
+ // Finally, remove the source file.
+ //
+ try_rmfile (from);
+
+#else
+
+ // While ::rename() is present on Windows, it is not POSIX but ISO C
+ // implementation, that doesn't fit our needs well.
+ //
+ auto te (path_entry (to));
+
+ if (!ovr && te.first)
+ throw_generic_error (EEXIST);
+
+ bool td (te.first && te.second == entry_type::directory);
+
+ auto fe (path_entry (from));
+ bool fd (fe.first && fe.second == entry_type::directory);
+
+ // If source and destination filesystem entries exist, they both must be
+ // either directories or not directories.
+ //
+ if (fe.first && te.first && fd != td)
+ throw_generic_error (ENOTDIR);
+
+ DWORD mfl (fd ? 0 : (MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING));
+
+ if (MoveFileExA (f, t, mfl))
+ return;
+
+ // If the destination already exists, then MoveFileExA() succeeds only if
+ // it is a regular file or a symlink. Lets also support an empty directory
+ // special case to comply with POSIX. If the destination is an empty
+ // directory we will just remove it and retry the move operation.
+ //
+ // Note that under Wine we endup with ERROR_ACCESS_DENIED error code in
+ // that case, and with ERROR_ALREADY_EXISTS when run natively.
+ //
+ DWORD ec (GetLastError ());
+ if ((ec == ERROR_ALREADY_EXISTS || ec == ERROR_ACCESS_DENIED) && td &&
+ try_rmdir (path_cast<dir_path> (to)) != rmdir_status::not_empty &&
+ MoveFileExA (f, t, mfl))
+ return;
+
+ throw_system_error (ec);
+
+#endif
+ }
+
+ timestamp
+ file_mtime (const char* p)
+ {
+#ifndef _WIN32
+ struct stat s;
+ if (stat (p, &s) != 0)
+ {
+ if (errno == ENOENT || errno == ENOTDIR)
+ return timestamp_nonexistent;
+ else
+ throw_generic_error (errno);
+ }
+
+ if (!S_ISREG (s.st_mode))
+ return timestamp_nonexistent;
+
+ return system_clock::from_time_t (s.st_mtime) +
+ chrono::duration_cast<duration> (
+ chrono::nanoseconds (mnsec<struct stat> (&s, true)));
+#else
+
+ WIN32_FILE_ATTRIBUTE_DATA s;
+
+ if (!GetFileAttributesExA (p, GetFileExInfoStandard, &s))
+ {
+ DWORD ec (GetLastError ());
+
+ if (ec == ERROR_FILE_NOT_FOUND ||
+ ec == ERROR_PATH_NOT_FOUND ||
+ ec == ERROR_INVALID_NAME ||
+ ec == ERROR_INVALID_DRIVE ||
+ ec == ERROR_BAD_PATHNAME ||
+ ec == ERROR_BAD_NETPATH)
+ return timestamp_nonexistent;
+
+ throw_system_error (ec);
+ }
+
+ if ((s.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
+ return timestamp_nonexistent;
+
+ // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
+ // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch"
+ // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds.
+ //
+ const FILETIME& t (s.ftLastWriteTime);
+
+ uint64_t ns ((static_cast<uint64_t> (t.dwHighDateTime) << 32) |
+ t.dwLowDateTime);
+
+ ns -= 11644473600ULL * 10000000; // Now in UNIX epoch.
+ ns *= 100; // Now in nanoseconds.
+
+ return timestamp (
+ chrono::duration_cast<duration> (
+ chrono::nanoseconds (ns)));
+#endif
+ }
+
+ permissions
+ path_permissions (const path& p)
+ {
+#ifndef _WIN32
+ struct stat s;
+ if (stat (p.string ().c_str (), &s) != 0)
+#else
+ struct _stat s;
+ if (_stat (p.string ().c_str (), &s) != 0)
+#endif
+ throw_generic_error (errno);
+
+ // VC++ has no S_IRWXU defined. MINGW GCC <= 4.9 has no S_IRWXG, S_IRWXO
+ // defined.
+ //
+ // We could extrapolate user permissions to group/other permissions if
+ // S_IRWXG/S_IRWXO are undefined. That is, we could consider their absence
+ // as meaning that the platform does not distinguish between permissions
+ // for different kinds of users. Let's wait for a use-case first.
+ //
+ mode_t f (S_IREAD | S_IWRITE | S_IEXEC);
+
+#ifdef S_IRWXG
+ f |= S_IRWXG;
+#endif
+
+#ifdef S_IRWXO
+ f |= S_IRWXO;
+#endif
+
+ return static_cast<permissions> (s.st_mode & f);
+ }
+
+ void
+ path_permissions (const path& p, permissions f)
+ {
+ mode_t m (S_IREAD | S_IWRITE | S_IEXEC);
+
+#ifdef S_IRWXG
+ m |= S_IRWXG;
+#endif
+
+#ifdef S_IRWXO
+ m |= S_IRWXO;
+#endif
+
+ m &= static_cast<mode_t> (f);
+
+#ifndef _WIN32
+ if (chmod (p.string ().c_str (), m) == -1)
+#else
+ if (_chmod (p.string ().c_str (), m) == -1)
+#endif
+ throw_generic_error (errno);
+ }
+
+ // dir_{entry,iterator}
+ //
+#ifndef _WIN32
+
+ // dir_entry
+ //
+ dir_iterator::
+ ~dir_iterator ()
+ {
+ if (h_ != nullptr)
+ closedir (h_); // Ignore any errors.
+ }
+
+ dir_iterator& dir_iterator::
+ operator= (dir_iterator&& x)
+ {
+ if (this != &x)
+ {
+ e_ = move (x.e_);
+
+ if (h_ != nullptr && closedir (h_) == -1)
+ throw_generic_error (errno);
+
+ h_ = x.h_;
+ x.h_ = nullptr;
+ }
+ return *this;
+ }
+
+ entry_type dir_entry::
+ type (bool link) const
+ {
+ path_type p (b_ / p_);
+ struct stat s;
+ if ((link
+ ? stat (p.string ().c_str (), &s)
+ : lstat (p.string ().c_str (), &s)) != 0)
+ {
+ throw_generic_error (errno);
+ }
+
+ entry_type r;
+
+ if (S_ISREG (s.st_mode))
+ r = entry_type::regular;
+ else if (S_ISDIR (s.st_mode))
+ r = entry_type::directory;
+ else if (S_ISLNK (s.st_mode))
+ r = entry_type::symlink;
+ else
+ r = entry_type::other;
+
+ return r;
+ }
+
+ // dir_iterator
+ //
+ struct dir_deleter
+ {
+ void operator() (DIR* p) const {if (p != nullptr) closedir (p);}
+ };
+
+ dir_iterator::
+ dir_iterator (const dir_path& d)
+ {
+ unique_ptr<DIR, dir_deleter> h (opendir (d.string ().c_str ()));
+ h_ = h.get ();
+
+ if (h_ == nullptr)
+ throw_generic_error (errno);
+
+ next ();
+
+ if (h_ != nullptr)
+ e_.b_ = d;
+
+ h.release ();
+ }
+
+ template <typename D>
+ inline /*constexpr*/ entry_type d_type (const D* d, decltype(d->d_type)*)
+ {
+ switch (d->d_type)
+ {
+#ifdef DT_DIR
+ case DT_DIR: return entry_type::directory;
+#endif
+#ifdef DT_REG
+ case DT_REG: return entry_type::regular;
+#endif
+#ifdef DT_LNK
+ case DT_LNK: return entry_type::symlink;
+#endif
+#ifdef DT_BLK
+ case DT_BLK:
+#endif
+#ifdef DT_CHR
+ case DT_CHR:
+#endif
+#ifdef DT_FIFO
+ case DT_FIFO:
+#endif
+#ifdef DT_SOCK
+ case DT_SOCK:
+#endif
+ return entry_type::other;
+
+ default: return entry_type::unknown;
+ }
+ }
+
+ template <typename D>
+ inline constexpr entry_type d_type (...) {return entry_type::unknown;}
+
+ void dir_iterator::
+ next ()
+ {
+ for (;;)
+ {
+ errno = 0;
+ if (struct dirent* de = readdir (h_))
+ {
+ // We can accept some overhead for '.' and '..' (relying on short
+ // string optimization) in favor of a more compact code.
+ //
+ path p (de->d_name);
+
+ // Skip '.' and '..'.
+ //
+ if (p.current () || p.parent ())
+ continue;
+
+ e_.p_ = move (p);
+ e_.t_ = d_type<struct dirent> (de, nullptr);
+ e_.lt_ = entry_type::unknown;
+ }
+ else if (errno == 0)
+ {
+ // End of stream.
+ //
+ closedir (h_);
+ h_ = nullptr;
+ }
+ else
+ throw_generic_error (errno);
+
+ break;
+ }
+ }
+
+#else
+
+ // dir_entry
+ //
+ dir_iterator::
+ ~dir_iterator ()
+ {
+ if (h_ != -1)
+ _findclose (h_); // Ignore any errors.
+ }
+
+ dir_iterator& dir_iterator::
+ operator= (dir_iterator&& x)
+ {
+ if (this != &x)
+ {
+ e_ = move (x.e_);
+
+ if (h_ != -1 && _findclose (h_) == -1)
+ throw_generic_error (errno);
+
+ h_ = x.h_;
+ x.h_ = -1;
+ }
+ return *this;
+ }
+
+ entry_type dir_entry::
+ type (bool) const
+ {
+ // Note that we currently do not support symlinks (yes, there is symlink
+ // support since Vista).
+ //
+ path_type p (b_ / p_);
+
+ struct _stat s;
+ if (_stat (p.string ().c_str (), &s) != 0)
+ throw_generic_error (errno);
+
+ entry_type r;
+ if (S_ISREG (s.st_mode))
+ r = entry_type::regular;
+ else if (S_ISDIR (s.st_mode))
+ r = entry_type::directory;
+ else
+ r = entry_type::other;
+
+ return r;
+ }
+
+ // dir_iterator
+ //
+ struct auto_dir
+ {
+ explicit
+ auto_dir (intptr_t& h): h_ (&h) {}
+
+ auto_dir (const auto_dir&) = delete;
+ auto_dir& operator= (const auto_dir&) = delete;
+
+ ~auto_dir ()
+ {
+ if (h_ != nullptr && *h_ != -1)
+ _findclose (*h_);
+ }
+
+ void release () {h_ = nullptr;}
+
+ private:
+ intptr_t* h_;
+ };
+
+ dir_iterator::
+ dir_iterator (const dir_path& d)
+ {
+ auto_dir h (h_);
+ e_.b_ = d; // Used by next() to call _findfirst().
+
+ next ();
+ h.release ();
+ }
+
+ void dir_iterator::
+ next ()
+ {
+ for (;;)
+ {
+ bool r;
+ _finddata_t fi;
+
+ if (h_ == -1)
+ {
+ // The call is made from the constructor. Any other call with h_ == -1
+ // is illegal.
+ //
+
+ // Check to distinguish non-existent vs empty directories.
+ //
+ if (!dir_exists (e_.b_))
+ throw_generic_error (ENOENT);
+
+ h_ = _findfirst ((e_.b_ / path ("*")).string ().c_str (), &fi);
+ r = h_ != -1;
+ }
+ else
+ r = _findnext (h_, &fi) == 0;
+
+ if (r)
+ {
+ // We can accept some overhead for '.' and '..' (relying on short
+ // string optimization) in favor of a more compact code.
+ //
+ path p (fi.name);
+
+ // Skip '.' and '..'.
+ //
+ if (p.current () || p.parent ())
+ continue;
+
+ e_.p_ = move (p);
+
+ // We do not support symlinks at the moment.
+ //
+ e_.t_ = fi.attrib & _A_SUBDIR
+ ? entry_type::directory
+ : entry_type::regular;
+
+ e_.lt_ = entry_type::unknown;
+ }
+ else if (errno == ENOENT)
+ {
+ // End of stream.
+ //
+ if (h_ != -1)
+ {
+ _findclose (h_);
+ h_ = -1;
+ }
+ }
+ else
+ throw_generic_error (errno);
+
+ break;
+ }
+ }
+#endif
+
+ // Match the name [ni, ne) to the pattern [pi, pe). Ranges can be empty.
+ //
+ static bool
+ match (string::const_iterator pi, string::const_iterator pe,
+ string::const_iterator ni, string::const_iterator ne)
+ {
+ using reverse_iterator = std::reverse_iterator<string::const_iterator>;
+
+ reverse_iterator rpi (pe);
+ reverse_iterator rpe (pi);
+
+ reverse_iterator rni (ne);
+ reverse_iterator rne (ni);
+
+ // Match the pattern suffix (follows the last *) to the name trailing
+ // characters.
+ //
+ char pc;
+ for (; rpi != rpe && (pc = *rpi) != '*' && rni != rne; ++rpi, ++rni)
+ {
+#ifndef _WIN32
+ if (*rni != pc && pc != '?')
+#else
+ if (lcase (*rni) != lcase (pc) && pc != '?')
+#endif
+ return false;
+ }
+
+ // If we got to the (reversed) end of the pattern (no * is encountered)
+ // than we are done. The success depends on if we got to the (reversed) end
+ // of the name as well.
+ //
+ if (rpi == rpe)
+ return rni == rne;
+
+ // If we didn't reach * in the pattern then we reached the (reversed) end
+ // of the name. That means we have unmatched non-star characters in the
+ // pattern, and so match failed.
+ //
+ if (pc != '*')
+ {
+ assert (rni == rne);
+ return false;
+ }
+
+ // Match the pattern prefix (ends with the first *) to the name leading
+ // characters. If they mismatch we failed. Otherwise if this is an only *
+ // in the pattern (matches whatever is left in the name) then we succeed,
+ // otherwise we perform backtracking (recursively).
+ //
+ pe = rpi.base ();
+ ne = rni.base ();
+
+ // Compare the pattern and the name char by char until the name suffix or
+ // * is encountered in the pattern (whichever happens first). Fail if a
+ // char mismatches.
+ //
+ for (; (pc = *pi) != '*' && ni != ne; ++pi, ++ni)
+ {
+#ifndef _WIN32
+ if (*ni != pc && pc != '?')
+#else
+ if (lcase (*ni) != lcase (pc) && pc != '?')
+#endif
+ return false;
+ }
+
+ // If we didn't get to * in the pattern then we got to the name suffix.
+ // That means that the pattern has unmatched non-star characters, and so
+ // match failed.
+ //
+ if (pc != '*')
+ {
+ assert (ni == ne);
+ return false;
+ }
+
+ // If * that we have reached is the last one, then it matches whatever is
+ // left in the name (including an empty range).
+ //
+ if (++pi == pe)
+ return true;
+
+ // Perform backtracking.
+ //
+ // From now on, we will call the pattern not-yet-matched part (starting
+ // the leftmost * and ending the rightmost one inclusively) as pattern, and
+ // the name not-yet-matched part as name.
+ //
+ // Here we sequentially assume that * that starts the pattern matches the
+ // name leading part (staring from an empty one and iterating till the full
+ // name). So if, at some iteration, the pattern trailing part (that follows
+ // the leftmost *) matches the name trailing part, then the pattern matches
+ // the name.
+ //
+ bool r;
+ for (; !(r = match (pi, pe, ni, ne)) && ni != ne; ++ni) ;
+ return r;
+ }
+
+ bool
+ path_match (const string& pattern, const string& name)
+ {
+ // Implementation notes:
+ //
+ // - This has a good potential of becoming hairy quickly so need to strive
+ // for an elegant way to implement this.
+ //
+ // - Most patterns will contains a single * wildcard with a prefix and/or
+ // suffix (e.g., *.txt, foo*, f*.txt). Something like this is not very
+ // common: *foo*.
+ //
+ // So it would be nice to have a clever implementation that first
+ // "anchors" itself with a literal prefix and/or suffix and only then
+ // continue with backtracking. In other words, reduce:
+ //
+ // *.txt vs foo.txt -> * vs foo
+ // foo* vs foo.txt -> * vs .txt
+ // f*.txt vs foo.txt -> * vs oo
+ //
+
+ auto pi (pattern.rbegin ());
+ auto pe (pattern.rend ());
+
+ auto ni (name.rbegin ());
+ auto ne (name.rend ());
+
+ // The name doesn't match the pattern if it is of a different type than the
+ // pattern is.
+ //
+ bool pd (pi != pe && path::traits::is_separator (*pi));
+ bool nd (ni != ne && path::traits::is_separator (*ni));
+
+ if (pd != nd)
+ return false;
+
+ // Skip trailing separators if present.
+ //
+ if (pd)
+ {
+ ++pi;
+ ++ni;
+ }
+
+ return match (pattern.begin (), pi.base (), name.begin (), ni.base ());
+ }
+
+ // Iterate over directory sub-entries, recursively and including itself if
+ // requested. Note that recursive iterating goes depth-first which make
+ // sense for the cleanup use cases (@@ maybe this should be controllable
+ // since for directory creation it won't make sense).
+ //
+ // Prior to recursively opening a directory for iterating the preopen
+ // callback function is called. If false is returned, then the directory is
+ // not traversed but still returned by the next() call.
+ //
+ // Note that iterating over non-existent directory is not en error. The
+ // subsequent next() call returns false for such a directory.
+ //
+ using preopen = std::function<bool (const dir_path&)>;
+
+ class recursive_dir_iterator
+ {
+ public:
+ recursive_dir_iterator (dir_path p,
+ bool recursive,
+ bool self,
+ bool fs,
+ preopen po)
+ : start_ (move (p)),
+ recursive_ (recursive),
+ self_ (self),
+ follow_symlinks_ (fs),
+ preopen_ (move (po))
+ {
+ open (dir_path (), self_);
+ }
+
+ // Non-copyable, non-movable type.
+ //
+ recursive_dir_iterator (const recursive_dir_iterator&) = delete;
+ recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete;
+
+ // Return false if no more entries left. Otherwise save the next entry path
+ // and return true. The path is relative against the directory being
+ // traversed and contains a trailing separator for sub-directories. Throw
+ // std::system_error in case of a failure (insufficient permissions,
+ // dangling symlink encountered, etc).
+ //
+ bool
+ next (path& p)
+ {
+ if (iters_.empty ())
+ return false;
+
+ auto& i (iters_.back ());
+
+ // If we got to the end of directory sub-entries, then go one level up
+ // and return this directory path.
+ //
+ if (i.first == dir_iterator ())
+ {
+ path d (move (i.second));
+ iters_.pop_back ();
+
+ // Return the path unless it is the last one (the directory we started
+ // to iterate from) and the self flag is not set.
+ //
+ if (iters_.empty () && !self_)
+ return false;
+
+ p = move (d);
+ return true;
+ }
+
+ const dir_entry& de (*i.first);
+
+ // Append separator if a directory. Note that dir_entry::type() can
+ // throw.
+ //
+ entry_type et (follow_symlinks_ ? de.type () : de.ltype ());
+ path pe (et == entry_type::directory
+ ? path_cast<dir_path> (i.second / de.path ())
+ : i.second / de.path ());
+
+ ++i.first;
+
+ if (recursive_ && pe.to_directory ())
+ {
+ open (path_cast<dir_path> (move (pe)), true);
+ return next (p);
+ }
+
+ p = move (pe);
+ return true;
+ }
+
+ private:
+ void
+ open (dir_path p, bool preopen)
+ {
+ // We should consider a racing condition here. The directory can be
+ // removed before we create an iterator for it. In this case we just do
+ // nothing, so the directory is silently skipped.
+ //
+ try
+ {
+ // If preopen_() returns false, then the directory will not be
+ // traversed (as we leave iterator with end semantics) but still be
+ // returned by the next() call as a sub-entry.
+ //
+ dir_iterator i;
+ if (!preopen || preopen_ (p))
+ {
+ dir_path d (start_ / p);
+ i = dir_iterator (!d.empty () ? d : dir_path ("."));
+ }
+
+ iters_.emplace_back (move (i), move (p));
+ }
+ catch (const system_error& e)
+ {
+ // Ignore non-existent directory (ENOENT or ENOTDIR). Rethrow for any
+ // other error. We consider ENOTDIR as a variety of removal, with a
+ // new filesystem entry being created afterwards.
+ //
+ // Make sure that the error denotes errno portable code.
+ //
+ assert (e.code ().category () == generic_category ());
+
+ int ec (e.code ().value ());
+ if (ec != ENOENT && ec != ENOTDIR)
+ throw;
+ }
+ }
+
+ private:
+ dir_path start_;
+ bool recursive_;
+ bool self_;
+ bool follow_symlinks_;
+ preopen preopen_;
+ small_vector<pair<dir_iterator, dir_path>, 1> iters_;
+ };
+
+ // Search for paths matching the pattern and call the specified function for
+ // each matching path. Return false if the underlying func() call returns
+ // false. Otherwise the function conforms to the path_search() description.
+ //
+ static const string any_dir ("*/");
+
+ static bool
+ search (
+ path pattern,
+ dir_path pattern_dir,
+ const dir_path start_dir,
+ bool follow_symlinks,
+ const function<bool (path&&, const string& pattern, bool interm)>& func)
+ {
+ // Fast-forward the leftmost pattern non-wildcard components. So, for
+ // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/.
+ //
+ {
+ auto b (pattern.begin ());
+ auto e (pattern.end ());
+ auto i (b);
+ for (; i != e && (*i).find_first_of ("*?") == string::npos; ++i) ;
+
+ // If the pattern has no wildcards then we reduce to checking for the
+ // filesystem entry existence. It matches if exists and is of the proper
+ // type.
+ //
+ if (i == e)
+ {
+ path p (pattern_dir / pattern);
+ auto pe (path_entry (start_dir / p, true));
+
+ if (pe.first &&
+ ((pe.second == entry_type::directory) == p.to_directory ()))
+ return func (move (p), string (), false);
+
+ return true;
+ }
+ else if (i != b) // There are non-wildcard components, so fast-forward.
+ {
+ path p (b, i);
+ pattern = pattern.leaf (p);
+ pattern_dir /= path_cast<dir_path> (move (p));
+ }
+ }
+
+ assert (!pattern.empty ());
+
+ // The pattern leftmost component. Will use it to match the start directory
+ // sub-entries.
+ //
+ path pc (pattern.begin (), ++pattern.begin ());
+ string pcr (pc.representation ());
+
+ // Note that if the pattern has multiple components (is not a simple path),
+ // then the leftmost one has a trailing separator, and so will match
+ // sub-directories only.
+ //
+ bool simple (pattern.simple ());
+
+ // Note that we rely on "small function object" optimization here.
+ //
+ recursive_dir_iterator i (
+ start_dir / pattern_dir,
+ pcr.find ("**") != string::npos, // Recursive.
+ pcr.find ("***") != string::npos, // Self-inclusive.
+ follow_symlinks,
+ [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen.
+ {
+ return func (pattern_dir / p, any_dir, true);
+ });
+
+ // Canonicalize the pattern component collapsing consecutive stars (used to
+ // express that it is recursive) into a single one.
+ //
+ size_t j (0);
+ size_t n (pcr.size ());
+ for (size_t i (0); i < n; ++i)
+ {
+ char c (pcr[i]);
+ if (!(c == '*' && i > 0 && pcr[i - 1] == '*'))
+ pcr[j++] = c;
+ }
+
+ if (j != n)
+ pcr.resize (j);
+
+ // Note that the callback function can be called for the same directory
+ // twice: first time as intermediate match from iterator's preopen() call,
+ // and then, if the first call succeed, from the iterating loop (possibly
+ // as the final match).
+ //
+ path p;
+ while (i.next (p))
+ {
+ // Skip sub-entry if its name doesn't match the pattern leftmost
+ // component.
+ //
+ // Matching the directory we are iterating through (as for a pattern
+ // component containing ***) is a bit tricky. This directory is
+ // represented by the iterator as an empty path, and so we need to
+ // compute it (the leaf would actually be enough) for matching. This
+ // leaf can be aquired from the pattern_dir / start_dir path except the
+ // case when both directories are empty. This is the case when we search
+ // in the current directory (start_dir is empty) with a pattern that
+ // starts with *** wildcard (for example f***/bar). All we can do here is
+ // to fallback to path::current_directory() call. Note that this will be
+ // the only call per path_search() as the next time pattern_dir will not
+ // be empty.
+ //
+ const path& se (!p.empty ()
+ ? p
+ : path_cast<path> (!pattern_dir.empty ()
+ ? pattern_dir
+ : !start_dir.empty ()
+ ? start_dir
+ : path::current_directory ()));
+
+ if (!path_match (pcr, se.leaf ().representation ()))
+ continue;
+
+ // If the callback function returns false, then we stop the entire search
+ // for the final match, or do not search below the path for the
+ // intermediate one.
+ //
+ if (!func (pattern_dir / p, pcr, !simple))
+ {
+ if (simple) // Final match.
+ return false;
+ else
+ continue;
+ }
+
+ // If the pattern is not a simple one, and it's leftmost component
+ // matches the sub-entry, then the sub-entry is a directory (see the note
+ // above), and we search in it using the trailing part of the pattern.
+ //
+ if (!simple && !search (pattern.leaf (pc),
+ pattern_dir / path_cast<dir_path> (move (p)),
+ start_dir,
+ follow_symlinks,
+ func))
+ return false;
+ }
+
+ return true;
+ }
+
+ void
+ path_search (
+ const path& pattern,
+ const function<bool (path&&, const string& pattern, bool interm)>& func,
+ const dir_path& start,
+ bool follow_symlinks)
+ {
+ search (pattern,
+ dir_path (),
+ pattern.relative () ? start : dir_path (),
+ follow_symlinks,
+ func);
+ }
+}