aboutsummaryrefslogtreecommitdiff
path: root/butl/filesystem.cxx
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-05-01 16:08:43 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-05-01 16:59:24 +0300
commit61377c582e0f2675baa5f5e6e30a35d1a4164b33 (patch)
tree11cdca992834d7f7f197f72856712fbcb3020e3d /butl/filesystem.cxx
parent442c1a6790e52baa0c081f310d4d9e9b6f1ff638 (diff)
Add hxx extension for headers and lib prefix for library dir
Diffstat (limited to 'butl/filesystem.cxx')
-rw-r--r--butl/filesystem.cxx1396
1 files changed, 0 insertions, 1396 deletions
diff --git a/butl/filesystem.cxx b/butl/filesystem.cxx
deleted file mode 100644
index d6e1f63..0000000
--- a/butl/filesystem.cxx
+++ /dev/null
@@ -1,1396 +0,0 @@
-// file : butl/filesystem.cxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#include <butl/filesystem>
-
-#ifndef _WIN32
-# include <stdio.h> // rename()
-# include <dirent.h> // struct dirent, *dir()
-# include <unistd.h> // symlink(), link(), stat(), rmdir(), unlink()
-# include <sys/time.h> // utimes()
-# include <sys/types.h> // stat
-# include <sys/stat.h> // stat(), lstat(), S_I*, mkdir(), chmod()
-#else
-# include <butl/win32-utility>
-
-# include <io.h> // _find*(), _unlink(), _chmod()
-# include <direct.h> // _mkdir(), _rmdir()
-# include <sys/types.h> // _stat
-# include <sys/stat.h> // _stat(), S_I*
-
-# ifdef _MSC_VER // Unlikely to be fixed in newer versions.
-# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
-# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
-# endif
-
-# include <butl/utility> // lcase()
-#endif
-
-#include <errno.h> // errno, E*
-
-#include <string>
-#include <vector>
-#include <memory> // unique_ptr
-#include <utility> // pair
-#include <iterator> // reverse_iterator
-#include <system_error>
-
-#include <butl/path>
-#include <butl/utility> // throw_generic_error()
-#include <butl/fdstream>
-#include <butl/small-vector>
-
-using namespace std;
-
-namespace butl
-{
- // Return true if the path refers to a regular file. If fl is false, then a
- // symlink entry is also considered to be a file.
- //
- bool
- file_exists (const char* p, bool fl)
- {
-   const auto pe (path_entry (p, fl));
-
-   if (!pe.first)
-     return false;
-
-   if (pe.second == entry_type::regular)
-     return true;
-
-   return !fl && pe.second == entry_type::symlink;
- }
-
- // Return true if a filesystem entry of any type exists at the path. The fl
- // flag is passed through to path_entry().
- //
- bool
- entry_exists (const char* p, bool fl)
- {
-   const auto pe (path_entry (p, fl));
-   return pe.first;
- }
-
- // Return true if the path refers to a directory (path_entry() is called
- // with the second argument set to true).
- //
- bool
- dir_exists (const char* p)
- {
-   const auto pe (path_entry (p, true));
-
-   if (!pe.first)
-     return false;
-
-   return pe.second == entry_type::directory;
- }
-
-#ifndef _WIN32
- // Query the filesystem entry at the path using stat() (fl is true) or
- // lstat() (fl is false). Return {false, unknown} if stat fails with ENOENT
- // or ENOTDIR and throw via throw_generic_error() for any other failure.
- //
- pair<bool, entry_type>
- path_entry (const char* p, bool fl)
- {
-   struct stat st;
-   const int rc (fl ? stat (p, &st) : lstat (p, &st));
-
-   if (rc != 0)
-   {
-     if (errno != ENOENT && errno != ENOTDIR)
-       throw_generic_error (errno);
-
-     return make_pair (false, entry_type::unknown);
-   }
-
-   const auto mode (st.st_mode);
-
-   // Anything that is not recognized below stays unknown.
-   //
-   const entry_type type (
-     S_ISREG (mode) ? entry_type::regular   :
-     S_ISDIR (mode) ? entry_type::directory :
-     S_ISLNK (mode) ? entry_type::symlink   :
-     S_ISBLK (mode) || S_ISCHR (mode) || S_ISFIFO (mode) || S_ISSOCK (mode)
-     ? entry_type::other
-     : entry_type::unknown);
-
-   return make_pair (true, type);
- }
-#else
- // Windows implementation. The second argument is unnamed and not used:
- // entries with the reparse point attribute are reported as existing but of
- // the unknown type.
- //
- pair<bool, entry_type>
- path_entry (const char* p, bool)
- {
-   // A path like 'C:', while being a root path in our terminology, is not as
-   // such for Windows, that maintains current directory for each drive, and
-   // so C: means the current directory on the drive C. This is not what we
-   // mean here, so need to append the trailing directory separator in such a
-   // case.
-   //
-   string root;
-   if (path::traits::root (p, string::traits_type::length (p)))
-   {
-     root = p;
-     root += path::traits::directory_separator;
-     p = root.c_str ();
-   }
-
-   const DWORD attr (GetFileAttributesA (p));
-
-   if (attr == INVALID_FILE_ATTRIBUTES) // Presumably not exists.
-     return make_pair (false, entry_type::unknown);
-
-   // S_ISLNK/S_IFDIR are not defined for Win32 but it does have symlinks.
-   // We will consider symlink entry to be of the unknown type. Note that
-   // S_ISREG() and S_ISDIR() return as they would do for a symlink target.
-   //
-   if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) != 0)
-     return make_pair (true, entry_type::unknown);
-
-   struct _stat st;
-   if (_stat (p, &st) != 0)
-   {
-     if (errno != ENOENT && errno != ENOTDIR)
-       throw_generic_error (errno);
-
-     return make_pair (false, entry_type::unknown);
-   }
-
-   const auto mode (st.st_mode);
-
-   entry_type type (entry_type::unknown);
-
-   if (S_ISREG (mode))
-     type = entry_type::regular;
-   else if (S_ISDIR (mode))
-     type = entry_type::directory;
-
-   return make_pair (true, type);
- }
-#endif
-
- // Try to create the directory. Return already_exists if creation fails with
- // EEXIST and the path is an existing directory. Any other failure is thrown
- // via throw_generic_error(). The mode argument is not used on Windows.
- //
- mkdir_status
-#ifndef _WIN32
- try_mkdir (const dir_path& p, mode_t m)
- {
-   const int rc (mkdir (p.string ().c_str (), m));
-#else
- try_mkdir (const dir_path& p, mode_t)
- {
-   const int rc (_mkdir (p.string ().c_str ()));
-#endif
-
-   if (rc == 0)
-     return mkdir_status::success;
-
-   // Save errno since it is examined after the dir_exists() call.
-   //
-   const int e (errno);
-
-   // EEXIST means the path already exists but not necessarily as
-   // a directory.
-   //
-   if (e != EEXIST || !dir_exists (p))
-     throw_generic_error (e);
-
-   return mkdir_status::already_exists;
- }
-
- // Create the directory together with any missing parent directories. The
- // returned status is that of the try_mkdir() call for p itself.
- //
- mkdir_status
- try_mkdir_p (const dir_path& p, mode_t m)
- {
-   if (!p.root ())
-   {
-     const dir_path parent (p.directory ());
-     const bool missing (!parent.empty () && !dir_exists (parent));
-
-     // Create the parent chain first (recursively).
-     //
-     if (missing)
-       try_mkdir_p (parent, m);
-   }
-
-   return try_mkdir (p, m);
- }
-
- // Try to remove the directory. ENOENT is reported as not_exist and
- // ENOTEMPTY/EEXIST as not_empty. Any other error is thrown via
- // throw_generic_error() unless ignore_error is true, in which case success
- // is returned.
- //
- rmdir_status
- try_rmdir (const dir_path& p, bool ignore_error)
- {
-#ifndef _WIN32
-   const int rc (rmdir (p.string ().c_str ()));
-#else
-   const int rc (_rmdir (p.string ().c_str ()));
-#endif
-
-   if (rc == 0)
-     return rmdir_status::success;
-
-   if (errno == ENOENT)
-     return rmdir_status::not_exist;
-
-   if (errno == ENOTEMPTY || errno == EEXIST)
-     return rmdir_status::not_empty;
-
-   if (!ignore_error)
-     throw_generic_error (errno);
-
-   return rmdir_status::success;
- }
-
- // Recursively remove the contents of the directory p and, if dir is true,
- // the directory itself. Sub-directories are always removed together with
- // their contents. If ignore_error is true, then removal errors are not
- // thrown by the try_rmfile()/try_rmdir() calls made here.
- //
- void
- rmdir_r (const dir_path& p, bool dir, bool ignore_error)
- {
-   // An nftw()-based implementation (for platforms that support it)
-   // might be a faster way.
-   //
-   for (const dir_entry& de: dir_iterator (p))
-   {
-     path ep (p / de.path ()); //@@ Would be good to reuse the buffer.
-
-     if (de.ltype () == entry_type::directory)
-       rmdir_r (path_cast<dir_path> (move (ep)), true, ignore_error);
-     else
-       try_rmfile (ep, ignore_error);
-   }
-
-   if (dir)
-   {
-     // Pass ignore_error through: otherwise try_rmdir() would throw on
-     // errors other than "does not exist"/"not empty" even though the caller
-     // asked for them to be ignored.
-     //
-     rmdir_status r (try_rmdir (p, ignore_error));
-
-     if (r != rmdir_status::success && !ignore_error)
-       throw_generic_error (r == rmdir_status::not_empty
-                            ? ENOTEMPTY
-                            : ENOENT);
-   }
- }
-
- // Try to remove the file. ENOENT and ENOTDIR are reported as not_exist.
- // Any other error is thrown via throw_generic_error() unless ignore_error
- // is true, in which case success is returned.
- //
- rmfile_status
- try_rmfile (const path& p, bool ignore_error)
- {
-#ifndef _WIN32
-   const int rc (unlink (p.string ().c_str ()));
-#else
-   const int rc (_unlink (p.string ().c_str ()));
-#endif
-
-   if (rc == 0)
-     return rmfile_status::success;
-
-   // Strangely on Linux unlink() removes a dangling symlink but returns
-   // ENOENT.
-   //
-   if (errno == ENOENT || errno == ENOTDIR)
-     return rmfile_status::not_exist;
-
-   if (!ignore_error)
-     throw_generic_error (errno);
-
-   return rmfile_status::success;
- }
-
-#ifndef _WIN32
- // Create a symbolic link to target at the link path. The third argument
- // is not used by this implementation.
- //
- void
- mksymlink (const path& target, const path& link, bool)
- {
-   const char* t (target.string ().c_str ());
-   const char* l (link.string ().c_str ());
-
-   if (symlink (t, l) == -1)
-     throw_generic_error (errno);
- }
-
- // Create a hard link to target at the link path. The third argument is not
- // used by this implementation.
- //
- void
- mkhardlink (const path& target, const path& link, bool)
- {
-   const char* t (target.string ().c_str ());
-   const char* l (link.string ().c_str ());
-
-   // Qualified call since the parameter named link hides the function.
-   //
-   if (::link (t, l) == -1)
-     throw_generic_error (errno);
- }
-
-#else
-
- // Symlink creation is not implemented for Windows: always throws ENOSYS.
- //
- void
- mksymlink (const path& /*target*/, const path& /*link*/, bool /*dir*/)
- {
-   throw_generic_error (ENOSYS, "symlinks not supported");
- }
-
- // Create a hard link to the target file using CreateHardLinkA(). Directory
- // hard links (dir is true) are rejected with ENOSYS.
- //
- void
- mkhardlink (const path& target, const path& link, bool dir)
- {
-   if (dir)
-     throw_generic_error (ENOSYS, "directory hard links not supported");
-   else
-   {
-     const char* l (link.string ().c_str ());
-     const char* t (target.string ().c_str ());
-
-     if (!CreateHardLinkA (l, t, nullptr))
-       throw_system_error (GetLastError ());
-   }
- }
-#endif
-
- // For I/O operations cpfile() can throw ios_base::failure exception that is
- // not derived from system_error for old versions of g++ (as of 4.9). On the
- // other hand cpfile() must throw system_error only. So we have two
- // implementations selected by the template argument: this one lets the
- // stream exceptions propagate as is while the other one catches
- // ios_base::failure and rethrows it as system_error.
- //
- template <bool v>
- static inline typename enable_if<v>::type
- cpfile (const path& from, const path& to,
-         cpflags fl,
-         permissions perm,
-         auto_rmfile& rm)
- {
-   ifdstream ifs (from, fdopen_mode::binary);
-
-   const bool overwrite (
-     (fl & cpflags::overwrite_content) == cpflags::overwrite_content);
-
-   fdopen_mode om (fdopen_mode::out      |
-                   fdopen_mode::truncate |
-                   fdopen_mode::create   |
-                   fdopen_mode::binary);
-
-   // Unless overwriting is requested, open the destination exclusively.
-   //
-   if (!overwrite)
-     om |= fdopen_mode::exclusive;
-
-   ofdstream ofs (fdopen (to, om, perm));
-
-   // Hand the destination over to the caller's guard only after it has been
-   // opened.
-   //
-   rm = auto_rmfile (to);
-
-   // Throws ios::failure on fdbuf read/write failures.
-   //
-   // Note that the eof check is important: if the stream is at eof (empty
-   // file) then this write will fail.
-   //
-   const bool empty (ifs.peek () == ifdstream::traits_type::eof ());
-
-   if (!empty)
-     ofs << ifs.rdbuf ();
-
-   ifs.close (); // Throws ios::failure on failure.
-   ofs.close (); // Throws ios::failure on flush/close failure.
- }
-
- // Implementation used when ios_base::failure is not derived from
- // system_error: call the other implementation and translate the stream
- // failure to a generic EIO error.
- //
- template <bool v>
- static inline typename enable_if<!v>::type
- cpfile (const path& from, const path& to,
-         cpflags fl,
-         permissions perm,
-         auto_rmfile& rm)
- {
-   try
-   {
-     cpfile<true> (from, to, fl, perm, rm);
-   }
-   catch (const ios_base::failure& f)
-   {
-     // While we try to preserve the original error information, we can not
-     // make the description to be exactly the same, for example
-     //
-     // Is a directory
-     //
-     // becomes
-     //
-     // Is a directory: Input/output error
-     //
-     // Note that our custom operator<<(ostream, exception) doesn't strip this
-     // suffix. This is a temporary code after all.
-     //
-     throw_generic_error (EIO, f.what ());
-   }
- }
-
- // Copy the file. The destination is opened with the source permissions
- // and, if the overwrite_permissions flag is set, the permissions are also
- // set explicitly after copying. The auto_rmfile guard is canceled only once
- // everything has succeeded.
- //
- void
- cpfile (const path& from, const path& to, cpflags fl)
- {
-   const permissions perm (path_permissions (from));
-   auto_rmfile rm;
-
-   // Select the implementation depending on whether ios_base::failure is
-   // already a system_error.
-   //
-   constexpr bool se (is_base_of<system_error, ios_base::failure>::value);
-
-   cpfile<se> (from, to, fl, perm, rm);
-
-   const bool set_perm (
-     (fl & cpflags::overwrite_permissions) == cpflags::overwrite_permissions);
-
-   if (set_perm)
-     path_permissions (to, perm);
-
-   rm.cancel ();
- }
-
- // Figuring out whether we have the nanoseconds in struct stat. Some
- // platforms (e.g., FreeBSD), may provide some "compatibility" #define's,
- // so use the second argument to not end up with the same signatures.
- //
- // Each overload below returns the nanoseconds part of the modification
- // time and is only viable if the corresponding member is present.
- //
- template <typename S>
- inline constexpr auto
- mnsec (const S* st, bool) -> decltype(st->st_mtim.tv_nsec)
- {
-   // POSIX (GNU/Linux, Solaris).
-   //
-   return st->st_mtim.tv_nsec;
- }
-
- template <typename S>
- inline constexpr auto
- mnsec (const S* st, int) -> decltype(st->st_mtimespec.tv_nsec)
- {
-   // *BSD, MacOS.
-   //
-   return st->st_mtimespec.tv_nsec;
- }
-
- template <typename S>
- inline constexpr auto
- mnsec (const S* st, float) -> decltype(st->st_mtime_n)
- {
-   // AIX 5.2 and later.
-   //
-   return st->st_mtime_n;
- }
-
- // Things are not going to end up well with only seconds resolution so
- // let's make it a compile error.
- //
- // template <typename S>
- // inline constexpr int
- // mnsec (...) {return 0;}
-
- // Same as mnsec() but for the nanoseconds part of the access time.
- //
- template <typename S>
- inline constexpr auto
- ansec (const S* st, bool) -> decltype(st->st_atim.tv_nsec)
- {
-   // POSIX (GNU/Linux, Solaris).
-   //
-   return st->st_atim.tv_nsec;
- }
-
- template <typename S>
- inline constexpr auto
- ansec (const S* st, int) -> decltype(st->st_atimespec.tv_nsec)
- {
-   // *BSD, MacOS.
-   //
-   return st->st_atimespec.tv_nsec;
- }
-
- template <typename S>
- inline constexpr auto
- ansec (const S* st, float) -> decltype(st->st_atime_n)
- {
-   // AIX 5.2 and later.
-   //
-   return st->st_atime_n;
- }
-
- // template <typename S>
- // inline constexpr int
- // ansec (...) {return 0;}
-
- void // Move the filesystem entry from one path to another.
- mventry (const path& from, const path& to, cpflags fl)
- {
- assert ((fl & cpflags::overwrite_permissions) == // The overwrite_permissions flag is required.
- cpflags::overwrite_permissions);
-
- bool ovr ((fl & cpflags::overwrite_content) == cpflags::overwrite_content); // Existing destination may be replaced.
-
- const char* f (from.string ().c_str ());
- const char* t (to.string ().c_str ());
-
-#ifndef _WIN32
-
- if (!ovr && path_entry (to).first) // Destination exists but overwriting was not requested.
- throw_generic_error (EEXIST);
-
- if (::rename (f, t) == 0) // POSIX implementation.
- return;
-
- // If source and destination paths are on different file systems (EXDEV) we
- // need to move the file ourselves. Any other rename() error is thrown.
- //
- if (errno != EXDEV)
- throw_generic_error (errno);
-
- // Note that cpfile() follows symlinks, so we need to remove the destination
- // if it exists.
- //
- try_rmfile (to);
-
- // Note that permissions are copied unconditionally to a new file.
- //
- cpfile (from, to, cpflags::none);
-
- // Copy file access and modification times (taken from the source with
- // stat() and converted from nanoseconds to microseconds for utimes()).
- struct stat s;
- if (stat (f, &s) != 0)
- throw_generic_error (errno);
-
- timeval times[2];
- times[0].tv_sec = s.st_atime; // Access time.
- times[0].tv_usec = ansec<struct stat> (&s, true) / 1000;
- times[1].tv_sec = s.st_mtime; // Modification time.
- times[1].tv_usec = mnsec<struct stat> (&s, true) / 1000;
-
- if (utimes (t, times) != 0)
- throw_generic_error (errno);
-
- // Finally, remove the source file.
- //
- try_rmfile (from);
-
-#else
-
- // While ::rename() is present on Windows, it is not POSIX but ISO C
- // implementation, that doesn't fit our needs well.
- //
- auto te (path_entry (to));
-
- if (!ovr && te.first) // Destination exists but overwriting was not requested.
- throw_generic_error (EEXIST);
-
- bool td (te.first && te.second == entry_type::directory); // Destination is a directory.
-
- auto fe (path_entry (from));
- bool fd (fe.first && fe.second == entry_type::directory); // Source is a directory.
-
- // If source and destination filesystem entries exist, they both must be
- // either directories or not directories.
- //
- if (fe.first && te.first && fd != td)
- throw_generic_error (ENOTDIR);
-
- DWORD mfl (fd ? 0 : (MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)); // No flags when moving a directory.
-
- if (MoveFileExA (f, t, mfl))
- return;
-
- // If the destination already exists, then MoveFileExA() succeeds only if
- // it is a regular file or a symlink. Let's also support an empty directory
- // special case to comply with POSIX. If the destination is an empty
- // directory we will just remove it and retry the move operation.
- //
- // Note that under Wine we end up with ERROR_ACCESS_DENIED error code in
- // that case, and with ERROR_ALREADY_EXISTS when run natively.
- //
- DWORD ec (GetLastError ());
- if ((ec == ERROR_ALREADY_EXISTS || ec == ERROR_ACCESS_DENIED) && td &&
- try_rmdir (path_cast<dir_path> (to)) != rmdir_status::not_empty &&
- MoveFileExA (f, t, mfl))
- return;
-
- throw_system_error (ec); // Note: reports the error code of the first MoveFileExA() attempt.
-
-#endif
- }
-
- // Return the modification time of the file. Return timestamp_nonexistent
- // if the entry does not exist or, on POSIX, is not a regular file (on
- // Windows: is a directory).
- //
- timestamp
- file_mtime (const char* p)
- {
-#ifndef _WIN32
-   struct stat st;
-   if (stat (p, &st) != 0)
-   {
-     if (errno != ENOENT && errno != ENOTDIR)
-       throw_generic_error (errno);
-
-     return timestamp_nonexistent;
-   }
-
-   if (!S_ISREG (st.st_mode))
-     return timestamp_nonexistent;
-
-   // Whole seconds plus the nanoseconds part.
-   //
-   const chrono::nanoseconds nsec (mnsec<struct stat> (&st, true));
-
-   return system_clock::from_time_t (st.st_mtime) +
-     chrono::duration_cast<duration> (nsec);
-#else
-
-   WIN32_FILE_ATTRIBUTE_DATA fa;
-
-   if (!GetFileAttributesExA (p, GetFileExInfoStandard, &fa))
-   {
-     const DWORD ec (GetLastError ());
-
-     // Error codes that we treat as "no such file".
-     //
-     switch (ec)
-     {
-     case ERROR_FILE_NOT_FOUND:
-     case ERROR_PATH_NOT_FOUND:
-     case ERROR_INVALID_NAME:
-     case ERROR_INVALID_DRIVE:
-     case ERROR_BAD_PATHNAME:
-     case ERROR_BAD_NETPATH:
-       return timestamp_nonexistent;
-     }
-
-     throw_system_error (ec);
-   }
-
-   if ((fa.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
-     return timestamp_nonexistent;
-
-   // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch"
-   // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch"
-   // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds.
-   //
-   const FILETIME& ft (fa.ftLastWriteTime);
-
-   uint64_t ticks ((static_cast<uint64_t> (ft.dwHighDateTime) << 32) |
-                   ft.dwLowDateTime);
-
-   ticks -= 11644473600ULL * 10000000; // Now in UNIX epoch.
-
-   const uint64_t ns (ticks * 100);    // Now in nanoseconds.
-
-   return timestamp (
-     chrono::duration_cast<duration> (
-       chrono::nanoseconds (ns)));
-#endif
- }
-
- // Return the permission bits of the filesystem entry as obtained with
- // stat() (_stat() on Windows). Throw via throw_generic_error() on failure.
- //
- permissions
- path_permissions (const path& p)
- {
-#ifndef _WIN32
-   struct stat st;
-   const int rc (stat (p.string ().c_str (), &st));
-#else
-   struct _stat st;
-   const int rc (_stat (p.string ().c_str (), &st));
-#endif
-
-   if (rc != 0)
-     throw_generic_error (errno);
-
-   // VC++ has no S_IRWXU defined. MINGW GCC <= 4.9 has no S_IRWXG, S_IRWXO
-   // defined.
-   //
-   // We could extrapolate user permissions to group/other permissions if
-   // S_IRWXG/S_IRWXO are undefined. That is, we could consider their absence
-   // as meaning that the platform does not distinguish between permissions
-   // for different kinds of users. Let's wait for a use-case first.
-   //
-   mode_t mask (S_IREAD | S_IWRITE | S_IEXEC);
-
-#ifdef S_IRWXG
-   mask |= S_IRWXG;
-#endif
-
-#ifdef S_IRWXO
-   mask |= S_IRWXO;
-#endif
-
-   return static_cast<permissions> (st.st_mode & mask);
- }
-
- // Set the permissions of the filesystem entry with chmod() (_chmod() on
- // Windows). Only the permission bits available on the platform are passed.
- // Throw via throw_generic_error() on failure.
- //
- void
- path_permissions (const path& p, permissions f)
- {
-   mode_t mask (S_IREAD | S_IWRITE | S_IEXEC);
-
-#ifdef S_IRWXG
-   mask |= S_IRWXG;
-#endif
-
-#ifdef S_IRWXO
-   mask |= S_IRWXO;
-#endif
-
-   const mode_t m (mask & static_cast<mode_t> (f));
-
-#ifndef _WIN32
-   const int rc (chmod (p.string ().c_str (), m));
-#else
-   const int rc (_chmod (p.string ().c_str (), m));
-#endif
-
-   if (rc == -1)
-     throw_generic_error (errno);
- }
-
- // dir_{entry,iterator}
- //
-#ifndef _WIN32
-
- // dir_entry
- //
- // Close the directory stream if it is still open.
- //
- dir_iterator::
- ~dir_iterator ()
- {
-   if (h_ == nullptr)
-     return;
-
-   closedir (h_); // Ignore any errors.
- }
-
- // Move-assign: take over the entry and the directory stream of x and close
- // the stream previously held by this iterator. Throw via
- // throw_generic_error() if closing the old stream fails.
- //
- dir_iterator& dir_iterator::
- operator= (dir_iterator&& x)
- {
-   if (this != &x)
-   {
-     e_ = move (x.e_);
-
-     // Take over x's stream before closing our own. This way, if closedir()
-     // fails, we are not left holding a handle that has already been passed
-     // to closedir() (which the destructor would then close again) and the
-     // entry and the handle stay in sync.
-     //
-     DIR* old (h_);
-     h_ = x.h_;
-     x.h_ = nullptr;
-
-     if (old != nullptr && closedir (old) == -1)
-       throw_generic_error (errno);
-   }
-   return *this;
- }
-
- // Determine the entry type using stat() (link is true) or lstat() (link is
- // false) on the b_ / p_ path. Throw via throw_generic_error() on failure.
- //
- entry_type dir_entry::
- type (bool link) const
- {
-   const path_type p (b_ / p_);
-   const char* n (p.string ().c_str ());
-
-   struct stat st;
-   const int rc (link ? stat (n, &st) : lstat (n, &st));
-
-   if (rc != 0)
-     throw_generic_error (errno);
-
-   const auto m (st.st_mode);
-
-   if (S_ISREG (m))
-     return entry_type::regular;
-
-   if (S_ISDIR (m))
-     return entry_type::directory;
-
-   if (S_ISLNK (m))
-     return entry_type::symlink;
-
-   return entry_type::other;
- }
-
- // dir_iterator
- //
- // Deleter for unique_ptr<DIR>: close the directory stream unless null.
- //
- struct dir_deleter
- {
-   void
-   operator() (DIR* d) const
-   {
-     if (d == nullptr)
-       return;
-
-     closedir (d);
-   }
- };
-
- // Open the directory and position the iterator on its first entry (next()
- // closes the stream and resets h_ if there are none).
- //
- dir_iterator::
- dir_iterator (const dir_path& d)
- {
-   // Guard the stream so that it is closed if we throw below.
-   //
-   unique_ptr<DIR, dir_deleter> guard (opendir (d.string ().c_str ()));
-   h_ = guard.get ();
-
-   if (h_ == nullptr)
-     throw_generic_error (errno);
-
-   next ();
-
-   // Only set the base directory if we are not at the end.
-   //
-   if (h_ != nullptr)
-     e_.b_ = d;
-
-   guard.release ();
- }
-
- // Map the d_type member of the directory entry to entry_type. This
- // overload is only viable if D has the d_type member; DT_* values that are
- // not defined or not recognized map to unknown.
- //
- template <typename D>
- inline /*constexpr*/ entry_type d_type (const D* d, decltype(d->d_type)*)
- {
-   const auto t (d->d_type);
-
-   switch (t)
-   {
-#ifdef DT_DIR
-   case DT_DIR: return entry_type::directory;
-#endif
-#ifdef DT_REG
-   case DT_REG: return entry_type::regular;
-#endif
-#ifdef DT_LNK
-   case DT_LNK: return entry_type::symlink;
-#endif
-#ifdef DT_BLK
-   case DT_BLK:
-#endif
-#ifdef DT_CHR
-   case DT_CHR:
-#endif
-#ifdef DT_FIFO
-   case DT_FIFO:
-#endif
-#ifdef DT_SOCK
-   case DT_SOCK:
-#endif
-     return entry_type::other;
-
-   default: return entry_type::unknown;
-   }
- }
-
- // Fallback for when D has no d_type member: the type is unknown.
- //
- template <typename D>
- inline constexpr entry_type d_type (...)
- {
-   return entry_type::unknown;
- }
-
- // Advance to the next entry skipping '.' and '..'. At the end of the
- // stream close it and reset h_ to nullptr. Throw via throw_generic_error()
- // if readdir() fails.
- //
- void dir_iterator::
- next ()
- {
-   while (true)
-   {
-     // Reset errno to be able to distinguish the end of stream from an error.
-     //
-     errno = 0;
-     struct dirent* de (readdir (h_));
-
-     if (de == nullptr)
-     {
-       if (errno != 0)
-         throw_generic_error (errno);
-
-       // End of stream.
-       //
-       closedir (h_);
-       h_ = nullptr;
-       return;
-     }
-
-     // We can accept some overhead for '.' and '..' (relying on short
-     // string optimization) in favor of a more compact code.
-     //
-     path p (de->d_name);
-
-     // Skip '.' and '..'.
-     //
-     if (p.current () || p.parent ())
-       continue;
-
-     e_.p_ = move (p);
-     e_.t_ = d_type<struct dirent> (de, nullptr);
-     e_.lt_ = entry_type::unknown;
-     return;
-   }
- }
-
-#else
-
- // dir_entry
- //
- // Close the search handle if it is still open.
- //
- dir_iterator::
- ~dir_iterator ()
- {
-   if (h_ == -1)
-     return;
-
-   _findclose (h_); // Ignore any errors.
- }
-
- // Move-assign: take over the entry and the search handle of x and close the
- // handle previously held by this iterator. Throw via throw_generic_error()
- // if closing the old handle fails.
- //
- dir_iterator& dir_iterator::
- operator= (dir_iterator&& x)
- {
-   if (this != &x)
-   {
-     e_ = move (x.e_);
-
-     // Take over x's handle before closing our own. This way, if
-     // _findclose() fails, we are not left holding a handle that has already
-     // been passed to _findclose() (which the destructor would then close
-     // again) and the entry and the handle stay in sync.
-     //
-     intptr_t old (h_);
-     h_ = x.h_;
-     x.h_ = -1;
-
-     if (old != -1 && _findclose (old) == -1)
-       throw_generic_error (errno);
-   }
-   return *this;
- }
-
- // Determine the entry type using _stat() on the b_ / p_ path. The argument
- // is not used. Throw via throw_generic_error() on failure.
- //
- entry_type dir_entry::
- type (bool) const
- {
-   // Note that we currently do not support symlinks (yes, there is symlink
-   // support since Vista).
-   //
-   const path_type p (b_ / p_);
-
-   struct _stat st;
-   if (_stat (p.string ().c_str (), &st) != 0)
-     throw_generic_error (errno);
-
-   if (S_ISREG (st.st_mode))
-     return entry_type::regular;
-
-   return S_ISDIR (st.st_mode)
-     ? entry_type::directory
-     : entry_type::other;
- }
-
- // dir_iterator
- //
- // Guard that closes the referenced search handle on destruction unless
- // released or the handle is -1. Note that it holds a pointer to the handle,
- // not a copy, and so sees its current value.
- //
- struct auto_dir
- {
-   explicit
-   auto_dir (intptr_t& h): h_ (&h) {}
-
-   auto_dir (const auto_dir&) = delete;
-   auto_dir& operator= (const auto_dir&) = delete;
-
-   ~auto_dir ()
-   {
-     if (h_ == nullptr)
-       return;
-
-     if (*h_ != -1)
-       _findclose (*h_);
-   }
-
-   void
-   release ()
-   {
-     h_ = nullptr;
-   }
-
- private:
-   intptr_t* h_;
- };
-
- // Start iterating over the directory. The search handle is opened by the
- // first next() call and is guarded in case that call throws.
- //
- dir_iterator::
- dir_iterator (const dir_path& d)
- {
-   auto_dir guard (h_);
-
-   // Used by next() to call _findfirst().
-   //
-   e_.b_ = d;
-
-   next ();
-   guard.release ();
- }
-
- // Advance to the next entry skipping '.' and '..'. The first call (h_ is
- // -1) opens the search with _findfirst(). At the end of the stream close
- // the handle and reset h_ to -1.
- //
- void dir_iterator::
- next ()
- {
-   while (true)
-   {
-     _finddata_t fi;
-     bool found;
-
-     if (h_ != -1)
-       found = _findnext (h_, &fi) == 0;
-     else
-     {
-       // The call is made from the constructor. Any other call with h_ == -1
-       // is illegal.
-       //
-
-       // Check to distinguish non-existent vs empty directories.
-       //
-       if (!dir_exists (e_.b_))
-         throw_generic_error (ENOENT);
-
-       h_ = _findfirst ((e_.b_ / path ("*")).string ().c_str (), &fi);
-       found = h_ != -1;
-     }
-
-     if (!found)
-     {
-       if (errno != ENOENT)
-         throw_generic_error (errno);
-
-       // End of stream.
-       //
-       if (h_ != -1)
-       {
-         _findclose (h_);
-         h_ = -1;
-       }
-
-       return;
-     }
-
-     // We can accept some overhead for '.' and '..' (relying on short
-     // string optimization) in favor of a more compact code.
-     //
-     path p (fi.name);
-
-     // Skip '.' and '..'.
-     //
-     if (p.current () || p.parent ())
-       continue;
-
-     e_.p_ = move (p);
-
-     // We do not support symlinks at the moment.
-     //
-     e_.t_ = fi.attrib & _A_SUBDIR
-       ? entry_type::directory
-       : entry_type::regular;
-
-     e_.lt_ = entry_type::unknown;
-     return;
-   }
- }
-#endif
-
- // Match the name [ni, ne) to the pattern [pi, pe). Ranges can be empty.
- // In the pattern '*' matches any (possibly empty) sequence of characters and '?' matches any single character; under _WIN32 characters are compared via lcase().
- static bool
- match (string::const_iterator pi, string::const_iterator pe,
- string::const_iterator ni, string::const_iterator ne)
- {
- using reverse_iterator = std::reverse_iterator<string::const_iterator>;
-
- reverse_iterator rpi (pe);
- reverse_iterator rpe (pi);
-
- reverse_iterator rni (ne);
- reverse_iterator rne (ni);
-
- // Match the pattern suffix (follows the last *) to the name trailing
- // characters.
- //
- char pc; // Pattern character currently being examined.
- for (; rpi != rpe && (pc = *rpi) != '*' && rni != rne; ++rpi, ++rni)
- {
-#ifndef _WIN32
- if (*rni != pc && pc != '?')
-#else
- if (lcase (*rni) != lcase (pc) && pc != '?')
-#endif
- return false;
- }
-
- // If we got to the (reversed) end of the pattern (no * is encountered)
- // then we are done. The success depends on if we got to the (reversed) end
- // of the name as well.
- //
- if (rpi == rpe)
- return rni == rne;
-
- // If we didn't reach * in the pattern then we reached the (reversed) end
- // of the name. That means we have unmatched non-star characters in the
- // pattern, and so match failed.
- //
- if (pc != '*')
- {
- assert (rni == rne);
- return false;
- }
-
- // Match the pattern prefix (ends with the first *) to the name leading
- // characters. If they mismatch we failed. Otherwise if this is an only *
- // in the pattern (matches whatever is left in the name) then we succeed,
- // otherwise we perform backtracking (recursively).
- //
- pe = rpi.base (); // One past the rightmost * in the pattern.
- ne = rni.base (); // One past the last name character not consumed by the suffix match.
-
- // Compare the pattern and the name char by char until the name suffix or
- // * is encountered in the pattern (whichever happens first). Fail if a
- // char mismatches. Note that [pi, pe) contains at least one * at this point.
- //
- for (; (pc = *pi) != '*' && ni != ne; ++pi, ++ni)
- {
-#ifndef _WIN32
- if (*ni != pc && pc != '?')
-#else
- if (lcase (*ni) != lcase (pc) && pc != '?')
-#endif
- return false;
- }
-
- // If we didn't get to * in the pattern then we got to the name suffix.
- // That means that the pattern has unmatched non-star characters, and so
- // match failed.
- //
- if (pc != '*')
- {
- assert (ni == ne);
- return false;
- }
-
- // If * that we have reached is the last one, then it matches whatever is
- // left in the name (including an empty range).
- //
- if (++pi == pe)
- return true;
-
- // Perform backtracking.
- //
- // From now on, we will call the pattern not-yet-matched part (starting
- // the leftmost * and ending the rightmost one inclusively) as pattern, and
- // the name not-yet-matched part as name.
- //
- // Here we sequentially assume that * that starts the pattern matches the
- // name leading part (starting from an empty one and iterating till the full
- // name). So if, at some iteration, the pattern trailing part (that follows
- // the leftmost *) matches the name trailing part, then the pattern matches
- // the name.
- //
- bool r; // Result of the last recursive match attempt.
- for (; !(r = match (pi, pe, ni, ne)) && ni != ne; ++ni) ;
- return r;
- }
-
- // Return true if the name matches the pattern. A name with a trailing
- // directory separator only matches a pattern with a trailing directory
- // separator and vice versa.
- //
- bool
- path_match (const string& pattern, const string& name)
- {
-   // Implementation notes:
-   //
-   // - This has a good potential of becoming hairy quickly so need to strive
-   //   for an elegant way to implement this.
-   //
-   // - Most patterns will contain a single * wildcard with a prefix and/or
-   //   suffix (e.g., *.txt, foo*, f*.txt). Something like this is not very
-   //   common: *foo*.
-   //
-   //   So it would be nice to have a clever implementation that first
-   //   "anchors" itself with a literal prefix and/or suffix and only then
-   //   continue with backtracking. In other words, reduce:
-   //
-   //   *.txt  vs foo.txt -> * vs foo
-   //   foo*   vs foo.txt -> * vs .txt
-   //   f*.txt vs foo.txt -> * vs oo
-   //
-
-   // The name doesn't match the pattern if it is of a different type than the
-   // pattern is.
-   //
-   const bool pd (!pattern.empty () &&
-                  path::traits::is_separator (pattern.back ()));
-
-   const bool nd (!name.empty () &&
-                  path::traits::is_separator (name.back ()));
-
-   if (pd != nd)
-     return false;
-
-   // Exclude the trailing separator, if present, from both ranges.
-   //
-   const auto pe (pd ? pattern.end () - 1 : pattern.end ());
-   const auto ne (nd ? name.end () - 1 : name.end ());
-
-   return match (pattern.begin (), pe, name.begin (), ne);
- }
-
- // Iterate over directory sub-entries, recursively and including itself if
- // requested. Note that recursive iterating goes depth-first which makes
- // sense for the cleanup use cases (@@ maybe this should be controllable
- // since for directory creation it won't make sense).
- //
- // Prior to recursively opening a directory for iterating the preopen
- // callback function is called. If false is returned, then the directory is
- // not traversed but still returned by the next() call.
- //
- // Note that iterating over a non-existent directory is not an error. The
- // subsequent next() call returns false for such a directory.
- //
- using preopen = std::function<bool (const dir_path&)>;
-
- class recursive_dir_iterator // Iterates over sub-entries of a directory using a stack of dir_iterator's.
- {
- public:
- recursive_dir_iterator (dir_path p,
- bool recursive,
- bool self,
- bool fs,
- preopen po)
- : start_ (move (p)),
- recursive_ (recursive),
- self_ (self),
- follow_symlinks_ (fs),
- preopen_ (move (po))
- {
- open (dir_path (), self_); // Open the start directory; preopen is only called for it if self is set.
- }
-
- // Non-copyable, non-movable type.
- //
- recursive_dir_iterator (const recursive_dir_iterator&) = delete;
- recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete;
-
- // Return false if no more entries left. Otherwise save the next entry path
- // and return true. The path is relative against the directory being
- // traversed and contains a trailing separator for sub-directories. Throw
- // std::system_error in case of a failure (insufficient permissions,
- // dangling symlink encountered, etc).
- //
- bool
- next (path& p)
- {
- if (iters_.empty ())
- return false;
-
- auto& i (iters_.back ()); // Innermost directory currently being iterated.
-
- // If we got to the end of directory sub-entries, then go one level up
- // and return this directory path.
- //
- if (i.first == dir_iterator ())
- {
- path d (move (i.second));
- iters_.pop_back ();
-
- // Return the path unless it is the last one (the directory we started
- // to iterate from) and the self flag is not set.
- //
- if (iters_.empty () && !self_)
- return false;
-
- p = move (d);
- return true;
- }
-
- const dir_entry& de (*i.first);
-
- // Append separator if a directory. Note that dir_entry::type() can
- // throw.
- //
- entry_type et (follow_symlinks_ ? de.type () : de.ltype ());
- path pe (et == entry_type::directory
- ? path_cast<dir_path> (i.second / de.path ())
- : i.second / de.path ());
-
- ++i.first;
-
- if (recursive_ && pe.to_directory ()) // Descend; the directory itself is returned once its sub-entries are exhausted.
- {
- open (path_cast<dir_path> (move (pe)), true);
- return next (p);
- }
-
- p = move (pe);
- return true;
- }
-
- private:
- void
- open (dir_path p, bool preopen)
- {
- // We should consider a race condition here. The directory can be
- // removed before we create an iterator for it. In this case we just do
- // nothing, so the directory is silently skipped.
- //
- try
- {
- // If preopen_() returns false, then the directory will not be
- // traversed (as we leave iterator with end semantics) but still be
- // returned by the next() call as a sub-entry.
- //
- dir_iterator i;
- if (!preopen || preopen_ (p))
- {
- dir_path d (start_ / p);
- i = dir_iterator (!d.empty () ? d : dir_path (".")); // An empty path means the current directory.
- }
-
- iters_.emplace_back (move (i), move (p));
- }
- catch (const system_error& e)
- {
- // Ignore non-existent directory (ENOENT or ENOTDIR). Rethrow for any
- // other error. We consider ENOTDIR as a variety of removal, with a
- // new filesystem entry being created afterwards.
- //
- // Make sure that the error denotes errno portable code.
- //
- assert (e.code ().category () == generic_category ());
-
- int ec (e.code ().value ());
- if (ec != ENOENT && ec != ENOTDIR)
- throw;
- }
- }
-
- private:
- dir_path start_; // Directory the iteration starts from.
- bool recursive_; // Descend into sub-directories.
- bool self_; // Also return the start directory itself (as an empty path).
- bool follow_symlinks_; // Use dir_entry::type() rather than ltype() to detect directories.
- preopen preopen_; // Called before opening a directory for iterating.
- small_vector<pair<dir_iterator, dir_path>, 1> iters_; // Stack of open directories with their paths relative to start_.
- };
-
- // Search for paths matching the pattern and call the specified function for
- // each matching path. Return false if the underlying func() call returns
- // false. Otherwise the function conforms to the path_search() description.
- //
- // The filesystem is accessed via start_dir / pattern_dir while the paths
- // passed to func() are prefixed with pattern_dir only. The pattern and
- // pattern_dir arguments are taken by value since they are modified by the
- // fast-forwarding below. The start_dir argument stays the same for the
- // entire (recursive) search and so is passed by reference.
- //
- static const string any_dir ("*/");
-
- static bool
- search (
-   path pattern,
-   dir_path pattern_dir,
-   const dir_path& start_dir,
-   bool follow_symlinks,
-   const function<bool (path&&, const string& pattern, bool interm)>& func)
- {
-   // Fast-forward the leftmost pattern non-wildcard components. So, for
-   // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/.
-   //
-   {
-     auto b (pattern.begin ());
-     auto e (pattern.end ());
-     auto i (b);
-     for (; i != e && (*i).find_first_of ("*?") == string::npos; ++i) ;
-
-     // If the pattern has no wildcards then we reduce to checking for the
-     // filesystem entry existence. It matches if exists and is of the proper
-     // type.
-     //
-     if (i == e)
-     {
-       path p (pattern_dir / pattern);
-       auto pe (path_entry (start_dir / p, true));
-
-       if (pe.first &&
-           ((pe.second == entry_type::directory) == p.to_directory ()))
-         return func (move (p), string (), false);
-
-       return true;
-     }
-     else if (i != b) // There are non-wildcard components, so fast-forward.
-     {
-       path p (b, i);
-       pattern = pattern.leaf (p);
-       pattern_dir /= path_cast<dir_path> (move (p));
-     }
-   }
-
-   assert (!pattern.empty ());
-
-   // The pattern leftmost component. Will use it to match the start directory
-   // sub-entries.
-   //
-   path pc (pattern.begin (), ++pattern.begin ());
-   string pcr (pc.representation ());
-
-   // Note that if the pattern has multiple components (is not a simple path),
-   // then the leftmost one has a trailing separator, and so will match
-   // sub-directories only.
-   //
-   bool simple (pattern.simple ());
-
-   // Note that we rely on "small function object" optimization here.
-   //
-   recursive_dir_iterator i (
-     start_dir / pattern_dir,
-     pcr.find ("**") != string::npos,  // Recursive.
-     pcr.find ("***") != string::npos, // Self-inclusive.
-     follow_symlinks,
-     [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen.
-     {
-       return func (pattern_dir / p, any_dir, true);
-     });
-
-   // Canonicalize the pattern component collapsing consecutive stars (used to
-   // express that it is recursive) into a single one.
-   //
-   size_t j (0);
-   size_t n (pcr.size ());
-   for (size_t k (0); k < n; ++k)
-   {
-     char c (pcr[k]);
-     if (!(c == '*' && k > 0 && pcr[k - 1] == '*'))
-       pcr[j++] = c;
-   }
-
-   if (j != n)
-     pcr.resize (j);
-
-   // Note that the callback function can be called for the same directory
-   // twice: first time as intermediate match from iterator's preopen() call,
-   // and then, if the first call succeed, from the iterating loop (possibly
-   // as the final match).
-   //
-   path p;
-   while (i.next (p))
-   {
-     // Skip sub-entry if its name doesn't match the pattern leftmost
-     // component.
-     //
-     // Matching the directory we are iterating through (as for a pattern
-     // component containing ***) is a bit tricky. This directory is
-     // represented by the iterator as an empty path, and so we need to
-     // compute it (the leaf would actually be enough) for matching. This
-     // leaf can be acquired from the pattern_dir / start_dir path except the
-     // case when both directories are empty. This is the case when we search
-     // in the current directory (start_dir is empty) with a pattern that
-     // starts with *** wildcard (for example f***/bar). All we can do here is
-     // to fallback to path::current_directory() call. Note that this will be
-     // the only call per path_search() as the next time pattern_dir will not
-     // be empty.
-     //
-     const path& se (!p.empty ()
-                     ? p
-                     : path_cast<path> (!pattern_dir.empty ()
-                                        ? pattern_dir
-                                        : !start_dir.empty ()
-                                          ? start_dir
-                                          : path::current_directory ()));
-
-     if (!path_match (pcr, se.leaf ().representation ()))
-       continue;
-
-     // If the callback function returns false, then we stop the entire search
-     // for the final match, or do not search below the path for the
-     // intermediate one.
-     //
-     if (!func (pattern_dir / p, pcr, !simple))
-     {
-       if (simple) // Final match.
-         return false;
-       else
-         continue;
-     }
-
-     // If the pattern is not a simple one, and its leftmost component
-     // matches the sub-entry, then the sub-entry is a directory (see the note
-     // above), and we search in it using the trailing part of the pattern.
-     //
-     if (!simple && !search (pattern.leaf (pc),
-                             pattern_dir / path_cast<dir_path> (move (p)),
-                             start_dir,
-                             follow_symlinks,
-                             func))
-       return false;
-   }
-
-   return true;
- }
-
- // Search for paths matching the pattern calling func for each match. The
- // start directory is only used if the pattern is relative.
- //
- void
- path_search (
-   const path& pattern,
-   const function<bool (path&&, const string& pattern, bool interm)>& func,
-   const dir_path& start,
-   bool follow_symlinks)
- {
-   const dir_path start_dir (pattern.relative () ? start : dir_path ());
-
-   // The pattern directory is initially empty. The search result is not
-   // used.
-   //
-   search (pattern, dir_path (), start_dir, follow_symlinks, func);
- }