From 61377c582e0f2675baa5f5e6e30a35d1a4164b33 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 1 May 2017 16:08:43 +0300 Subject: Add hxx extension for headers and lib prefix for library dir --- libbutl/filesystem.cxx | 1396 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1396 insertions(+) create mode 100644 libbutl/filesystem.cxx (limited to 'libbutl/filesystem.cxx') diff --git a/libbutl/filesystem.cxx b/libbutl/filesystem.cxx new file mode 100644 index 0000000..6a64073 --- /dev/null +++ b/libbutl/filesystem.cxx @@ -0,0 +1,1396 @@ +// file : libbutl/filesystem.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#ifndef _WIN32 +# include // rename() +# include // struct dirent, *dir() +# include // symlink(), link(), stat(), rmdir(), unlink() +# include // utimes() +# include // stat +# include // stat(), lstat(), S_I*, mkdir(), chmod() +#else +# include + +# include // _find*(), _unlink(), _chmod() +# include // _mkdir(), _rmdir() +# include // _stat +# include // _stat(), S_I* + +# ifdef _MSC_VER // Unlikely to be fixed in newer versions. +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +# endif + +# include // lcase() +#endif + +#include // errno, E* + +#include +#include +#include // unique_ptr +#include // pair +#include // reverse_iterator +#include + +#include +#include // throw_generic_error() +#include +#include + +using namespace std; + +namespace butl +{ + bool + file_exists (const char* p, bool fl) + { + auto pe (path_entry (p, fl)); + return pe.first && (pe.second == entry_type::regular || + (!fl && pe.second == entry_type::symlink)); + } + + bool + entry_exists (const char* p, bool fl) + { + return path_entry (p, fl).first; + } + + bool + dir_exists (const char* p) + { + auto pe (path_entry (p, true)); + return pe.first && pe.second == entry_type::directory; + } + +#ifndef _WIN32 + pair + path_entry (const char* p, bool fl) + { + struct stat s; + if ((fl ? stat (p, &s) : lstat (p, &s)) != 0) + { + if (errno == ENOENT || errno == ENOTDIR) + return make_pair (false, entry_type::unknown); + else + throw_generic_error (errno); + } + + auto m (s.st_mode); + entry_type t (entry_type::unknown); + + if (S_ISREG (m)) + t = entry_type::regular; + else if (S_ISDIR (m)) + t = entry_type::directory; + else if (S_ISLNK (m)) + t = entry_type::symlink; + else if (S_ISBLK (m) || S_ISCHR (m) || S_ISFIFO (m) || S_ISSOCK (m)) + t = entry_type::other; + + return make_pair (true, t); + } +#else + pair + path_entry (const char* p, bool) + { + // A path like 'C:', while being a root path in our terminology, is not as + // such for Windows, that maintains current directory for each drive, and + // so C: means the current directory on the drive C. This is not what we + // mean here, so need to append the trailing directory separator in such a + // case. + // + string d; + if (path::traits::root (p, string::traits_type::length (p))) + { + d = p; + d += path::traits::directory_separator; + p = d.c_str (); + } + + DWORD attr (GetFileAttributesA (p)); + if (attr == INVALID_FILE_ATTRIBUTES) // Presumably not exists. + return make_pair (false, entry_type::unknown); + + entry_type t (entry_type::unknown); + + // S_ISLNK/S_IFDIR are not defined for Win32 but it does have symlinks. + // We will consider symlink entry to be of the unknown type. Note that + // S_ISREG() and S_ISDIR() return as they would do for a symlink target. + // + if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) == 0) + { + struct _stat s; + + if (_stat (p, &s) != 0) + { + if (errno == ENOENT || errno == ENOTDIR) + return make_pair (false, entry_type::unknown); + else + throw_generic_error (errno); + } + + auto m (s.st_mode); + + if (S_ISREG (m)) + t = entry_type::regular; + else if (S_ISDIR (m)) + t = entry_type::directory; + // + //else if (S_ISLNK (m)) + // t = entry_type::symlink; + } + + return make_pair (true, t); + } +#endif + + mkdir_status +#ifndef _WIN32 + try_mkdir (const dir_path& p, mode_t m) + { + if (mkdir (p.string ().c_str (), m) != 0) +#else + try_mkdir (const dir_path& p, mode_t) + { + if (_mkdir (p.string ().c_str ()) != 0) +#endif + { + int e (errno); + + // EEXIST means the path already exists but not necessarily as + // a directory. + // + if (e == EEXIST && dir_exists (p)) + return mkdir_status::already_exists; + else + throw_generic_error (e); + } + + return mkdir_status::success; + } + + mkdir_status + try_mkdir_p (const dir_path& p, mode_t m) + { + if (!p.root ()) + { + dir_path d (p.directory ()); + + if (!d.empty () && !dir_exists (d)) + try_mkdir_p (d, m); + } + + return try_mkdir (p, m); + } + + rmdir_status + try_rmdir (const dir_path& p, bool ignore_error) + { + rmdir_status r (rmdir_status::success); + +#ifndef _WIN32 + if (rmdir (p.string ().c_str ()) != 0) +#else + if (_rmdir (p.string ().c_str ()) != 0) +#endif + { + if (errno == ENOENT) + r = rmdir_status::not_exist; + else if (errno == ENOTEMPTY || errno == EEXIST) + r = rmdir_status::not_empty; + else if (!ignore_error) + throw_generic_error (errno); + } + + return r; + } + + void + rmdir_r (const dir_path& p, bool dir, bool ignore_error) + { + // An nftw()-based implementation (for platforms that support it) + // might be a faster way. + // + for (const dir_entry& de: dir_iterator (p)) + { + path ep (p / de.path ()); //@@ Would be good to reuse the buffer. + + if (de.ltype () == entry_type::directory) + rmdir_r (path_cast (move (ep)), true, ignore_error); + else + try_rmfile (ep, ignore_error); + } + + if (dir) + { + rmdir_status r (try_rmdir (p)); + + if (r != rmdir_status::success && !ignore_error) + throw_generic_error (r == rmdir_status::not_empty + ? ENOTEMPTY + : ENOENT); + } + } + + rmfile_status + try_rmfile (const path& p, bool ignore_error) + { + rmfile_status r (rmfile_status::success); + +#ifndef _WIN32 + if (unlink (p.string ().c_str ()) != 0) +#else + if (_unlink (p.string ().c_str ()) != 0) +#endif + { + // Strangely on Linux unlink() removes a dangling symlink but returns + // ENOENT. + // + if (errno == ENOENT || errno == ENOTDIR) + r = rmfile_status::not_exist; + else if (!ignore_error) + throw_generic_error (errno); + } + + return r; + } + +#ifndef _WIN32 + void + mksymlink (const path& target, const path& link, bool) + { + if (symlink (target.string ().c_str (), link.string ().c_str ()) == -1) + throw_generic_error (errno); + } + + void + mkhardlink (const path& target, const path& link, bool) + { + if (::link (target.string ().c_str (), link.string ().c_str ()) == -1) + throw_generic_error (errno); + } + +#else + + void + mksymlink (const path&, const path&, bool) + { + throw_generic_error (ENOSYS, "symlinks not supported"); + } + + void + mkhardlink (const path& target, const path& link, bool dir) + { + if (!dir) + { + if (!CreateHardLinkA (link.string ().c_str (), + target.string ().c_str (), + nullptr)) + throw_system_error (GetLastError ()); + } + else + throw_generic_error (ENOSYS, "directory hard links not supported"); + } +#endif + + // For I/O operations cpfile() can throw ios_base::failure exception that is + // not derived from system_error for old versions of g++ (as of 4.9). From + // the other hand cpfile() must throw system_error only. Let's catch + // ios_base::failure and rethrow as system_error in such a case. + // + template + static inline typename enable_if::type + cpfile (const path& from, const path& to, + cpflags fl, + permissions perm, + auto_rmfile& rm) + { + ifdstream ifs (from, fdopen_mode::binary); + + fdopen_mode om (fdopen_mode::out | + fdopen_mode::truncate | + fdopen_mode::create | + fdopen_mode::binary); + + if ((fl & cpflags::overwrite_content) != cpflags::overwrite_content) + om |= fdopen_mode::exclusive; + + ofdstream ofs (fdopen (to, om, perm)); + + rm = auto_rmfile (to); + + // Throws ios::failure on fdbuf read/write failures. + // + // Note that the eof check is important: if the stream is at eof (empty + // file) then this write will fail. + // + if (ifs.peek () != ifdstream::traits_type::eof ()) + ofs << ifs.rdbuf (); + + ifs.close (); // Throws ios::failure on failure. + ofs.close (); // Throws ios::failure on flush/close failure. + } + + template + static inline typename enable_if::type + cpfile (const path& from, const path& to, + cpflags fl, + permissions perm, + auto_rmfile& rm) + { + try + { + cpfile (from, to, fl, perm, rm); + } + catch (const ios_base::failure& e) + { + // While we try to preserve the original error information, we can not + // make the description to be exactly the same, for example + // + // Is a directory + // + // becomes + // + // Is a directory: Input/output error + // + // Note that our custom operator<<(ostream, exception) doesn't strip this + // suffix. This is a temporary code after all. + // + throw_generic_error (EIO, e.what ()); + } + } + + void + cpfile (const path& from, const path& to, cpflags fl) + { + permissions perm (path_permissions (from)); + auto_rmfile rm; + + cpfile::value> ( + from, to, fl, perm, rm); + + if ((fl & cpflags::overwrite_permissions) == + cpflags::overwrite_permissions) + path_permissions (to, perm); + + rm.cancel (); + } + + // Figuring out whether we have the nanoseconds in struct stat. Some + // platforms (e.g., FreeBSD), may provide some "compatibility" #define's, + // so use the second argument to not end up with the same signatures. + // + template + inline constexpr auto + mnsec (const S* s, bool) -> decltype(s->st_mtim.tv_nsec) + { + return s->st_mtim.tv_nsec; // POSIX (GNU/Linux, Solaris). + } + + template + inline constexpr auto + mnsec (const S* s, int) -> decltype(s->st_mtimespec.tv_nsec) + { + return s->st_mtimespec.tv_nsec; // *BSD, MacOS. + } + + template + inline constexpr auto + mnsec (const S* s, float) -> decltype(s->st_mtime_n) + { + return s->st_mtime_n; // AIX 5.2 and later. + } + + // Things are not going to end up well with only seconds resolution so + // let's make it a compile error. + // + // template + // inline constexpr int + // mnsec (...) {return 0;} + + template + inline constexpr auto + ansec (const S* s, bool) -> decltype(s->st_atim.tv_nsec) + { + return s->st_atim.tv_nsec; // POSIX (GNU/Linux, Solaris). + } + + template + inline constexpr auto + ansec (const S* s, int) -> decltype(s->st_atimespec.tv_nsec) + { + return s->st_atimespec.tv_nsec; // *BSD, MacOS. + } + + template + inline constexpr auto + ansec (const S* s, float) -> decltype(s->st_atime_n) + { + return s->st_atime_n; // AIX 5.2 and later. + } + + // template + // inline constexpr int + // ansec (...) {return 0;} + + void + mventry (const path& from, const path& to, cpflags fl) + { + assert ((fl & cpflags::overwrite_permissions) == + cpflags::overwrite_permissions); + + bool ovr ((fl & cpflags::overwrite_content) == cpflags::overwrite_content); + + const char* f (from.string ().c_str ()); + const char* t (to.string ().c_str ()); + +#ifndef _WIN32 + + if (!ovr && path_entry (to).first) + throw_generic_error (EEXIST); + + if (::rename (f, t) == 0) // POSIX implementation. + return; + + // If source and destination paths are on different file systems we need to + // move the file ourselves. + // + if (errno != EXDEV) + throw_generic_error (errno); + + // Note that cpfile() follows symlinks, so we need to remove destination if + // exists. + // + try_rmfile (to); + + // Note that permissions are copied unconditionally to a new file. + // + cpfile (from, to, cpflags::none); + + // Copy file access and modification times. + // + struct stat s; + if (stat (f, &s) != 0) + throw_generic_error (errno); + + timeval times[2]; + times[0].tv_sec = s.st_atime; + times[0].tv_usec = ansec (&s, true) / 1000; + times[1].tv_sec = s.st_mtime; + times[1].tv_usec = mnsec (&s, true) / 1000; + + if (utimes (t, times) != 0) + throw_generic_error (errno); + + // Finally, remove the source file. + // + try_rmfile (from); + +#else + + // While ::rename() is present on Windows, it is not POSIX but ISO C + // implementation, that doesn't fit our needs well. + // + auto te (path_entry (to)); + + if (!ovr && te.first) + throw_generic_error (EEXIST); + + bool td (te.first && te.second == entry_type::directory); + + auto fe (path_entry (from)); + bool fd (fe.first && fe.second == entry_type::directory); + + // If source and destination filesystem entries exist, they both must be + // either directories or not directories. + // + if (fe.first && te.first && fd != td) + throw_generic_error (ENOTDIR); + + DWORD mfl (fd ? 0 : (MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)); + + if (MoveFileExA (f, t, mfl)) + return; + + // If the destination already exists, then MoveFileExA() succeeds only if + // it is a regular file or a symlink. Lets also support an empty directory + // special case to comply with POSIX. If the destination is an empty + // directory we will just remove it and retry the move operation. + // + // Note that under Wine we endup with ERROR_ACCESS_DENIED error code in + // that case, and with ERROR_ALREADY_EXISTS when run natively. + // + DWORD ec (GetLastError ()); + if ((ec == ERROR_ALREADY_EXISTS || ec == ERROR_ACCESS_DENIED) && td && + try_rmdir (path_cast (to)) != rmdir_status::not_empty && + MoveFileExA (f, t, mfl)) + return; + + throw_system_error (ec); + +#endif + } + + timestamp + file_mtime (const char* p) + { +#ifndef _WIN32 + struct stat s; + if (stat (p, &s) != 0) + { + if (errno == ENOENT || errno == ENOTDIR) + return timestamp_nonexistent; + else + throw_generic_error (errno); + } + + if (!S_ISREG (s.st_mode)) + return timestamp_nonexistent; + + return system_clock::from_time_t (s.st_mtime) + + chrono::duration_cast ( + chrono::nanoseconds (mnsec (&s, true))); +#else + + WIN32_FILE_ATTRIBUTE_DATA s; + + if (!GetFileAttributesExA (p, GetFileExInfoStandard, &s)) + { + DWORD ec (GetLastError ()); + + if (ec == ERROR_FILE_NOT_FOUND || + ec == ERROR_PATH_NOT_FOUND || + ec == ERROR_INVALID_NAME || + ec == ERROR_INVALID_DRIVE || + ec == ERROR_BAD_PATHNAME || + ec == ERROR_BAD_NETPATH) + return timestamp_nonexistent; + + throw_system_error (ec); + } + + if ((s.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) + return timestamp_nonexistent; + + // Time in FILETIME is in 100 nanosecond "ticks" since "Windows epoch" + // (1601-01-01T00:00:00Z). To convert it to "UNIX epoch" + // (1970-01-01T00:00:00Z) we need to subtract 11644473600 seconds. + // + const FILETIME& t (s.ftLastWriteTime); + + uint64_t ns ((static_cast (t.dwHighDateTime) << 32) | + t.dwLowDateTime); + + ns -= 11644473600ULL * 10000000; // Now in UNIX epoch. + ns *= 100; // Now in nanoseconds. + + return timestamp ( + chrono::duration_cast ( + chrono::nanoseconds (ns))); +#endif + } + + permissions + path_permissions (const path& p) + { +#ifndef _WIN32 + struct stat s; + if (stat (p.string ().c_str (), &s) != 0) +#else + struct _stat s; + if (_stat (p.string ().c_str (), &s) != 0) +#endif + throw_generic_error (errno); + + // VC++ has no S_IRWXU defined. MINGW GCC <= 4.9 has no S_IRWXG, S_IRWXO + // defined. + // + // We could extrapolate user permissions to group/other permissions if + // S_IRWXG/S_IRWXO are undefined. That is, we could consider their absence + // as meaning that the platform does not distinguish between permissions + // for different kinds of users. Let's wait for a use-case first. + // + mode_t f (S_IREAD | S_IWRITE | S_IEXEC); + +#ifdef S_IRWXG + f |= S_IRWXG; +#endif + +#ifdef S_IRWXO + f |= S_IRWXO; +#endif + + return static_cast (s.st_mode & f); + } + + void + path_permissions (const path& p, permissions f) + { + mode_t m (S_IREAD | S_IWRITE | S_IEXEC); + +#ifdef S_IRWXG + m |= S_IRWXG; +#endif + +#ifdef S_IRWXO + m |= S_IRWXO; +#endif + + m &= static_cast (f); + +#ifndef _WIN32 + if (chmod (p.string ().c_str (), m) == -1) +#else + if (_chmod (p.string ().c_str (), m) == -1) +#endif + throw_generic_error (errno); + } + + // dir_{entry,iterator} + // +#ifndef _WIN32 + + // dir_entry + // + dir_iterator:: + ~dir_iterator () + { + if (h_ != nullptr) + closedir (h_); // Ignore any errors. + } + + dir_iterator& dir_iterator:: + operator= (dir_iterator&& x) + { + if (this != &x) + { + e_ = move (x.e_); + + if (h_ != nullptr && closedir (h_) == -1) + throw_generic_error (errno); + + h_ = x.h_; + x.h_ = nullptr; + } + return *this; + } + + entry_type dir_entry:: + type (bool link) const + { + path_type p (b_ / p_); + struct stat s; + if ((link + ? stat (p.string ().c_str (), &s) + : lstat (p.string ().c_str (), &s)) != 0) + { + throw_generic_error (errno); + } + + entry_type r; + + if (S_ISREG (s.st_mode)) + r = entry_type::regular; + else if (S_ISDIR (s.st_mode)) + r = entry_type::directory; + else if (S_ISLNK (s.st_mode)) + r = entry_type::symlink; + else + r = entry_type::other; + + return r; + } + + // dir_iterator + // + struct dir_deleter + { + void operator() (DIR* p) const {if (p != nullptr) closedir (p);} + }; + + dir_iterator:: + dir_iterator (const dir_path& d) + { + unique_ptr h (opendir (d.string ().c_str ())); + h_ = h.get (); + + if (h_ == nullptr) + throw_generic_error (errno); + + next (); + + if (h_ != nullptr) + e_.b_ = d; + + h.release (); + } + + template + inline /*constexpr*/ entry_type d_type (const D* d, decltype(d->d_type)*) + { + switch (d->d_type) + { +#ifdef DT_DIR + case DT_DIR: return entry_type::directory; +#endif +#ifdef DT_REG + case DT_REG: return entry_type::regular; +#endif +#ifdef DT_LNK + case DT_LNK: return entry_type::symlink; +#endif +#ifdef DT_BLK + case DT_BLK: +#endif +#ifdef DT_CHR + case DT_CHR: +#endif +#ifdef DT_FIFO + case DT_FIFO: +#endif +#ifdef DT_SOCK + case DT_SOCK: +#endif + return entry_type::other; + + default: return entry_type::unknown; + } + } + + template + inline constexpr entry_type d_type (...) {return entry_type::unknown;} + + void dir_iterator:: + next () + { + for (;;) + { + errno = 0; + if (struct dirent* de = readdir (h_)) + { + // We can accept some overhead for '.' and '..' (relying on short + // string optimization) in favor of a more compact code. + // + path p (de->d_name); + + // Skip '.' and '..'. + // + if (p.current () || p.parent ()) + continue; + + e_.p_ = move (p); + e_.t_ = d_type (de, nullptr); + e_.lt_ = entry_type::unknown; + } + else if (errno == 0) + { + // End of stream. + // + closedir (h_); + h_ = nullptr; + } + else + throw_generic_error (errno); + + break; + } + } + +#else + + // dir_entry + // + dir_iterator:: + ~dir_iterator () + { + if (h_ != -1) + _findclose (h_); // Ignore any errors. + } + + dir_iterator& dir_iterator:: + operator= (dir_iterator&& x) + { + if (this != &x) + { + e_ = move (x.e_); + + if (h_ != -1 && _findclose (h_) == -1) + throw_generic_error (errno); + + h_ = x.h_; + x.h_ = -1; + } + return *this; + } + + entry_type dir_entry:: + type (bool) const + { + // Note that we currently do not support symlinks (yes, there is symlink + // support since Vista). + // + path_type p (b_ / p_); + + struct _stat s; + if (_stat (p.string ().c_str (), &s) != 0) + throw_generic_error (errno); + + entry_type r; + if (S_ISREG (s.st_mode)) + r = entry_type::regular; + else if (S_ISDIR (s.st_mode)) + r = entry_type::directory; + else + r = entry_type::other; + + return r; + } + + // dir_iterator + // + struct auto_dir + { + explicit + auto_dir (intptr_t& h): h_ (&h) {} + + auto_dir (const auto_dir&) = delete; + auto_dir& operator= (const auto_dir&) = delete; + + ~auto_dir () + { + if (h_ != nullptr && *h_ != -1) + _findclose (*h_); + } + + void release () {h_ = nullptr;} + + private: + intptr_t* h_; + }; + + dir_iterator:: + dir_iterator (const dir_path& d) + { + auto_dir h (h_); + e_.b_ = d; // Used by next() to call _findfirst(). + + next (); + h.release (); + } + + void dir_iterator:: + next () + { + for (;;) + { + bool r; + _finddata_t fi; + + if (h_ == -1) + { + // The call is made from the constructor. Any other call with h_ == -1 + // is illegal. + // + + // Check to distinguish non-existent vs empty directories. + // + if (!dir_exists (e_.b_)) + throw_generic_error (ENOENT); + + h_ = _findfirst ((e_.b_ / path ("*")).string ().c_str (), &fi); + r = h_ != -1; + } + else + r = _findnext (h_, &fi) == 0; + + if (r) + { + // We can accept some overhead for '.' and '..' (relying on short + // string optimization) in favor of a more compact code. + // + path p (fi.name); + + // Skip '.' and '..'. + // + if (p.current () || p.parent ()) + continue; + + e_.p_ = move (p); + + // We do not support symlinks at the moment. + // + e_.t_ = fi.attrib & _A_SUBDIR + ? entry_type::directory + : entry_type::regular; + + e_.lt_ = entry_type::unknown; + } + else if (errno == ENOENT) + { + // End of stream. + // + if (h_ != -1) + { + _findclose (h_); + h_ = -1; + } + } + else + throw_generic_error (errno); + + break; + } + } +#endif + + // Match the name [ni, ne) to the pattern [pi, pe). Ranges can be empty. + // + static bool + match (string::const_iterator pi, string::const_iterator pe, + string::const_iterator ni, string::const_iterator ne) + { + using reverse_iterator = std::reverse_iterator; + + reverse_iterator rpi (pe); + reverse_iterator rpe (pi); + + reverse_iterator rni (ne); + reverse_iterator rne (ni); + + // Match the pattern suffix (follows the last *) to the name trailing + // characters. + // + char pc; + for (; rpi != rpe && (pc = *rpi) != '*' && rni != rne; ++rpi, ++rni) + { +#ifndef _WIN32 + if (*rni != pc && pc != '?') +#else + if (lcase (*rni) != lcase (pc) && pc != '?') +#endif + return false; + } + + // If we got to the (reversed) end of the pattern (no * is encountered) + // than we are done. The success depends on if we got to the (reversed) end + // of the name as well. + // + if (rpi == rpe) + return rni == rne; + + // If we didn't reach * in the pattern then we reached the (reversed) end + // of the name. That means we have unmatched non-star characters in the + // pattern, and so match failed. + // + if (pc != '*') + { + assert (rni == rne); + return false; + } + + // Match the pattern prefix (ends with the first *) to the name leading + // characters. If they mismatch we failed. Otherwise if this is an only * + // in the pattern (matches whatever is left in the name) then we succeed, + // otherwise we perform backtracking (recursively). + // + pe = rpi.base (); + ne = rni.base (); + + // Compare the pattern and the name char by char until the name suffix or + // * is encountered in the pattern (whichever happens first). Fail if a + // char mismatches. + // + for (; (pc = *pi) != '*' && ni != ne; ++pi, ++ni) + { +#ifndef _WIN32 + if (*ni != pc && pc != '?') +#else + if (lcase (*ni) != lcase (pc) && pc != '?') +#endif + return false; + } + + // If we didn't get to * in the pattern then we got to the name suffix. + // That means that the pattern has unmatched non-star characters, and so + // match failed. + // + if (pc != '*') + { + assert (ni == ne); + return false; + } + + // If * that we have reached is the last one, then it matches whatever is + // left in the name (including an empty range). + // + if (++pi == pe) + return true; + + // Perform backtracking. + // + // From now on, we will call the pattern not-yet-matched part (starting + // the leftmost * and ending the rightmost one inclusively) as pattern, and + // the name not-yet-matched part as name. + // + // Here we sequentially assume that * that starts the pattern matches the + // name leading part (staring from an empty one and iterating till the full + // name). So if, at some iteration, the pattern trailing part (that follows + // the leftmost *) matches the name trailing part, then the pattern matches + // the name. + // + bool r; + for (; !(r = match (pi, pe, ni, ne)) && ni != ne; ++ni) ; + return r; + } + + bool + path_match (const string& pattern, const string& name) + { + // Implementation notes: + // + // - This has a good potential of becoming hairy quickly so need to strive + // for an elegant way to implement this. + // + // - Most patterns will contains a single * wildcard with a prefix and/or + // suffix (e.g., *.txt, foo*, f*.txt). Something like this is not very + // common: *foo*. + // + // So it would be nice to have a clever implementation that first + // "anchors" itself with a literal prefix and/or suffix and only then + // continue with backtracking. In other words, reduce: + // + // *.txt vs foo.txt -> * vs foo + // foo* vs foo.txt -> * vs .txt + // f*.txt vs foo.txt -> * vs oo + // + + auto pi (pattern.rbegin ()); + auto pe (pattern.rend ()); + + auto ni (name.rbegin ()); + auto ne (name.rend ()); + + // The name doesn't match the pattern if it is of a different type than the + // pattern is. + // + bool pd (pi != pe && path::traits::is_separator (*pi)); + bool nd (ni != ne && path::traits::is_separator (*ni)); + + if (pd != nd) + return false; + + // Skip trailing separators if present. + // + if (pd) + { + ++pi; + ++ni; + } + + return match (pattern.begin (), pi.base (), name.begin (), ni.base ()); + } + + // Iterate over directory sub-entries, recursively and including itself if + // requested. Note that recursive iterating goes depth-first which make + // sense for the cleanup use cases (@@ maybe this should be controllable + // since for directory creation it won't make sense). + // + // Prior to recursively opening a directory for iterating the preopen + // callback function is called. If false is returned, then the directory is + // not traversed but still returned by the next() call. + // + // Note that iterating over non-existent directory is not en error. The + // subsequent next() call returns false for such a directory. + // + using preopen = std::function; + + class recursive_dir_iterator + { + public: + recursive_dir_iterator (dir_path p, + bool recursive, + bool self, + bool fs, + preopen po) + : start_ (move (p)), + recursive_ (recursive), + self_ (self), + follow_symlinks_ (fs), + preopen_ (move (po)) + { + open (dir_path (), self_); + } + + // Non-copyable, non-movable type. + // + recursive_dir_iterator (const recursive_dir_iterator&) = delete; + recursive_dir_iterator& operator= (const recursive_dir_iterator&) = delete; + + // Return false if no more entries left. Otherwise save the next entry path + // and return true. The path is relative against the directory being + // traversed and contains a trailing separator for sub-directories. Throw + // std::system_error in case of a failure (insufficient permissions, + // dangling symlink encountered, etc). + // + bool + next (path& p) + { + if (iters_.empty ()) + return false; + + auto& i (iters_.back ()); + + // If we got to the end of directory sub-entries, then go one level up + // and return this directory path. + // + if (i.first == dir_iterator ()) + { + path d (move (i.second)); + iters_.pop_back (); + + // Return the path unless it is the last one (the directory we started + // to iterate from) and the self flag is not set. + // + if (iters_.empty () && !self_) + return false; + + p = move (d); + return true; + } + + const dir_entry& de (*i.first); + + // Append separator if a directory. Note that dir_entry::type() can + // throw. + // + entry_type et (follow_symlinks_ ? de.type () : de.ltype ()); + path pe (et == entry_type::directory + ? path_cast (i.second / de.path ()) + : i.second / de.path ()); + + ++i.first; + + if (recursive_ && pe.to_directory ()) + { + open (path_cast (move (pe)), true); + return next (p); + } + + p = move (pe); + return true; + } + + private: + void + open (dir_path p, bool preopen) + { + // We should consider a racing condition here. The directory can be + // removed before we create an iterator for it. In this case we just do + // nothing, so the directory is silently skipped. + // + try + { + // If preopen_() returns false, then the directory will not be + // traversed (as we leave iterator with end semantics) but still be + // returned by the next() call as a sub-entry. + // + dir_iterator i; + if (!preopen || preopen_ (p)) + { + dir_path d (start_ / p); + i = dir_iterator (!d.empty () ? d : dir_path (".")); + } + + iters_.emplace_back (move (i), move (p)); + } + catch (const system_error& e) + { + // Ignore non-existent directory (ENOENT or ENOTDIR). Rethrow for any + // other error. We consider ENOTDIR as a variety of removal, with a + // new filesystem entry being created afterwards. + // + // Make sure that the error denotes errno portable code. + // + assert (e.code ().category () == generic_category ()); + + int ec (e.code ().value ()); + if (ec != ENOENT && ec != ENOTDIR) + throw; + } + } + + private: + dir_path start_; + bool recursive_; + bool self_; + bool follow_symlinks_; + preopen preopen_; + small_vector, 1> iters_; + }; + + // Search for paths matching the pattern and call the specified function for + // each matching path. Return false if the underlying func() call returns + // false. Otherwise the function conforms to the path_search() description. + // + static const string any_dir ("*/"); + + static bool + search ( + path pattern, + dir_path pattern_dir, + const dir_path start_dir, + bool follow_symlinks, + const function& func) + { + // Fast-forward the leftmost pattern non-wildcard components. So, for + // example, search for foo/f* in /bar/ becomes search for f* in /bar/foo/. + // + { + auto b (pattern.begin ()); + auto e (pattern.end ()); + auto i (b); + for (; i != e && (*i).find_first_of ("*?") == string::npos; ++i) ; + + // If the pattern has no wildcards then we reduce to checking for the + // filesystem entry existence. It matches if exists and is of the proper + // type. + // + if (i == e) + { + path p (pattern_dir / pattern); + auto pe (path_entry (start_dir / p, true)); + + if (pe.first && + ((pe.second == entry_type::directory) == p.to_directory ())) + return func (move (p), string (), false); + + return true; + } + else if (i != b) // There are non-wildcard components, so fast-forward. + { + path p (b, i); + pattern = pattern.leaf (p); + pattern_dir /= path_cast (move (p)); + } + } + + assert (!pattern.empty ()); + + // The pattern leftmost component. Will use it to match the start directory + // sub-entries. + // + path pc (pattern.begin (), ++pattern.begin ()); + string pcr (pc.representation ()); + + // Note that if the pattern has multiple components (is not a simple path), + // then the leftmost one has a trailing separator, and so will match + // sub-directories only. + // + bool simple (pattern.simple ()); + + // Note that we rely on "small function object" optimization here. + // + recursive_dir_iterator i ( + start_dir / pattern_dir, + pcr.find ("**") != string::npos, // Recursive. + pcr.find ("***") != string::npos, // Self-inclusive. + follow_symlinks, + [&pattern_dir, &func] (const dir_path& p) -> bool // Preopen. + { + return func (pattern_dir / p, any_dir, true); + }); + + // Canonicalize the pattern component collapsing consecutive stars (used to + // express that it is recursive) into a single one. + // + size_t j (0); + size_t n (pcr.size ()); + for (size_t i (0); i < n; ++i) + { + char c (pcr[i]); + if (!(c == '*' && i > 0 && pcr[i - 1] == '*')) + pcr[j++] = c; + } + + if (j != n) + pcr.resize (j); + + // Note that the callback function can be called for the same directory + // twice: first time as intermediate match from iterator's preopen() call, + // and then, if the first call succeed, from the iterating loop (possibly + // as the final match). + // + path p; + while (i.next (p)) + { + // Skip sub-entry if its name doesn't match the pattern leftmost + // component. + // + // Matching the directory we are iterating through (as for a pattern + // component containing ***) is a bit tricky. This directory is + // represented by the iterator as an empty path, and so we need to + // compute it (the leaf would actually be enough) for matching. This + // leaf can be aquired from the pattern_dir / start_dir path except the + // case when both directories are empty. This is the case when we search + // in the current directory (start_dir is empty) with a pattern that + // starts with *** wildcard (for example f***/bar). All we can do here is + // to fallback to path::current_directory() call. Note that this will be + // the only call per path_search() as the next time pattern_dir will not + // be empty. + // + const path& se (!p.empty () + ? p + : path_cast (!pattern_dir.empty () + ? pattern_dir + : !start_dir.empty () + ? start_dir + : path::current_directory ())); + + if (!path_match (pcr, se.leaf ().representation ())) + continue; + + // If the callback function returns false, then we stop the entire search + // for the final match, or do not search below the path for the + // intermediate one. + // + if (!func (pattern_dir / p, pcr, !simple)) + { + if (simple) // Final match. + return false; + else + continue; + } + + // If the pattern is not a simple one, and it's leftmost component + // matches the sub-entry, then the sub-entry is a directory (see the note + // above), and we search in it using the trailing part of the pattern. + // + if (!simple && !search (pattern.leaf (pc), + pattern_dir / path_cast (move (p)), + start_dir, + follow_symlinks, + func)) + return false; + } + + return true; + } + + void + path_search ( + const path& pattern, + const function& func, + const dir_path& start, + bool follow_symlinks) + { + search (pattern, + dir_path (), + pattern.relative () ? start : dir_path (), + follow_symlinks, + func); + } +} -- cgit v1.1