From e930d5c9cb4176c6055bde2b4ff196f4b5f92f69 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Sun, 21 Aug 2016 12:36:35 +0200 Subject: Redo process path search to better accommodate Windows-specific semantics --- butl/process | 101 ++++++++++++++++- butl/process.cxx | 335 ++++++++++++++++++++++++++++++++++++++----------------- butl/process.ixx | 57 +++++++++- 3 files changed, 383 insertions(+), 110 deletions(-) (limited to 'butl') diff --git a/butl/process b/butl/process index ed9f798..75f3c66 100644 --- a/butl/process +++ b/butl/process @@ -13,6 +13,7 @@ #include // uint32_t #include +#include #include namespace butl @@ -28,14 +29,68 @@ namespace butl process_error (int e, bool child) : system_error (e, std::system_category ()), child_ (child) {} #else - process_error (const std::string& d) - : system_error (ECHILD, std::system_category (), d), child_ (false) {} + process_error (int e) + : system_error (e, std::system_category ()), child_ (false) {} + + process_error (const std::string& d, int e = ECHILD) + : system_error (e, std::system_category (), d), child_ (false) {} #endif private: bool child_; }; + // A process executable has three paths: initial, recall, and effective. + // Initial is the original "command" that you specify in argv[0] and on + // POSIX that's what ends up in the child's argv[0]. But not on Windows. On + // Windows the command is first searched for in the parent executable's + // directory and if found then that's what should end up in child's argv[0]. + // So this is the recall path. It is called recall because this is what the + // caller of the parent process will be able to execute if you printed the + // command line. Finally, effective is the actual path to the executable + // that will include the directory part if found in PATH, the .exe extension + // if one is missing, etc. + // + // As an example, let's say we run foo\foo.exe that itself spawns bar which + // is found as foo\bar.exe. 
The paths will then be: + // + // initial: bar + // recall: foo\bar + // effective: foo\bar.exe + // + // In most cases, at least on POSIX, all three paths will be the same. As an + // optimization, if the recall path is empty, then it means it is the same + // as initial. Similarly, if the effective path is empty, then it is the + // same as recall (and if that is empty, as initial). + // + // Note that the call to path_search() below adjusts args[0] to point to the + // recall path which brings up lifetime issues. To address this, this class + // also implements an RAII-based auto-restore of args[0] to its initial + // value. + // + class process_path + { + public: + const char* initial = nullptr; + path recall; + path effect; + + // Moveable-only type. + // + process_path (process_path&&); + process_path& operator= (process_path&&); + + process_path (const process_path&) = delete; + process_path& operator= (const process_path&) = delete; + + process_path () = default; + process_path (const char* i, const char** a0): initial (i), args0_ (a0) {} + ~process_path () {if (args0_ != nullptr) *args0_ = initial;} + + private: + const char** args0_ = nullptr; + }; + class LIBBUTL_EXPORT process { public: @@ -70,7 +125,13 @@ namespace butl // exceptions (e.g., if exec() failed) can be thrown in the child // version of us. // - process (char const* const args[], int in = 0, int out = 1, int err = 2); + // Note that the versions without the process_path argument may + // temporarily change args[0] (see path_search() for details). + // + process (const char* args[], int in = 0, int out = 1, int err = 2); + + process (const process_path&, const char* args[], + int in = 0, int out = 1, int err = 2); // The "piping" constructor, for example: // @@ -80,14 +141,26 @@ namespace butl // rhs.wait (); // Wait for last first. 
// lhs.wait (); // - process (char const* const args[], process& in, int out = 1, int err = 2); + process (const char* args[], process& in, int out = 1, int err = 2); + + process (const process_path&, const char* args[], + process& in, int out = 1, int err = 2); // Versions of the above constructors that allow us to change the // current working directory of the child process. NULL and empty // cwd arguments are ignored. // - process (const char* cwd, char const* const[], int = 0, int = 1, int = 2); - process (const char* cwd, char const* const[], process&, int = 1, int = 2); + process (const char* cwd, const char* [], int = 0, int = 1, int = 2); + + process (const char* cwd, + const process_path&, const char* [], + int = 0, int = 1, int = 2); + + process (const char* cwd, const char* [], process&, int = 1, int = 2); + + process (const char* cwd, + const process_path&, const char* [], + process&, int = 1, int = 2); // Wait for the process to terminate. Return true if the process // terminated normally and with the zero exit status. Unless ignore_error @@ -122,6 +195,22 @@ namespace butl // process (); + // Resolve process' paths based on the initial path in args0. If recall + // differs from initial, adjust args0 to point to the recall path. If + // resolution fails, throw process_error. Normally, you will use this + // function like this: + // + // const char* args[] = {"foo", ..., nullptr}; + // + // process_path pp (process::path_search (args[0])); + // + // ... // E.g., print args[0]. 
+ // + // process p (pp, args); + // + process_path + path_search (const char*& args0); + public: #ifndef _WIN32 using handle_type = pid_t; diff --git a/butl/process.cxx b/butl/process.cxx index 80b97cc..cceceed 100644 --- a/butl/process.cxx +++ b/butl/process.cxx @@ -7,6 +7,8 @@ #ifndef _WIN32 # include // execvp, fork, dup2, pipe, chdir, *_FILENO, getpid # include // waitpid +# include // _stat +# include // _stat(), S_IS* #else # include @@ -16,13 +18,23 @@ # include // stat # include // stat(), S_IS* +# ifdef _MSC_VER // Unlikely to be fixed in newer versions. +# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) + +# define STDIN_FILENO 0 +# define STDOUT_FILENO 1 +# define STDERR_FILENO 2 +# endif // _MSC_VER + # include // unique_ptr +# include // __argv[] -# include # include #endif #include +#include // size_t +#include // strlen(), strchr() #include // casecmp() #include // fdnull(), fdclose() @@ -33,15 +45,6 @@ using namespace std; using namespace butl::win32; #endif -#ifdef _MSC_VER // Unlikely to be fixed in newer versions. -#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) - -# define STDIN_FILENO 0 -# define STDOUT_FILENO 1 -# define STDERR_FILENO 2 -#endif // _MSC_VER - namespace butl { class auto_fd @@ -89,8 +92,96 @@ namespace butl #ifndef _WIN32 + process_path process:: + path_search (const char*& args0) + { + // Note that there is a similar version for Win32. + + typedef path::traits traits; + + const char* f (args0); + size_t fn (strlen (f)); + + path rp, ep; // recall & effective + auto search = [&ep, f, fn] (const char* d, size_t dn) -> bool + { + string s (move (ep).string ()); // Reuse buffer. + + if (dn != 0) + { + s.assign (d, dn); + + if (!traits::is_separator (s.back ())) + s += traits::directory_separator; + } + + s.append (f, fn); + ep = path (move (s)); // Move back into result. + + // Check that the file exists and has at least one executable bit set. 
+ // This way we get a bit closer to the "continue search on EACCES" + // semantics (see below). + // + struct stat si; + return (stat (ep.string ().c_str (), &si) == 0 && + S_ISREG (si.st_mode) && + (si.st_mode & (S_IEXEC | S_IXGRP | S_IXOTH)) != 0); + }; + + for (;;) // The "goto end" loop. + { + // If there is a directory component in the file, then search does not + // apply. + // + if (traits::find_separator (f, fn) != nullptr) + break; + + // The search order is documented in exec(3). Some of the differences + // compared to exec*p() functions: + // + // 1. If there is no PATH, we don't default to current directory/_CS_PATH. + // 2. We do not continue searching on EACCES from execve(). + // 3. We do not execute via default shell on ENOEXEC from execve(). + // + { + const char* b (getenv ("PATH")); + + for (const char* e; b != nullptr; b = (e != nullptr ? e + 1 : e)) + { + e = strchr (b, traits::path_separator); + + // Empty path (i.e., a double colon or a colon at the beginning or + // end of PATH) means search in the current directory. + // + if (search (b, e != nullptr ? e - b : strlen (b))) + break; + } + + if (b != nullptr) + break; + } + + // Did not find anything. + // + throw process_error (ENOENT, false); + } + + // Found the file and the result is in rp and ep, both of which can be + // empty. + // + process_path r (f, + rp.empty () ? nullptr : &(args0 = rp.string ().c_str ())); + + r.recall = move (rp); + r.effect = move (ep); + + return r; + } + process:: - process (const char* cwd, char const* const args[], int in, int out, int err) + process (const char* cwd, + const process_path& pp, const char* args[], + int in, int out, int err) { using pipe = auto_fd[2]; @@ -176,7 +267,11 @@ namespace butl if (cwd != nullptr && *cwd != '\0' && chdir (cwd) != 0) fail (true); - if (execvp (args[0], const_cast (&args[0])) == -1) + const char* file (pp.effect.empty () + ? 
args[0] + : pp.effect.string ().c_str ()); + + if (execv (file, const_cast (&args[0])) == -1) fail (true); } @@ -188,9 +283,10 @@ namespace butl } process:: - process (const char* cwd, char const* const args[], + process (const char* cwd, + const process_path& pp, const char* args[], process& in, int out, int err) - : process (cwd, args, in.in_ofd, out, err) + : process (cwd, pp, args, in.in_ofd, out, err) { assert (in.in_ofd != -1); // Should be a pipe. close (in.in_ofd); // Close it on our side. @@ -250,102 +346,155 @@ namespace butl #else // _WIN32 - // Why do we search for the program ourselves when CreateProcess() can be - // made to do that for us? Well, that's a bit of a historic mystery. We - // could use it to disable search in the current working directory. Or we - // could handle batch files automatically. - // - static path - path_search (const path& f) + process_path process:: + path_search (const char*& args0) { + // Note that there is a similar version for POSIX. + typedef path::traits traits; - // If there is a directory component in the file, then the PATH search - // does not apply. + const char* f (args0); + size_t fn (strlen (f)); + + // Unless there is already the .exe extension, then we will need to add + // it. Note that running .bat files requires starting cmd.exe and passing + // the batch file as an argument (see CreateProcess() for details). So + // if/when we decide to support those, it will have to be handled + // differently. // - if (!f.simple ()) - return f; + bool ext; + { + const char* e (traits::find_extension (f, fn)); + ext = (e == nullptr || casecmp (e, ".exe") != 0); + } - path r; - auto search = [&r, &f] (const char* d, size_t n) -> bool + path rp, ep; // recall & effective + auto search = [&ep, f, fn, ext] (const char* d, size_t dn) -> bool { - string s (move (r).string ()); // Reuse buffer. + string s (move (ep).string ()); // Reuse buffer. 
- if (n != 0) + if (dn != 0) { - s.assign (d, n); + s.assign (d, dn); if (!traits::is_separator (s.back ())) s += traits::directory_separator; } - s += f.string (); - r = path (move (s)); // Move back into result. + s.append (f, fn); + ep = path (move (s)); // Move back into result. - // Unless there is already the .exe extension, add it. Note that running - // .bat files requires starting cmd.exe and passing the batch file as an - // argument (see CreateProcess() for deails). So if/when we decide to - // support those, it will have to be handled differently. + // Add the .exe extension if necessary. // - const char* e (r.extension ()); - if (e == nullptr || casecmp (e, "exe") != 0) - r += ".exe"; + if (ext) + ep += ".exe"; // Only check that the file exists since the executable mode is set // according to the file extension. // - struct stat si; - return stat (r.string ().c_str (), &si) == 0 && S_ISREG (si.st_mode); + struct _stat si; + return _stat (ep.string ().c_str (), &si) == 0 && S_ISREG (si.st_mode); }; - // The search order is documented in CreateProcess(). First we look in - // the directory of the parent executable. - // + for (;;) // The "goto end" loop. { - char d[_MAX_PATH + 1]; - DWORD n (GetModuleFileName (NULL, d, _MAX_PATH + 1)); + // If there is a directory component in the file, then search does not + // apply. But we may still need to append the extension. + // + if (traits::find_separator (f, fn) != nullptr) + { + if (ext) + { + ep = path (f, fn); + ep += ".exe"; + } - if (n == 0 || n == _MAX_PATH + 1) // Failed or truncated. - throw process_error (last_error_msg ()); + break; + } - const char* p (traits::rfind_separator (d, n)); - assert (p != nullptr); + // The search order is documented in CreateProcess(). First we look in + // the directory of the parent executable. + // + { + char d[_MAX_PATH + 1]; + DWORD n (GetModuleFileName (NULL, d, _MAX_PATH + 1)); - if (search (d, p - d + 1)) // Include trailing slash. 
- return r; - } + if (n == 0 || n == _MAX_PATH + 1) // Failed or truncated. + throw process_error (last_error_msg ()); - // Next look in the current working directory. Crazy, I know. - // - if (search ("", 0)) - return r; + const char* p (traits::rfind_separator (d, n)); + assert (p != nullptr); - // Finally, search in PATH. - // - if (const char* s = getenv ("PATH")) - { - string ps (s); + if (search (d, p - d + 1)) // Include trailing slash. + { + // In this case we have to set the recall path. + // + // Note that the directory we have extracted is always absolute but + // the parent's recall path (argv[0]) might be relative. It seems, + // ideally, we would want to use parent's argv[0] dir (if any) to + // form the recall path. In particular, if the parent has no + // directory, then it means it was found via the standard search + // (e.g., PATH) and then so should the child. + // + // How do we get the parent's argv[0]? Luckily, here is __argv on + // Windows. + // + const char* d (__argv[0]); + size_t n (strlen (d)); + if (const char* p = traits::rfind_separator (d, n)) + { + string s (d, p - d + 1); // Include trailing slash. + s.append (f, fn); + rp = path (move (s)); + } + + break; + } + } - for (size_t b (0), e (ps.find (traits::path_separator)); - b != string::npos;) + // Next look in the current working directory. Crazy, I know. + // + // The recall path is the same as initial, though it might not be a bad + // idea to prepend .\ for clarity. + // + if (search ("", 0)) + break; + + // Finally, search in PATH. Recall is unchanged. + // { - // Empty path (i.e., a double colon or a colon at the beginning or end - // of PATH) means search in the current dirrectory. - // - if (search (ps.c_str () + b, (e != string::npos ? e : ps.size ()) - b)) - return r; + const char* b (getenv ("PATH")); - if (e == string::npos) - b = e; - else + for (const char* e; b != nullptr; b = (e != nullptr ? 
e + 1 : e)) { - b = e + 1; - e = ps.find (traits::path_separator, b); + e = strchr (b, traits::path_separator); + + // Empty path (i.e., a double colon or a colon at the beginning or + // end of PATH) means search in the current directory. + // + if (search (b, e != nullptr ? e - b : strlen (b))) + break; } + + if (b != nullptr) + break; } + + // Did not find anything. + // + throw process_error (ENOENT); } - return path (); + // Found the file and the result is in rp and ep, both of which can be + // empty. + // + process_path r (f, + rp.empty () ? nullptr : &(args0 = rp.string ().c_str ())); + + r.recall = move (rp); + r.effect = move (ep); + + return r; } class auto_handle @@ -392,7 +541,9 @@ namespace butl }; process:: - process (const char* cwd, char const* const args[], int in, int out, int err) + process (const char* cwd, + const process_path& pp, const char* args[], + int in, int out, int err) { using pipe = auto_handle[2]; @@ -481,32 +632,15 @@ namespace butl // Create the process. // - path file (args[0]); - - // Do PATH search. - // - if (file.simple ()) - { - file = path_search (file); - - if (file.empty ()) - fail ("file not found"); - } - else - { - // Unless there is already the .exe extension, add it. See path_search() - // for details. - // - const char* e (file.extension ()); - if (e == nullptr || casecmp (e, "exe") != 0) - file += ".exe"; - } + const char* file (pp.effect.empty () + ? args[0] + : pp.effect.string ().c_str ()); // Serialize the arguments to string. // string cmd_line; - for (char const* const* p (args); *p != 0; ++p) + for (const char* const* p (args); *p != 0; ++p) { if (p != args) cmd_line += ' '; @@ -575,7 +709,7 @@ namespace butl fail ("invalid file descriptor"); if (!CreateProcess ( - file.string ().c_str (), + file, const_cast (cmd_line.c_str ()), 0, // Process security attributes. 0, // Primary thread security attributes. 
@@ -621,9 +755,10 @@ namespace butl } process:: - process (const char* cwd, char const* const args[], + process (const char* cwd, + const process_path& pp, const char* args[], process& in, int out, int err) - : process (cwd, args, in.in_ofd, out, err) + : process (cwd, pp, args, in.in_ofd, out, err) { assert (in.in_ofd != -1); // Should be a pipe. _close (in.in_ofd); // Close it on our side. diff --git a/butl/process.ixx b/butl/process.ixx index 3b7ed7d..59ac60c 100644 --- a/butl/process.ixx +++ b/butl/process.ixx @@ -2,8 +2,39 @@ // copyright : Copyright (c) 2014-2016 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file +#include // move() + namespace butl { + inline process_path:: + process_path (process_path&& p) + : initial (p.initial), + recall (std::move (p.recall)), + effect (std::move (p.effect)), + args0_ (p.args0_) + { + p.args0_ = nullptr; + } + + inline process_path& process_path:: + operator= (process_path&& p) + { + if (this != &p) + { + if (args0_ != nullptr) + *args0_ = initial; + + initial = p.initial; + recall = std::move (p.recall); + effect = std::move (p.effect); + args0_ = p.args0_; + + p.args0_ = nullptr; + } + + return *this; + } + inline process:: process () : handle (0), @@ -15,12 +46,30 @@ namespace butl } inline process:: - process (char const* const args[], int in, int out, int err) - : process (nullptr, args, in, out, err) {} + process (const char* args[], int in, int out, int err) + : process (nullptr, path_search (args[0]), args, in, out, err) {} + + inline process:: + process (const process_path& pp, const char* args[], + int in, int out, int err) + : process (nullptr, pp, args, in, out, err) {} + + inline process:: + process (const char* args[], process& in, int out, int err) + : process (nullptr, path_search (args[0]), args, in, out, err) {} + + inline process:: + process (const process_path& pp, const char* args[], + process& in, int out, int err) + : process (nullptr, pp, args, in, out, err) {} + + inline process:: 
+ process (const char* cwd, const char* args[], int in, int out, int err) + : process (cwd, path_search (args[0]), args, in, out, err) {} inline process:: - process (char const* const args[], process& in, int out, int err) - : process (nullptr, args, in, out, err) {} + process (const char* cwd, const char* args[], process& in, int out, int err) + : process (cwd, path_search (args[0]), args, in, out, err) {} inline process:: process (process&& p) -- cgit v1.1