From 4d79758a845d3cd64f0153d60abb88d3ae4c2a68 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 22 Aug 2016 12:51:24 +0200 Subject: Extend process search with pre-search support, other minor improvements --- butl/process | 43 +++++++- butl/process.cxx | 320 ++++++++++++++++++++++++++----------------------------- butl/process.ixx | 40 +++++++ 3 files changed, 235 insertions(+), 168 deletions(-) diff --git a/butl/process b/butl/process index c967c44..7966309 100644 --- a/butl/process +++ b/butl/process @@ -75,6 +75,16 @@ namespace butl path recall; path effect; + // Handle empty recall/effect. + // + const char* recall_string () const; + const char* effect_string () const; + + bool empty () const + { + return initial == nullptr && recall.empty () && effect.empty (); + } + // Moveable-only type. // process_path (process_path&&); @@ -84,10 +94,11 @@ namespace butl process_path& operator= (const process_path&) = delete; process_path () = default; - process_path (const char* i, const char** a0): initial (i), args0_ (a0) {} - ~process_path () {if (args0_ != nullptr) *args0_ = initial;} + process_path (const char* i, path&& r, path&& e); + ~process_path (); private: + friend class process; const char** args0_ = nullptr; }; @@ -215,6 +226,34 @@ namespace butl static process_path path_search (const char*& args0, const dir_path& fallback = dir_path ()); + // This version is primarily useful when you want to pre-search the + // executable before creating the args[] array. In this case you will + // use the recall path for args[0]. + // + // The init argument determines whether to initialize the initial path to + // the shallow copy of file. If it is true, then initial is the same as + // file and recall is either empty or contains a different path. If it is + // false then initial contains a shallow copy of recall, and recall is + // either a different path or a deep copy of file. 
Normally you don't care + // about initial once you got recall and the main reason to pass true to + // this argument is to save a copy (since initial and recall are usually + // the same). + // + static process_path + path_search (const char* file, bool init, const dir_path& = dir_path ()); + + static process_path + path_search (const std::string& f, bool i, const dir_path& fb = dir_path ()) + { + return path_search (f.c_str (), i, fb); + } + + static process_path + path_search (const path& f, bool i, const dir_path& fb = dir_path ()) + { + return path_search (f.string ().c_str (), i, fb); + } + public: #ifndef _WIN32 using handle_type = pid_t; diff --git a/butl/process.cxx b/butl/process.cxx index dcb4bc9..37e9f72 100644 --- a/butl/process.cxx +++ b/butl/process.cxx @@ -90,19 +90,37 @@ namespace butl int fd_; }; -#ifndef _WIN32 + static process_path + path_search (const char*, const dir_path&); process_path process:: - path_search (const char*& args0, const dir_path& fb) + path_search (const char* f, bool init, const dir_path& fb) + { + process_path r (butl::path_search (f, fb)); + + path& rp (r.recall); + r.initial = init + ? f + : (rp.empty () ? (rp = path (f)) : rp).string ().c_str (); + + return r; + } + +#ifndef _WIN32 + + static process_path + path_search (const char* f, const dir_path& fb) { // Note that there is a similar version for Win32. typedef path::traits traits; - const char* f (args0); size_t fn (strlen (f)); - path rp, ep; // recall & effective + process_path r (nullptr, path (), path ()); + path& rp (r.recall); + path& ep (r.effect); + auto search = [&ep, f, fn] (const char* d, size_t dn) -> bool { string s (move (ep).string ()); // Reuse buffer. @@ -128,68 +146,51 @@ namespace butl (si.st_mode & (S_IEXEC | S_IXGRP | S_IXOTH)) != 0); }; - for (;;) // The "goto end" loop. + // If there is a directory component in the file, then search does not + // apply. 
+ // + if (traits::find_separator (f, fn) != nullptr) + return r; + + // The search order is documented in exec(3). Some of the differences + // compared to exec*p() functions: + // + // 1. If there is no PATH, we don't default to current directory/_CS_PATH. + // 2. We do not continue searching on EACCES from execve(). + // 3. We do not execute via default shell on ENOEXEC from execve(). + // + for (const char* b (getenv ("PATH")), *e; + b != nullptr; + b = (e != nullptr ? e + 1 : e)) { - // If there is a directory component in the file, then search does not - // apply. - // - if (traits::find_separator (f, fn) != nullptr) - break; + e = strchr (b, traits::path_separator); - // The search order is documented in exec(3). Some of the differences - // compared to exec*p() functions: - // - // 1. If there no PATH, we don't default to current directory/_CS_PATH. - // 2. We do not continue searching on EACCES from execve(). - // 3. We do not execute via default shell on ENOEXEC from execve(). + // Empty path (i.e., a double colon or a colon at the beginning or end + // of PATH) means search in the current directory. // - { - const char* b (getenv ("PATH")); - - for (const char* e; b != nullptr; b = (e != nullptr ? e + 1 : e)) - { - e = strchr (b, traits::path_separator); - - // Empty path (i.e., a double colon or a colon at the beginning or - // end of PATH) means search in the current dirrectory. - // - if (search (b, e != nullptr ? e - b : strlen (b))) - break; - } - - if (b != nullptr) - break; - } + if (search (b, e != nullptr ? e - b : strlen (b))) + return r; + } - // If we were given a fallback, try that. - // - if (!fb.empty ()) + // If we were given a fallback, try that. + // + if (!fb.empty ()) + { + if (search (fb.string ().c_str (), fb.string ().size ())) { - if (search (fb.string ().c_str (), fb.string ().size ())) - { - // In this case we have to set the recall path. - // - rp = fb; - rp /= f; - break; - } - } + // In this case we have to set the recall path. 
And we know from + // search() implementation that it will be the same as effective. + // Which means we can just move effective to recall. + // + rp.swap (ep); - // Did not find anything. - // - throw process_error (ENOENT, false); + return r; + } } - // Found the file and the result is in rp and ep, both of which can be - // empty. + // Did not find anything. // - process_path r (f, - rp.empty () ? nullptr : &(args0 = rp.string ().c_str ())); - - r.recall = move (rp); - r.effect = move (ep); - - return r; + throw process_error (ENOENT, false); } process:: @@ -281,11 +282,7 @@ namespace butl if (cwd != nullptr && *cwd != '\0' && chdir (cwd) != 0) fail (true); - const char* file (pp.effect.empty () - ? args[0] - : pp.effect.string ().c_str ()); - - if (execv (file, const_cast (&args[0])) == -1) + if (execv (pp.effect_string (), const_cast (&args[0])) == -1) fail (true); } @@ -360,14 +357,13 @@ namespace butl #else // _WIN32 - process_path process:: - path_search (const char*& args0, const dir_path& fb) + static process_path + path_search (const char* f, const dir_path& fb) { // Note that there is a similar version for Win32. typedef path::traits traits; - const char* f (args0); size_t fn (strlen (f)); // Unless there is already the .exe extension, then we will need to add @@ -382,7 +378,10 @@ namespace butl ext = (e == nullptr || casecmp (e, ".exe") != 0); } - path rp, ep; // recall & effective + process_path r (nullptr, path (), path ()); + path& rp (r.recall); + path& ep (r.effect); + auto search = [&ep, f, fn, ext] (const char* d, size_t dn) -> bool { string s (move (ep).string ()); // Reuse buffer. @@ -410,125 +409,117 @@ namespace butl return _stat (ep.string ().c_str (), &si) == 0 && S_ISREG (si.st_mode); }; - for (;;) // The "goto end" loop. + // If there is a directory component in the file, then search does not + // apply. But we may still need to append the extension. 
+ // + if (traits::find_separator (f, fn) != nullptr) { - // If there is a directory component in the file, then search does not - // apply. But we may still need to append the extension. - // - if (traits::find_separator (f, fn) != nullptr) + if (ext) { - if (ext) - { - ep = path (f, fn); - ep += ".exe"; - } - - break; + ep = path (f, fn); + ep += ".exe"; } - // The search order is documented in CreateProcess(). First we look in - // the directory of the parent executable. - // - { - char d[_MAX_PATH + 1]; - DWORD n (GetModuleFileName (NULL, d, _MAX_PATH + 1)); + return r; + } - if (n == 0 || n == _MAX_PATH + 1) // Failed or truncated. - throw process_error (last_error_msg ()); + // The search order is documented in CreateProcess(). First we look in the + // directory of the parent executable. + // + { + char d[_MAX_PATH + 1]; + DWORD n (GetModuleFileName (NULL, d, _MAX_PATH + 1)); - const char* p (traits::rfind_separator (d, n)); - assert (p != nullptr); + if (n == 0 || n == _MAX_PATH + 1) // Failed or truncated. + throw process_error (last_error_msg ()); - if (search (d, p - d + 1)) // Include trailing slash. - { - // In this case we have to set the recall path. - // - // Note that the directory we have extracted is always absolute but - // the parent's recall path (argv[0]) might be relative. It seems, - // ideally, we would want to use parent's argv[0] dir (if any) to - // form the recall path. In particular, if the parent has no - // directory, then it means it was found via the standard search - // (e.g., PATH) and then so should the child. - // - // How do we get the parent's argv[0]? Luckily, here is __argv on - // Windows. - // - const char* d (__argv[0]); - size_t n (strlen (d)); - if (const char* p = traits::rfind_separator (d, n)) - { - string s (d, p - d + 1); // Include trailing slash. - s.append (f, fn); - rp = path (move (s)); - } - - break; - } - } - - // Next look in the current working directory. Crazy, I know. 
- // - // The recall path is the same as initial, though it might not be a bad - // idea to prepend .\ for clarity. - // - if (search ("", 0)) - break; + const char* p (traits::rfind_separator (d, n)); + assert (p != nullptr); - // Now search in PATH. Recall is unchanged. - // + if (search (d, p - d + 1)) // Include trailing slash. { - const char* b (getenv ("PATH")); - - for (const char* e; b != nullptr; b = (e != nullptr ? e + 1 : e)) + // In this case we have to set the recall path. + // + // Note that the directory we have extracted is always absolute but + // the parent's recall path (argv[0]) might be relative. It seems, + // ideally, we would want to use parent's argv[0] dir (if any) to form + // the recall path. In particular, if the parent has no directory, + // then it means it was found via the standard search (e.g., PATH) and + // then so should the child. + // + // How do we get the parent's argv[0]? Luckily, there is __argv on + // Windows. + // + const char* d (__argv[0]); + size_t n (strlen (d)); + if (const char* p = traits::rfind_separator (d, n)) { - e = strchr (b, traits::path_separator); + string s (d, p - d + 1); // Include trailing slash. + s.append (f, fn); + rp = path (move (s)); - // Empty path (i.e., a double colon or a colon at the beginning or - // end of PATH) means search in the current dirrectory. + // If recall is the same as effective, then set effective to empty. // - if (search (b, e != nullptr ? e - b : strlen (b))) - break; + if (rp == ep) + ep.clear (); } - if (b != nullptr) - break; + return r; } + } - // Finally, if we were given a fallback, try that. This case is similar - // to searching in the parent executable's directory. - // - if (!fb.empty ()) - { - // I would have been nice to preserve trailing slash (by using - // representation() instead of string()), but that would involve - // a copy. Oh, well, can't always win. 
- // - if (search (fb.string ().c_str (), fb.string ().size ())) - { - // In this case we have to set the recall path. At least here we - // got to keep the original slash. - // - rp = fb; - rp /= f; - break; - } - } + // Next look in the current working directory. Crazy, I know. + // + // The recall path is the same as initial, though it might not be a bad + // idea to prepend .\ for clarity. + // + if (search ("", 0)) + return r; - // Did not find anything. + // Now search in PATH. Recall is unchanged. + // + for (const char* b (getenv ("PATH")), *e; + b != nullptr; + b = (e != nullptr ? e + 1 : e)) + { + e = strchr (b, traits::path_separator); + + // Empty path (i.e., a double colon or a colon at the beginning or end + // of PATH) means search in the current directory. // - throw process_error (ENOENT); + if (search (b, e != nullptr ? e - b : strlen (b))) + return r; } - // Found the file and the result is in rp and ep, both of which can be - // empty. + // Finally, if we were given a fallback, try that. This case is similar to + // searching in the parent executable's directory. // - process_path r (f, - rp.empty () ? nullptr : &(args0 = rp.string ().c_str ())); + if (!fb.empty ()) + { + // It would have been nice to preserve trailing slash (by using + // representation() instead of string()), but that would involve a + // copy. Oh, well, can't always win. + // + if (search (fb.string ().c_str (), fb.string ().size ())) + { + // In this case we have to set the recall path. At least here we got + // to keep the original slash. + // + rp = fb; + rp /= f; - r.recall = move (rp); - r.effect = move (ep); + // If recall is the same as effective, then set effective to empty. + // + if (rp == ep) + ep.clear (); - return r; + return r; + } + } + + // Did not find anything. + // + throw process_error (ENOENT); } class auto_handle @@ -666,9 +657,6 @@ namespace butl // Create the process. // - const char* file (pp.effect.empty () - ? 
args[0] - : pp.effect.string ().c_str ()); // Serialize the arguments to string. // @@ -743,7 +731,7 @@ namespace butl fail ("invalid file descriptor"); if (!CreateProcess ( - file, + pp.effect_string (), const_cast (cmd_line.c_str ()), 0, // Process security attributes. 0, // Primary thread security attributes. diff --git a/butl/process.ixx b/butl/process.ixx index 59ac60c..264f77c 100644 --- a/butl/process.ixx +++ b/butl/process.ixx @@ -7,6 +7,20 @@ namespace butl { inline process_path:: + ~process_path () + { + if (args0_ != nullptr) + *args0_ = initial; + } + + inline process_path:: + process_path (const char* i, path&& r, path&& e) + : initial (i), + recall (std::move (r)), + effect (std::move (e)), + args0_ (nullptr) {} + + inline process_path:: process_path (process_path&& p) : initial (p.initial), recall (std::move (p.recall)), @@ -35,6 +49,32 @@ namespace butl return *this; } + inline const char* process_path:: + recall_string () const + { + return recall.empty () ? initial : recall.string ().c_str (); + } + + inline const char* process_path:: + effect_string () const + { + return effect.empty () ? recall_string () : effect.string ().c_str (); + } + + inline process_path process:: + path_search (const char*& a0, const dir_path& fb) + { + process_path r (path_search (a0, true, fb)); + + if (!r.recall.empty ()) + { + r.args0_ = &a0; + a0 = r.recall.string ().c_str (); + } + + return r; + } + inline process:: process () : handle (0), -- cgit v1.1