From 4476d9d02ac7af21a7bc6ef92490491d308237f4 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 29 Apr 2020 22:20:21 +0300 Subject: Add --pkg-proxy common option --- bpkg/archive.cxx | 3 - bpkg/archive.hxx | 2 - bpkg/auth.cxx | 1 - bpkg/checksum.cxx | 4 -- bpkg/common.cli | 22 +++++++- bpkg/diagnostics.cxx | 29 ++++++++-- bpkg/diagnostics.hxx | 22 ++++++++ bpkg/fetch-git.cxx | 1 - bpkg/fetch-pkg.cxx | 12 +++- bpkg/fetch.cxx | 138 ++++++++++++++++++++++++++++++++++++++++------ bpkg/fetch.hxx | 15 +++-- bpkg/manifest-utility.cxx | 1 - bpkg/pkg-build.cxx | 3 +- bpkg/pkg-unpack.cxx | 3 - bpkg/pkg-verify.cxx | 1 - bpkg/satisfaction.cxx | 3 - bpkg/types-parsers.cxx | 22 ++++++++ bpkg/types-parsers.hxx | 10 ++++ bpkg/types.hxx | 7 +++ bpkg/utility.cxx | 1 - bpkg/utility.hxx | 5 ++ 21 files changed, 250 insertions(+), 55 deletions(-) diff --git a/bpkg/archive.cxx b/bpkg/archive.cxx index 1471029..c096701 100644 --- a/bpkg/archive.cxx +++ b/bpkg/archive.cxx @@ -3,13 +3,10 @@ #include -#include - #include #include using namespace std; -using namespace butl; namespace bpkg { diff --git a/bpkg/archive.hxx b/bpkg/archive.hxx index f46812f..cfc4cdb 100644 --- a/bpkg/archive.hxx +++ b/bpkg/archive.hxx @@ -4,8 +4,6 @@ #ifndef BPKG_ARCHIVE_HXX #define BPKG_ARCHIVE_HXX -#include - #include #include diff --git a/bpkg/auth.cxx b/bpkg/auth.cxx index e29fdb9..e661ad0 100644 --- a/bpkg/auth.cxx +++ b/bpkg/auth.cxx @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/bpkg/checksum.cxx b/bpkg/checksum.cxx index 228e0c5..efc86dd 100644 --- a/bpkg/checksum.cxx +++ b/bpkg/checksum.cxx @@ -7,13 +7,9 @@ # include // replace() #endif -#include -#include - #include using namespace std; -using namespace butl; namespace bpkg { diff --git a/bpkg/common.cli b/bpkg/common.cli index c94f250..4537961 100644 --- a/bpkg/common.cli +++ b/bpkg/common.cli @@ -181,12 +181,30 @@ namespace bpkg programs." } + butl::url --pkg-proxy + { + "", + "HTTP proxy server to use when fetching package manifests and archives + from remote \cb{pkg} repositories. If specified, the proxy must + be in the \c{\b{http://}\i{host}[\b{:}\i{port}]} form. If \ci{port} is + omitted, 80 is used by default. + + Note that to allow caching, the proxied \cb{https://} URLs are + converted to \cb{http://} in order to prevent the fetch program from + tunneling (which is the standard approach for proxying HTTPS). If both + HTTP and HTTPS repositories are used, it is assumed that the proxy + server can figure out which URLs need to be converted back to + \cb{https://} based on the request information (for example, host + name). For security, this mechanism should only be used with signed + repositories or when the proxy is located inside a trusted network." + } + path --git = "git" { "", "The git program to be used to fetch git repositories. You can also - specify additional options that should be passed to the git program with - \cb{--git-option}. + specify additional options that should be passed to the git program + with \cb{--git-option}. If the git program is not explicitly specified, then \cb{bpkg} will use \cb{git} by default." diff --git a/bpkg/diagnostics.cxx b/bpkg/diagnostics.cxx index 5b77e65..a471d07 100644 --- a/bpkg/diagnostics.cxx +++ b/bpkg/diagnostics.cxx @@ -7,8 +7,8 @@ #include -#include -#include // operator<<(ostream, process_arg) +#include // process_args +#include // operator<<(ostream, process_*) #include @@ -22,14 +22,31 @@ namespace bpkg void print_process (const char* const args[], size_t n) { - diag_record r (text); - print_process (r, args, n); + diag_record dr (text); + print_process (dr, args, n); } void - print_process (diag_record& r, const char* const args[], size_t n) + print_process (diag_record& dr, const char* const args[], size_t n) { - r << process_args {args, n}; + dr << process_args {args, n}; + } + + void + print_process (const process_env& pe, const char* const args[], size_t n) + { + diag_record dr (text); + print_process (dr, pe, args, n); + } + + void + print_process (diag_record& dr, + const process_env& pe, const char* const args[], size_t n) + { + if (pe.env ()) + dr << pe << ' '; + + dr << process_args {args, n}; } // Diagnostics verbosity level. diff --git a/bpkg/diagnostics.hxx b/bpkg/diagnostics.hxx index ac19a1b..d11ab0b 100644 --- a/bpkg/diagnostics.hxx +++ b/bpkg/diagnostics.hxx @@ -49,6 +49,28 @@ namespace bpkg print_process (args.data (), args.size ()); } + // As above but with process_env. + // + void + print_process (diag_record&, + const process_env&, const char* const args[], size_t n = 0); + + void + print_process (const process_env&, const char* const args[], size_t n = 0); + + inline void + print_process (diag_record& dr, + const process_env& pe, const cstrings& args) + { + print_process (dr, pe, args.data (), args.size ()); + } + + inline void + print_process (const process_env& pe, const cstrings& args) + { + print_process (pe, args.data (), args.size ()); + } + // Verbosity level. Update documentation for --verbose if changing. // // 0 - disabled diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx index 274108f..df59dde 100644 --- a/bpkg/fetch-git.cxx +++ b/bpkg/fetch-git.cxx @@ -8,7 +8,6 @@ #include #include // digit(), xdigit() -#include #include // path_entry #include #include diff --git a/bpkg/fetch-pkg.cxx b/bpkg/fetch-pkg.cxx index 67ee3a9..81d4131 100644 --- a/bpkg/fetch-pkg.cxx +++ b/bpkg/fetch-pkg.cxx @@ -24,7 +24,11 @@ namespace bpkg bool ignore_unknown) { string url (u.string ()); - process pr (start_fetch (o, url)); + process pr (start_fetch (o, + url, + path () /* out */, + string () /* user_agent */, + o.pkg_proxy ())); try { @@ -94,7 +98,11 @@ namespace bpkg fail << "file " << df << " already exists"; auto_rmfile arm (df); - process pr (start_fetch (o, u.string (), df)); + process pr (start_fetch (o, + u.string (), + df, + string () /* user_agent */, + o.pkg_proxy ())); if (!pr.wait ()) { diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx index 63a459d..f565f26 100644 --- a/bpkg/fetch.cxx +++ b/bpkg/fetch.cxx @@ -3,12 +3,9 @@ #include -#include - #include using namespace std; -using namespace butl; namespace bpkg { @@ -94,7 +91,8 @@ namespace bpkg const strings& ops, const string& url, const path& out, - const string& user_agent) + const string& user_agent, + const string& http_proxy) { bool fo (!out.empty ()); // Output to file. @@ -178,10 +176,23 @@ namespace bpkg args.push_back (url.c_str ()); args.push_back (nullptr); - process_path pp (process::path_search (args[0])); + process_path pp (process::path_search (args[0])); + process_env env (pp); + + // HTTP proxy. + // + string evar; + const char* evars[] = {nullptr, nullptr}; + + if (!http_proxy.empty ()) + { + evar = "http_proxy=" + http_proxy; + evars[0] = evar.c_str (); + env.vars = evars; + } if (verb >= 2) - print_process (args); + print_process (env, args); // If we are fetching into a file, change the wget's directory to // that of the output file. We do it this way so that we end up with @@ -191,8 +202,9 @@ namespace bpkg return fo ? process (pp, args.data (), 0, 1, 2, - out.directory ().string ().c_str ()) - : process (pp, args.data (), 0, -1); + out.directory ().string ().c_str (), + env.vars) + : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars); } // curl @@ -247,7 +259,8 @@ namespace bpkg const strings& ops, const string& url, const path& out, - const string& user_agent) + const string& user_agent, + const string& http_proxy) { bool fo (!out.empty ()); // Output to file. @@ -320,6 +333,14 @@ namespace bpkg args.push_back (out.string ().c_str ()); } + // HTTP proxy. + // + if (!http_proxy.empty ()) + { + args.push_back ("--proxy"); + args.push_back (http_proxy.c_str ()); + } + args.push_back (url.c_str ()); args.push_back (nullptr); @@ -396,7 +417,8 @@ namespace bpkg const strings& ops, const string& url, const path& out, - const string& user_agent) + const string& user_agent, + const string& http_proxy) { bool fo (!out.empty ()); // Output to file. @@ -464,10 +486,23 @@ namespace bpkg args.push_back (url.c_str ()); args.push_back (nullptr); - process_path pp (process::path_search (args[0])); + process_path pp (process::path_search (args[0])); + process_env env (pp); + + // HTTP proxy. + // + string evar; + const char* evars[] = {nullptr, nullptr}; + + if (!http_proxy.empty ()) + { + evar = "HTTP_PROXY=" + http_proxy; + evars[0] = evar.c_str (); + env.vars = evars; + } if (verb >= 2) - print_process (args); + print_process (env, args); // If we are fetching into a file, change the fetch's directory to // that of the output file. We do it this way so that we end up with @@ -477,8 +512,9 @@ namespace bpkg return fo ? process (pp, args.data (), 0, 1, 2, - out.directory ().string ().c_str ()) - : process (pp, args.data (), 0, -1); + out.directory ().string ().c_str (), + env.vars) + : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars); } // The dispatcher. @@ -572,9 +608,10 @@ namespace bpkg process start_fetch (const common_options& o, - const string& url, + const string& src, const path& out, - const string& user_agent) + const string& user_agent, + const url& proxy) { process (*f) (const path&, const optional&, @@ -582,6 +619,7 @@ namespace bpkg const strings&, const string&, const path&, + const string&, const string&) = nullptr; switch (check (o)) @@ -595,15 +633,79 @@ namespace bpkg if (o.fetch_timeout_specified ()) timeout = o.fetch_timeout (); + // If the HTTP proxy is specified and the URL is HTTP(S), then fetch + // through the proxy, converting the https URL scheme to http. + // try { + string http_url; + string http_proxy; + + if (!proxy.empty ()) + { + auto bad_proxy = [&src, &proxy] (const char* d) + { + fail << "unable to fetch '" << src << "' using '" << proxy + << "' as proxy: " << d; + }; + + if (icasecmp (proxy.scheme, "http") != 0) + bad_proxy ("only HTTP proxy is supported"); + + if (!proxy.authority || proxy.authority->host.empty ()) + bad_proxy ("invalid host name in proxy URL"); + + if (!proxy.authority->user.empty ()) + bad_proxy ("unexpected user in proxy URL"); + + if (proxy.path) + bad_proxy ("unexpected path in proxy URL"); + + if (proxy.query) + bad_proxy ("unexpected query in proxy URL"); + + if (proxy.fragment) + bad_proxy ("unexpected fragment in proxy URL"); + + if (proxy.rootless) + bad_proxy ("proxy URL cannot be rootless"); + + url u; + try + { + u = url (src); + } + catch (const invalid_argument& e) + { + fail << "unable to fetch '" << src << "': invalid URL: " << e; + } + + bool http (icasecmp (u.scheme, "http") == 0); + bool https (icasecmp (u.scheme, "https") == 0); + + if (http || https) + { + http_proxy = proxy.string (); + + if (proxy.authority->port == 0) + http_proxy += ":80"; + + if (https) + { + u.scheme = "http"; + http_url = u.string (); + } + } + } + return f (fetch_path, timeout, o.no_progress (), o.fetch_option (), - url, + !http_url.empty () ? http_url : src, out, - user_agent); + user_agent, + http_proxy); } catch (const process_error& e) { diff --git a/bpkg/fetch.hxx b/bpkg/fetch.hxx index ed1fd50..d57dcf3 100644 --- a/bpkg/fetch.hxx +++ b/bpkg/fetch.hxx @@ -6,8 +6,6 @@ #include // time_t -#include - #include #include @@ -20,6 +18,9 @@ namespace bpkg // Repository type pkg (fetch-pkg.cxx). // + // If HTTP proxy is specified via the --pkg-proxy option, then use it for + // fetching manifests and archives from the remote pkg repository. + // pkg_repository_manifests pkg_fetch_repositories (const dir_path&, bool ignore_unknown); @@ -120,13 +121,17 @@ namespace bpkg // Start the process of fetching the specified URL. If out is empty, then // fetch to stdout. In this case also don't show any progress unless we are // running verbose. If user_agent is empty, then send the default (fetch - // program specific) User-Agent header value. + // program specific) User-Agent header value. If the HTTP proxy URL is not + // empty and the URL to fetch is HTTP(S), then fetch it via the specified + // proxy server converting the https URL scheme to http (see the --pkg-proxy + // option for details). // - butl::process + process start_fetch (const common_options& o, const string& url, const path& out = {}, - const string& user_agent = {}); + const string& user_agent = {}, + const butl::url& proxy = {}); } #endif // BPKG_FETCH_HXX diff --git a/bpkg/manifest-utility.cxx b/bpkg/manifest-utility.cxx index 697f920..159b2c3 100644 --- a/bpkg/manifest-utility.cxx +++ b/bpkg/manifest-utility.cxx @@ -6,7 +6,6 @@ #include // strcspn() #include -#include #include #include // wildcard_version diff --git a/bpkg/pkg-build.cxx b/bpkg/pkg-build.cxx index 6b68a65..106666a 100644 --- a/bpkg/pkg-build.cxx +++ b/bpkg/pkg-build.cxx @@ -10,7 +10,6 @@ #include // cout #include // find_if() -#include #include #include @@ -2645,7 +2644,7 @@ namespace bpkg // size_t p (0); - using url_traits = butl::url::traits_type; + using url_traits = url::traits_type; // Skip leading ':' that are not part of a URL. // diff --git a/bpkg/pkg-unpack.cxx b/bpkg/pkg-unpack.cxx index 02d61b6..10f8920 100644 --- a/bpkg/pkg-unpack.cxx +++ b/bpkg/pkg-unpack.cxx @@ -7,8 +7,6 @@ # include // replace() #endif -#include - #include #include @@ -23,7 +21,6 @@ #include using namespace std; -using namespace butl; namespace bpkg { diff --git a/bpkg/pkg-verify.cxx b/bpkg/pkg-verify.cxx index 5c23b69..38b4d68 100644 --- a/bpkg/pkg-verify.cxx +++ b/bpkg/pkg-verify.cxx @@ -5,7 +5,6 @@ #include // cout -#include #include #include diff --git a/bpkg/satisfaction.cxx b/bpkg/satisfaction.cxx index c2a4601..52def32 100644 --- a/bpkg/satisfaction.cxx +++ b/bpkg/satisfaction.cxx @@ -3,13 +3,10 @@ #include -#include - #include #include using namespace std; -using namespace butl; namespace bpkg { diff --git a/bpkg/types-parsers.cxx b/bpkg/types-parsers.cxx index ce3cd0c..be95219 100644 --- a/bpkg/types-parsers.cxx +++ b/bpkg/types-parsers.cxx @@ -7,6 +7,28 @@ namespace bpkg { namespace cli { + void parser:: + parse (url& x, bool& xs, scanner& s) + { + const char* o (s.next ()); + + if (!s.more ()) + throw missing_value (o); + + const char* v (s.next ()); + + try + { + x = url (v); + } + catch (const invalid_argument& e) + { + throw invalid_value (o, v, e.what ()); + } + + xs = true; + } + template static void parse_path (T& x, scanner& s) diff --git a/bpkg/types-parsers.hxx b/bpkg/types-parsers.hxx index c812e7b..38b7cee 100644 --- a/bpkg/types-parsers.hxx +++ b/bpkg/types-parsers.hxx @@ -19,6 +19,16 @@ namespace bpkg namespace cli { template <> + struct parser + { + static void + parse (url&, bool&, scanner&); + + static void + merge (url& b, const url& a) {b = a;} + }; + + template <> struct parser { static void diff --git a/bpkg/types.hxx b/bpkg/types.hxx index d4847b8..4f0cc98 100644 --- a/bpkg/types.hxx +++ b/bpkg/types.hxx @@ -21,6 +21,7 @@ #include +#include #include #include #include // compare_reference_target @@ -94,9 +95,14 @@ namespace bpkg using paths = std::vector; using dir_paths = std::vector; + // + // + using butl::url; + // // using butl::process; + using butl::process_env; using butl::process_path; using butl::process_exit; using butl::process_error; @@ -104,6 +110,7 @@ namespace bpkg // // using butl::auto_fd; + using butl::nullfd; using butl::fdpipe; using butl::ifdstream; using butl::ofdstream; diff --git a/bpkg/utility.cxx b/bpkg/utility.cxx index 5ec8159..9c9c991 100644 --- a/bpkg/utility.cxx +++ b/bpkg/utility.cxx @@ -4,7 +4,6 @@ #include #include -#include #include #include diff --git a/bpkg/utility.hxx b/bpkg/utility.hxx index 1763af0..e891f21 100644 --- a/bpkg/utility.hxx +++ b/bpkg/utility.hxx @@ -14,6 +14,7 @@ #include #include // icasecmp(), reverse_iterate(), etc +#include #include #include @@ -47,6 +48,10 @@ namespace bpkg using butl::setenv; using butl::unsetenv; + // + // + using butl::process_start_callback; + // // using butl::auto_rmfile; -- cgit v1.1