diff options
Diffstat (limited to 'bpkg/fetch.cxx')
-rw-r--r-- | bpkg/fetch.cxx | 501 |
1 files changed, 428 insertions, 73 deletions
diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx index 9db4920..5b59d42 100644 --- a/bpkg/fetch.cxx +++ b/bpkg/fetch.cxx @@ -3,9 +3,12 @@ #include <bpkg/fetch.hxx> +#include <libbutl/curl.hxx> + #include <bpkg/diagnostics.hxx> using namespace std; +using namespace butl; namespace bpkg { @@ -84,12 +87,21 @@ namespace bpkg } } - static process + // Note that there is no easy way to retrieve the HTTP status code for wget + // (there is no reliable way to redirect the status line/headers to stdout) + // and thus we always return 0. Due to the status code unavailability there + // is no need to redirect stderr and thus we ignore the stderr mode. + // + static pair<process, uint16_t> start_wget (const path& prog, const optional<size_t>& timeout, + bool progress, bool no_progress, + stderr_mode, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -98,7 +110,7 @@ namespace bpkg const string& ua (user_agent.empty () ? BPKG_USER_AGENT " wget/" + to_string (wget_major) + - "." + to_string (wget_minor) + '.' + to_string (wget_minor) : user_agent); cstrings args { @@ -106,12 +118,21 @@ namespace bpkg "-U", ua.c_str () }; + // Wget 1.16 introduced the --show-progress option which in the quiet mode + // (-q) shows a nice and tidy progress bar (if only it also showed errors, + // then it would have been perfect). + // + bool has_show_progress (wget_major > 1 || + (wget_major == 1 && wget_minor >= 16)); + // Map verbosity level. If we are running quiet or at level 1 // and the output is stdout, then run wget quiet. If at level // 1 and the output is a file, then show the progress bar. At // level 2 and 3 run it at the default level (so we will print // the command line and it will display the progress, error // messages, etc). Higher than that -- run it with debug output. + // Always show the progress bar if requested explicitly, even in + // the quiet mode. 
// // In the wget world quiet means don't print anything, not even // error messages. There is also the -nv mode (aka "non-verbose") @@ -122,16 +143,29 @@ namespace bpkg // if (verb < (fo ? 1 : 2)) { - args.push_back ("-q"); - no_progress = false; // Already suppressed with -q. + bool quiet (true); + + if (progress) + { + // If the --show-progress option is supported, then pass both + // --show-progress and -q, otherwise pass none of them and run + // verbose. + // + if (has_show_progress) + args.push_back ("--show-progress"); + else + quiet = false; + } + + if (quiet) + { + args.push_back ("-q"); + no_progress = false; // Already suppressed with -q. + } } else if (fo && verb == 1) { - // Wget 1.16 introduced the --show-progress option which in the - // quiet mode shows a nice and tidy progress bar (if only it also - // showed errors, then it would have been perfect). - // - if (wget_major > 1 || (wget_major == 1 && wget_minor >= 16)) + if (has_show_progress) { args.push_back ("-q"); @@ -199,12 +233,19 @@ namespace bpkg // just the file name (rather than the whole path) in the progress // report. Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data (), - 0, 1, 2, - out.directory ().string ().c_str (), - env.vars) - : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars); + process pr (fo + ? process (pp, args.data (), + 0, 1, 2, + out.directory ().string ().c_str (), + env.vars) + : process (pp, args.data (), + 0, -1, 2, + nullptr /* cwd */, env.vars)); + + if (!fo && out_is != nullptr) + out_is->open (move (pr.in_ofd), out_ism); + + return make_pair (move (pr), 0); } // curl @@ -252,12 +293,23 @@ namespace bpkg return false; } - static process + // If HTTP status code needs to be retrieved (out_is != NULL), then open the + // passed stream and read out the status line(s) extracting the status code + // and the headers. Otherwise, return 0 indicating that the status code is + // not available.
In the former case if the output file is also specified, + // then read out and save the file if the status code is 200 and drop the + // HTTP response body otherwise. + // + static pair<process, uint16_t> start_curl (const path& prog, const optional<size_t>& timeout, + bool progress, bool no_progress, + stderr_mode err_mode, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -270,7 +322,6 @@ namespace bpkg cstrings args { prog.string ().c_str (), - "-f", // Fail on HTTP errors (e.g., 404). "-L", // Follow redirects. "-A", ua.c_str () }; @@ -286,27 +337,36 @@ namespace bpkg // 1 and the output is a file, then show the progress bar. At // level 2 and 3 run it at the default level (so we will print // the command line and it will display its elaborate progress). - // Higher than that -- run it verbose. + // Higher than that -- run it verbose. Always show the progress + // bar if requested explicitly, even in the quiet mode. // - if (verb < (fo ? 1 : 2)) - { - suppress_progress (); - no_progress = false; // Already suppressed. - } - else if (fo && verb == 1) + bool quiet (err_mode == stderr_mode::redirect_quiet); + + if (!quiet) { - if (!no_progress) - args.push_back ("--progress-bar"); + if (verb < (fo ? 1 : 2)) + { + if (!progress) + { + suppress_progress (); + no_progress = false; // Already suppressed. + } + } + else if (fo && verb == 1) + { + if (!no_progress) + args.push_back ("--progress-bar"); + } + else if (verb > 3) + args.push_back ("-v"); } - else if (verb > 3) - args.push_back ("-v"); // Suppress progress. // // Note: the `-v -s` options combination is valid and results in a verbose // output without progress. // - if (no_progress) + if (no_progress || quiet) suppress_progress (); // Set download timeout if requested. @@ -327,7 +387,7 @@ namespace bpkg // Output. By default curl writes to stdout. 
// - if (fo) + if (fo && out_is == nullptr) // Output to file and don't query HTTP status? { args.push_back ("-o"); args.push_back (out.string ().c_str ()); @@ -341,11 +401,31 @@ namespace bpkg args.push_back (http_proxy.c_str ()); } + // Status code. + // + // Add the --include|-i option if HTTP status code needs to be retrieved + // in order to include the HTTP response headers in the output. Otherwise, + // add the --fail|-f option not to print the response body and exit with + // non-zero status code on HTTP error (e.g., 404), so that the caller can + // recognize the request failure. + // + // Note that older versions of curl (e.g., 7.55.1) ignore the --include|-i + // option in the presence of the --fail|-f option on HTTP errors and don't + // print the response status line and headers. + // + if (out_is != nullptr) + args.push_back ("-i"); + else + args.push_back ("-f"); + args.push_back (url.c_str ()); args.push_back (nullptr); process_path pp (process::path_search (args[0])); + // Let's still print the command line in the quiet mode to ease the + // troubleshooting. + // if (verb >= 2) print_process (args); else if (verb == 1 && fo && !no_progress) @@ -358,9 +438,115 @@ namespace bpkg // Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data ()) - : process (pp, args.data (), 0, -1); + process pr (fo && out_is == nullptr + ? process (pp, args.data ()) + : process (pp, args.data (), + 0, -1, err_mode == stderr_mode::pass ? 2 : -1)); + + // Close the process stdout stream and read stderr stream out and dump. + // + // Needs to be called prior to failing, so that the process won't get + // blocked writing to stdout and so that stderr gets dumped before the + // error message we issue.
+ // + auto close_streams = [&pr, out_is, err_mode] () + { + try + { + assert (out_is != nullptr); + + out_is->close (); + + if (err_mode != stderr_mode::pass) + bpkg::dump_stderr (move (pr.in_efd)); + } + catch (const io_error&) + { + // Not much we can do here. + } + }; + + // If HTTP status code needs to be retrieved, then open the passed stream + // and read out the status line(s) and headers. + // + // Note that this implementation is inspired by the bdep's + // http_service::post() function. + // + uint16_t sc (0); + + if (out_is != nullptr) + try + { + ifdstream& is (*out_is); + is.open (move (pr.in_ofd), out_ism); + sc = curl::read_http_status (*out_is).code; + } + catch (const invalid_argument& e) + { + close_streams (); + + fail << "unable to read HTTP response status line for " << url << ": " + << e; + } + catch (const io_error&) + { + close_streams (); + + fail << "unable to read HTTP response status line for " << url; + } + + // If the output file is specified and the HTTP status code needs to also + // be retrieved, then read out and save the file if the status code is 200 + // and drop the HTTP response body otherwise. + // + bool io_read; // If true then io_error relates to a read operation. + if (fo && out_is != nullptr) + try + { + ifdstream& is (*out_is); + + // Read and save the file if the HTTP status code is 200. + // + if (sc == 200) + { + io_read = false; + ofdstream os (out, fdopen_mode::binary); + + bufstreambuf* buf (dynamic_cast<bufstreambuf*> (is.rdbuf ())); + assert (buf != nullptr); + + for (io_read = true; + is.peek () != istream::traits_type::eof (); // Potentially reads. + io_read = true) + { + size_t n (buf->egptr () - buf->gptr ()); + + io_read = false; + os.write (buf->gptr (), n); + + buf->gbump (static_cast<int> (n)); + } + + io_read = false; + os.close (); + } + + // Close the stream, skipping the remaining content, if present. 
+ // + io_read = true; + is.close (); + } + catch (const io_error& e) + { + close_streams (); + + if (io_read) + fail << "unable to read fetched " << url << ": " << e; + else + fail << "unable to write to " << out << ": " << e; + } + + return make_pair (move (pr), sc); } // fetch @@ -410,12 +596,24 @@ namespace bpkg return false; } - static process + // Note that there is no easy way to retrieve the HTTP status code for the + // fetch program and thus we always return 0. + // + // Also note that in the redirect* stderr modes we nevertheless redirect + // stderr to prevent the fetch program from interactively querying the user + // for the credentials. Thus, we also respect the redirect_quiet mode in + // contrast to start_wget(). + // + static pair<process, uint16_t> start_fetch (const path& prog, const optional<size_t>& timeout, + bool progress, bool no_progress, + stderr_mode err_mode, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -437,7 +635,8 @@ namespace bpkg // Map verbosity level. If we are running quiet then run fetch quiet. // If we are at level 1 and we are fetching into a file or we are at // level 2 or 3, then run it at the default level (so it will display - // the progress). Higher than that -- run it verbose. + // the progress). Higher than that -- run it verbose. Always show the + // progress bar if requested explicitly, even in the quiet mode. // // Note that the only way to suppress progress for the fetch program is to // run it quiet (-q). However, it prints nothing but the progress by @@ -446,20 +645,28 @@ namespace bpkg // unless the verbosity level is greater than three, in which case we will // run verbose (and with progress). That's the best we can do. // - if (verb < (fo ? 1 : 2)) - { - args.push_back ("-q"); - no_progress = false; // Already suppressed with -q. 
- } - else if (verb > 3) + bool quiet (err_mode == stderr_mode::redirect_quiet); + + if (!quiet) { - args.push_back ("-v"); - no_progress = false; // Don't be quiet in the verbose mode (see above). + if (verb < (fo ? 1 : 2)) + { + if (!progress) + { + args.push_back ("-q"); + no_progress = false; // Already suppressed with -q. + } + } + else if (verb > 3) + { + args.push_back ("-v"); + no_progress = false; // Don't be quiet in the verbose mode (see above). + } } // Suppress progress. // - if (no_progress) + if (no_progress || quiet) args.push_back ("-q"); // Set download timeout if requested. @@ -501,6 +708,9 @@ namespace bpkg env.vars = evars; } + // Let's still print the command line in the quiet mode to ease the + // troubleshooting. + // if (verb >= 2) print_process (env, args); @@ -509,12 +719,19 @@ namespace bpkg // just the file name (rather than the whole path) in the progress // report. Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data (), - 0, 1, 2, - out.directory ().string ().c_str (), - env.vars) - : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars); + process pr (fo + ? process (pp, args.data (), + 0, 1, 2, + out.directory ().string ().c_str (), + env.vars) + : process (pp, args.data (), + 0, -1, err_mode == stderr_mode::pass ? 2 : -1, + nullptr /* cwd */, env.vars)); + + if (!fo && out_is != nullptr) + out_is->open (move (pr.in_ofd), out_ism); + + return make_pair (move (pr), 0); } // The dispatcher. @@ -522,7 +739,7 @@ namespace bpkg // Cache the result of finding/testing the fetch program. Sometimes a simple // global variable is really the right solution... 
// - enum class fetch_kind {wget, curl, fetch}; + enum class fetch_kind {curl, wget, fetch}; static path path_; static fetch_kind kind_; @@ -542,20 +759,20 @@ namespace bpkg const path& n (p.leaf ()); const string& s (n.string ()); - if (s.find ("wget") != string::npos) - { - if (!check_wget (p)) - fail << p << " does not appear to be the 'wget' program"; - - kind_ = fetch_kind::wget; - } - else if (s.find ("curl") != string::npos) + if (s.find ("curl") != string::npos) { if (!check_curl (p)) fail << p << " does not appear to be the 'curl' program"; kind_ = fetch_kind::curl; } + else if (s.find ("wget") != string::npos) + { + if (!check_wget (p)) + fail << p << " does not appear to be the 'wget' program"; + + kind_ = fetch_kind::wget; + } else if (s.find ("fetch") != string::npos) { if (!check_fetch (p)) @@ -566,15 +783,45 @@ namespace bpkg else fail << "unknown fetch program " << p; } + else if (o.curl_specified ()) + { + const path& p (path_ = o.curl ()); + + if (!check_curl (p)) + fail << p << " does not appear to be the 'curl' program"; + + kind_ = fetch_kind::curl; + } else { // See if any is available. The preference order is: // + // curl + // wget + // fetch +#if 1 + if (check_curl (path_ = path ("curl"))) + { + kind_ = fetch_kind::curl; + } + else if (check_wget (path_ = path ("wget"))) + { + kind_ = fetch_kind::wget; + } +#else + // Old preference order: + // // wget 1.16 or up // curl // wget // fetch // + // We used to prefer wget 1.16 because it has --show-progress which + // results in nicer progress. But experience shows that wget is quite + // unreliable plus with bdep always using curl, it would be strange + // to use both curl and wget (and to expect the user to set up proxy, + // authentication, etc., for both).
+ // bool wg (check_wget (path_ = path ("wget"))); if (wg && (wget_major > 1 || (wget_major == 1 && wget_minor >= 16))) @@ -590,12 +837,13 @@ namespace bpkg path_ = path ("wget"); kind_ = fetch_kind::wget; } +#endif else if (check_fetch (path_ = path ("fetch"))) { kind_ = fetch_kind::fetch; } else - fail << "unable to find 'wget', 'curl', or 'fetch'" << + fail << "unable to find 'curl', 'wget', or 'fetch'" << info << "use --fetch to specify the fetch program location"; if (verb >= 3) @@ -606,26 +854,47 @@ namespace bpkg return kind_; } - process + static pair<process, uint16_t> start_fetch (const common_options& o, const string& src, + ifdstream* out_is, + fdstream_mode out_ism, + stderr_mode err_mode, const path& out, const string& user_agent, const url& proxy) { - process (*f) (const path&, - const optional<size_t>&, - bool, - const strings&, - const string&, - const path&, - const string&, - const string&) = nullptr; - - switch (check (o)) + // Currently, for the sake of simplicity, we don't support redirecting + // stderr if we fetch into a file. + // + assert (out.empty () || err_mode == stderr_mode::pass); + + // If out_is is not NULL and out is not empty, then the former argument is + // unused by the caller and only indicates that the HTTP status code still + // needs to be retrieved while the requested file needs to be saved. In + // this case if the fetch program doesn't provide an easy way to retrieve + // the HTTP status code, then the respective start_*() function can just + // ignore the referred stream. Otherwise, it may or may not use it for + // convenience but should close it before returning if it does. 
+ // + pair<process, uint16_t> (*f) (const path&, + const optional<size_t>&, + bool, + bool, + stderr_mode, + const strings&, + const string&, + ifdstream*, + fdstream_mode, + const path&, + const string&, + const string&) = nullptr; + + fetch_kind fk (check (o)); + switch (fk) { - case fetch_kind::wget: f = &start_wget; break; case fetch_kind::curl: f = &start_curl; break; + case fetch_kind::wget: f = &start_wget; break; case fetch_kind::fetch: f = &start_fetch; break; } @@ -698,11 +967,40 @@ namespace bpkg } } + // Note that the merge semantics here is not 100% accurate since we may + // override "later" --fetch-option with "earlier" --curl-option. + // However, this should be close enough for our use-case, which is + // bdep's --curl-option values overriding --fetch-option specified in + // the default options file. The situation that we will mis-handle is + // when both are specified on the command line, for example, + // --curl-option --max-time=2 --bpkg-option --fetch-option=--max-time=1, + // but that feels too far-fetched to complicate things here. + // + const strings& fos (o.fetch_option ()); + const strings& cos (o.curl_option ()); + + const strings& os ( + fk != fetch_kind::curl || cos.empty () + ? fos + : (fos.empty () + ? cos + : [&fos, &cos] () + { + strings r (fos.begin (), fos.end ()); + r.insert (r.end (), cos.begin (), cos.end ()); + return r; + } ())); + + return f (path_, timeout, + o.progress (), o.no_progress (), - o.fetch_option (), + err_mode, + os, !http_url.empty () ?
http_url : src, + out_is, + out_ism, out, user_agent, http_proxy); @@ -717,4 +1015,61 @@ namespace bpkg throw failed (); } } + + process + start_fetch (const common_options& o, + const string& src, + const path& out, + const string& user_agent, + const url& proxy) + { + return start_fetch (o, + src, + nullptr /* out_is */, + fdstream_mode::none, + stderr_mode::pass, + out, + user_agent, + proxy).first; + } + + pair<process, uint16_t> + start_fetch_http (const common_options& o, + const string& src, + ifdstream& out, + fdstream_mode out_mode, + stderr_mode err_mode, + const string& user_agent, + const url& proxy) + { + return start_fetch (o, + src, + &out, + out_mode, + err_mode, + path () /* out */, + user_agent, + proxy); + } + + pair<process, uint16_t> + start_fetch_http (const common_options& o, + const string& src, + const path& out, + const string& user_agent, + const url& proxy) + { + assert (!out.empty ()); + + ifdstream is (ifdstream::badbit | ifdstream::failbit); + + return start_fetch (o, + src, + &is, + fdstream_mode::skip | fdstream_mode::binary, + stderr_mode::pass, + out, + user_agent, + proxy); + } } |