From 01c179eed3fcfccc7cdd262742935177dfcf5106 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 10 Jan 2023 23:33:31 +0300 Subject: Assume git repository protocol as smart if HTTP response code is 401 --- bpkg/fetch.cxx | 355 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 303 insertions(+), 52 deletions(-) (limited to 'bpkg/fetch.cxx') diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx index 44f1ba4..a7fe8c9 100644 --- a/bpkg/fetch.cxx +++ b/bpkg/fetch.cxx @@ -84,13 +84,21 @@ namespace bpkg } } - static process + // Note that there is no easy way to retrieve the HTTP status code for wget + // (there is no reliable way to redirect the status line/headers to stdout) + // and thus we always return 0. Due to the status code unavailability there + // is no need to redirect stderr and thus we ignore the quiet mode. + // + static pair start_wget (const path& prog, const optional& timeout, bool progress, bool no_progress, + bool /* quiet */, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -108,8 +116,8 @@ namespace bpkg }; // Wget 1.16 introduced the --show-progress option which in the quiet mode - // shows a nice and tidy progress bar (if only it also showed errors, then - // it would have been perfect). + // (-q) shows a nice and tidy progress bar (if only it also showed errors, + // then it would have been perfect). // bool has_show_progress (wget_major > 1 || (wget_major == 1 && wget_minor >= 16)); @@ -222,12 +230,19 @@ namespace bpkg // just the file name (rather than the whole path) in the progress // report. Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data (), - 0, 1, 2, - out.directory ().string ().c_str (), - env.vars) - : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars); + process pr (fo + ? process (pp, args.data (), + 0, 1, 2, + out.directory ().string ().c_str (), + env.vars) + : process (pp, args.data (), + 0, -1, 2, + nullptr /* cwd */, env.vars)); + + if (out_is != nullptr) + out_is->open (move (pr.in_ofd), out_ism); + + return make_pair (move (pr), 0); } // curl @@ -275,13 +290,21 @@ namespace bpkg return false; } - static process + // If HTTP status code needs to be retrieved, then open the passed stream + // and read out the status line(s) extracting the status code and the + // headers. Otherwise, return 0 indicating that the status code is not + // available. In the former case redirect stderr and respect the quiet mode. + // + static pair start_curl (const path& prog, const optional& timeout, bool progress, bool no_progress, + bool quiet, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -313,28 +336,31 @@ namespace bpkg // Higher than that -- run it verbose. Always show the progress // bar if requested explicitly, even in the quiet mode. // - if (verb < (fo ? 1 : 2)) + if (!quiet) { - if (!progress) + if (verb < (fo ? 1 : 2)) { - suppress_progress (); - no_progress = false; // Already suppressed. + if (!progress) + { + suppress_progress (); + no_progress = false; // Already suppressed. + } } + else if (fo && verb == 1) + { + if (!no_progress) + args.push_back ("--progress-bar"); + } + else if (verb > 3) + args.push_back ("-v"); } - else if (fo && verb == 1) - { - if (!no_progress) - args.push_back ("--progress-bar"); - } - else if (verb > 3) - args.push_back ("-v"); // Suppress progress. // // Note: the `-v -s` options combination is valid and results in a verbose // output without progress. // - if (no_progress) + if (no_progress || quiet) suppress_progress (); // Set download timeout if requested. @@ -369,11 +395,23 @@ namespace bpkg args.push_back (http_proxy.c_str ()); } + // Status code. + // + if (out_is != nullptr) + { + assert (!fo); // Currently unsupported (see start_fetch() for details). + + args.push_back ("-i"); + } + args.push_back (url.c_str ()); args.push_back (nullptr); process_path pp (process::path_search (args[0])); + // Let's still print the command line in the quiet mode to ease the + // troubleshooting. + // if (verb >= 2) print_process (args); else if (verb == 1 && fo && !no_progress) @@ -386,9 +424,143 @@ namespace bpkg // Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data ()) - : process (pp, args.data (), 0, -1); + process pr (fo + ? process (pp, args.data ()) + : process (pp, args.data (), + 0, -1, out_is != nullptr ? -1 : 2)); + + // Close the process stdout stream and read stderr stream out and dump. + // + // Needs to be called prior to failing, so that the process won't get + // blocked writing to stdout and so that stderr get dumped before the + // error message we issue. + // + auto close_streams = [&pr, out_is] () + { + try + { + out_is->close (); + + bpkg::dump_stderr (move (pr.in_efd)); + } + catch (const io_error&) + { + // Not much we can do here. + } + }; + + // If HTTP status code needs to be retrieved, then open the passed stream + // and read out the status line(s) and headers. + // + // Note that this implementation is inspired by the bdep's + // http_service::post() function. + // + uint16_t sc (0); + + if (out_is != nullptr) + try + { + // At this stage we will read until the empty line (containing just + // CRLF). Not being able to reach such a line is an error, which is the + // reason for the exception mask choice. When done, we will restore the + // original exception mask. + // + ifdstream::iostate es (out_is->exceptions ()); + + out_is->exceptions ( + ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit); + + out_is->open (move (pr.in_ofd), out_ism); + + // Parse and return the HTTP status code. Return 0 if the argument is + // invalid. + // + auto status_code = [] (const string& s) + { + char* e (nullptr); + unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw. + assert (e != nullptr); + + return *e == '\0' && c >= 100 && c < 600 + ? static_cast (c) + : 0; + }; + + // Read the CRLF-terminated line from the stream stripping the trailing + // CRLF. + // + auto read_line = [out_is] () + { + string l; + getline (*out_is, l); // Strips the trailing LF (0xA). + + // Note that on POSIX CRLF is not automatically translated into LF, so + // we need to strip CR (0xD) manually. + // + if (!l.empty () && l.back () == '\r') + l.pop_back (); + + return l; + }; + + auto read_status = [&read_line, &status_code, &url, &close_streams] () + -> uint16_t + { + string l (read_line ()); + + for (;;) // Breakout loop. + { + if (l.compare (0, 5, "HTTP/") != 0) + break; + + size_t p (l.find (' ', 5)); // The protocol end. + if (p == string::npos) + break; + + p = l.find_first_not_of (' ', p + 1); // The code start. + if (p == string::npos) + break; + + size_t e (l.find (' ', p + 1)); // The code end. + if (e == string::npos) + break; + + uint16_t c (status_code (string (l, p, e - p))); + if (c == 0) + break; + + return c; + } + + close_streams (); + + fail << "invalid HTTP response status line '" << l + << "' while fetching " << url; + + assert (false); // Can't be here. + return 0; + }; + + sc = read_status (); + + if (sc == 100) + { + while (!read_line ().empty ()) ; // Skips the interim response. + sc = read_status (); // Reads the final status code. + } + + while (!read_line ().empty ()) ; // Skips headers. + + out_is->exceptions (es); + } + catch (const io_error&) + { + close_streams (); + + fail << "unable to read HTTP response status line for " << url; + } + + return make_pair (move (pr), sc); } // fetch @@ -438,13 +610,24 @@ namespace bpkg return false; } - static process + // Note that there is no easy way to retrieve the HTTP status code for the + // fetch program and thus we always return 0. + // + // Also note that in the HTTP status code retrieval mode (out_is != NULL) we + // nevertheless redirect stderr to prevent the fetch program from + // interactively querying the user for the credentials. Thus, we also + // respect the quiet mode in contrast to start_wget(). + // + static pair start_fetch (const path& prog, const optional& timeout, bool progress, bool no_progress, + bool quiet, const strings& ops, const string& url, + ifdstream* out_is, + fdstream_mode out_ism, const path& out, const string& user_agent, const string& http_proxy) @@ -476,23 +659,26 @@ namespace bpkg // unless the verbosity level is greater than three, in which case we will // run verbose (and with progress). That's the best we can do. // - if (verb < (fo ? 1 : 2)) + if (!quiet) { - if (!progress) + if (verb < (fo ? 1 : 2)) { - args.push_back ("-q"); - no_progress = false; // Already suppressed with -q. + if (!progress) + { + args.push_back ("-q"); + no_progress = false; // Already suppressed with -q. + } + } + else if (verb > 3) + { + args.push_back ("-v"); + no_progress = false; // Don't be quiet in the verbose mode (see above). } - } - else if (verb > 3) - { - args.push_back ("-v"); - no_progress = false; // Don't be quiet in the verbose mode (see above). } // Suppress progress. // - if (no_progress) + if (no_progress || quiet) args.push_back ("-q"); // Set download timeout if requested. @@ -534,6 +720,9 @@ namespace bpkg env.vars = evars; } + // Let's still print the command line in the quiet mode to ease the + // troubleshooting. + // if (verb >= 2) print_process (env, args); @@ -542,12 +731,19 @@ namespace bpkg // just the file name (rather than the whole path) in the progress // report. Process exceptions must be handled by the caller. // - return fo - ? process (pp, args.data (), - 0, 1, 2, - out.directory ().string ().c_str (), - env.vars) - : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars); + process pr (fo + ? process (pp, args.data (), + 0, 1, 2, + out.directory ().string ().c_str (), + env.vars) + : process (pp, args.data (), + 0, -1, out_is != nullptr ? -1 : 2, + nullptr /* cwd */, env.vars)); + + if (out_is != nullptr) + out_is->open (move (pr.in_ofd), out_ism); + + return make_pair (move (pr), 0); } // The dispatcher. @@ -670,22 +866,38 @@ namespace bpkg return kind_; } - process + static pair start_fetch (const common_options& o, const string& src, + ifdstream* out_is, + fdstream_mode out_ism, + bool quiet, const path& out, const string& user_agent, const url& proxy) { - process (*f) (const path&, - const optional&, - bool, - bool, - const strings&, - const string&, - const path&, - const string&, - const string&) = nullptr; + // Currently, for the sake of simplicity, we don't support retrieving the + // HTTP status code if we fetch into a file. + // + assert (out.empty () || out_is == nullptr); + + // Quiet mode is only meaningful if HTTP status code needs to be + // retrieved. + // + assert (!quiet || out_is != nullptr); + + pair (*f) (const path&, + const optional&, + bool, + bool, + bool, + const strings&, + const string&, + ifdstream*, + fdstream_mode, + const path&, + const string&, + const string&) = nullptr; fetch_kind fk (check (o)); switch (fk) @@ -793,8 +1005,11 @@ namespace bpkg timeout, o.progress (), o.no_progress (), + quiet, os, !http_url.empty () ? http_url : src, + out_is, + out_ism, out, user_agent, http_proxy); @@ -809,4 +1024,40 @@ namespace bpkg throw failed (); } } + + process + start_fetch (const common_options& o, + const string& src, + const path& out, + const string& user_agent, + const url& proxy) + { + return start_fetch (o, + src, + nullptr /* out_is */, + fdstream_mode::none, + false /* quiet */, + out, + user_agent, + proxy).first; + } + + pair + start_fetch_http (const common_options& o, + const string& src, + ifdstream& out, + fdstream_mode out_mode, + bool quiet, + const string& user_agent, + const url& proxy) + { + return start_fetch (o, + src, + &out, + out_mode, + quiet, + path () /* out */, + user_agent, + proxy); + } } -- cgit v1.1