aboutsummaryrefslogtreecommitdiff
path: root/bpkg/fetch.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'bpkg/fetch.cxx')
-rw-r--r--bpkg/fetch.cxx501
1 files changed, 428 insertions, 73 deletions
diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx
index 9db4920..5b59d42 100644
--- a/bpkg/fetch.cxx
+++ b/bpkg/fetch.cxx
@@ -3,9 +3,12 @@
#include <bpkg/fetch.hxx>
+#include <libbutl/curl.hxx>
+
#include <bpkg/diagnostics.hxx>
using namespace std;
+using namespace butl;
namespace bpkg
{
@@ -84,12 +87,21 @@ namespace bpkg
}
}
- static process
+ // Note that there is no easy way to retrieve the HTTP status code for wget
+ // (there is no reliable way to redirect the status line/headers to stdout)
+ // and thus we always return 0. Due to the status code unavailability there
+ // is no need to redirect stderr and thus we ignore the stderr mode.
+ //
+ static pair<process, uint16_t>
start_wget (const path& prog,
const optional<size_t>& timeout,
+ bool progress,
bool no_progress,
+ stderr_mode,
const strings& ops,
const string& url,
+ ifdstream* out_is,
+ fdstream_mode out_ism,
const path& out,
const string& user_agent,
const string& http_proxy)
@@ -98,7 +110,7 @@ namespace bpkg
const string& ua (user_agent.empty ()
? BPKG_USER_AGENT " wget/" + to_string (wget_major) +
- "." + to_string (wget_minor)
+ '.' + to_string (wget_minor)
: user_agent);
cstrings args {
@@ -106,12 +118,21 @@ namespace bpkg
"-U", ua.c_str ()
};
+ // Wget 1.16 introduced the --show-progress option which in the quiet mode
+ // (-q) shows a nice and tidy progress bar (if only it also showed errors,
+ // then it would have been perfect).
+ //
+ bool has_show_progress (wget_major > 1 ||
+ (wget_major == 1 && wget_minor >= 16));
+
// Map verbosity level. If we are running quiet or at level 1
// and the output is stdout, then run wget quiet. If at level
// 1 and the output is a file, then show the progress bar. At
// level 2 and 3 run it at the default level (so we will print
// the command line and it will display the progress, error
// messages, etc). Higher than that -- run it with debug output.
+ // Always show the progress bar if requested explicitly, even in
+ // the quiet mode.
//
// In the wget world quiet means don't print anything, not even
// error messages. There is also the -nv mode (aka "non-verbose")
@@ -122,16 +143,29 @@ namespace bpkg
//
if (verb < (fo ? 1 : 2))
{
- args.push_back ("-q");
- no_progress = false; // Already suppressed with -q.
+ bool quiet (true);
+
+ if (progress)
+ {
+ // If the --show-progress option is supported, then pass both
+ // --show-progress and -q, otherwise pass none of them and run
+ // verbose.
+ //
+ if (has_show_progress)
+ args.push_back ("--show-progress");
+ else
+ quiet = false;
+ }
+
+ if (quiet)
+ {
+ args.push_back ("-q");
+ no_progress = false; // Already suppressed with -q.
+ }
}
else if (fo && verb == 1)
{
- // Wget 1.16 introduced the --show-progress option which in the
- // quiet mode shows a nice and tidy progress bar (if only it also
- // showed errors, then it would have been perfect).
- //
- if (wget_major > 1 || (wget_major == 1 && wget_minor >= 16))
+ if (has_show_progress)
{
args.push_back ("-q");
@@ -199,12 +233,19 @@ namespace bpkg
// just the file name (rather than the whole path) in the progress
// report. Process exceptions must be handled by the caller.
//
- return fo
- ? process (pp, args.data (),
- 0, 1, 2,
- out.directory ().string ().c_str (),
- env.vars)
- : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars);
+ process pr (fo
+ ? process (pp, args.data (),
+ 0, 1, 2,
+ out.directory ().string ().c_str (),
+ env.vars)
+ : process (pp, args.data (),
+ 0, -1, 2,
+ nullptr /* cwd */, env.vars));
+
+ if (!fo && out_is != nullptr)
+ out_is->open (move (pr.in_ofd), out_ism);
+
+ return make_pair (move (pr), 0);
}
// curl
@@ -252,12 +293,23 @@ namespace bpkg
return false;
}
- static process
+ // If HTTP status code needs to be retrieved (out_is != NULL), then open the
+ // passed stream and read out the status line(s) extracting the status code
+ // and the headers. Otherwise, return 0 indicating that the status code is
+ // not available. In the former case if the output file is also specified,
+ // then read out and save the file if the status code is 200 and drop the
+ // HTTP response body otherwise.
+ //
+ static pair<process, uint16_t>
start_curl (const path& prog,
const optional<size_t>& timeout,
+ bool progress,
bool no_progress,
+ stderr_mode err_mode,
const strings& ops,
const string& url,
+ ifdstream* out_is,
+ fdstream_mode out_ism,
const path& out,
const string& user_agent,
const string& http_proxy)
@@ -270,7 +322,6 @@ namespace bpkg
cstrings args {
prog.string ().c_str (),
- "-f", // Fail on HTTP errors (e.g., 404).
"-L", // Follow redirects.
"-A", ua.c_str ()
};
@@ -286,27 +337,36 @@ namespace bpkg
// 1 and the output is a file, then show the progress bar. At
// level 2 and 3 run it at the default level (so we will print
// the command line and it will display its elaborate progress).
- // Higher than that -- run it verbose.
+ // Higher than that -- run it verbose. Always show the progress
+ // bar if requested explicitly, even in the quiet mode.
//
- if (verb < (fo ? 1 : 2))
- {
- suppress_progress ();
- no_progress = false; // Already suppressed.
- }
- else if (fo && verb == 1)
+ bool quiet (err_mode == stderr_mode::redirect_quiet);
+
+ if (!quiet)
{
- if (!no_progress)
- args.push_back ("--progress-bar");
+ if (verb < (fo ? 1 : 2))
+ {
+ if (!progress)
+ {
+ suppress_progress ();
+ no_progress = false; // Already suppressed.
+ }
+ }
+ else if (fo && verb == 1)
+ {
+ if (!no_progress)
+ args.push_back ("--progress-bar");
+ }
+ else if (verb > 3)
+ args.push_back ("-v");
}
- else if (verb > 3)
- args.push_back ("-v");
// Suppress progress.
//
// Note: the `-v -s` options combination is valid and results in a verbose
// output without progress.
//
- if (no_progress)
+ if (no_progress || quiet)
suppress_progress ();
// Set download timeout if requested.
@@ -327,7 +387,7 @@ namespace bpkg
// Output. By default curl writes to stdout.
//
- if (fo)
+ if (fo && out_is == nullptr) // Output to file and don't query HTTP status?
{
args.push_back ("-o");
args.push_back (out.string ().c_str ());
@@ -341,11 +401,31 @@ namespace bpkg
args.push_back (http_proxy.c_str ());
}
+ // Status code.
+ //
+ // Add the --include|-i option if HTTP status code needs to be retrieved
+ // in order to include the HTTP response headers in the output. Otherwise,
+ // add the --fail|-f option not to print the response body and exit with
+ // non-zero status code on HTTP error (e.g., 404), so that the caller can
+ // recognize the request failure.
+ //
+ // Note that older versions of curl (e.g., 7.55.1) ignore the --include|-i
+ // option in the presence of the --fail|-f option on HTTP errors and don't
+ // print the response status line and headers.
+ //
+ if (out_is != nullptr)
+ args.push_back ("-i");
+ else
+ args.push_back ("-f");
+
args.push_back (url.c_str ());
args.push_back (nullptr);
process_path pp (process::path_search (args[0]));
+ // Let's still print the command line in the quiet mode to ease the
+ // troubleshooting.
+ //
if (verb >= 2)
print_process (args);
else if (verb == 1 && fo && !no_progress)
@@ -358,9 +438,115 @@ namespace bpkg
// Process exceptions must be handled by the caller.
//
- return fo
- ? process (pp, args.data ())
- : process (pp, args.data (), 0, -1);
+ process pr (fo && out_is == nullptr
+ ? process (pp, args.data ())
+ : process (pp, args.data (),
+ 0, -1, err_mode == stderr_mode::pass ? 2 : -1));
+
+ // Close the process stdout stream, then read out and dump the stderr stream.
+ //
+ // Needs to be called prior to failing, so that the process won't get
+ // blocked writing to stdout and so that stderr gets dumped before the
+ // error message we issue.
+ //
+ auto close_streams = [&pr, out_is, err_mode] ()
+ {
+ try
+ {
+ assert (out_is != nullptr);
+
+ out_is->close ();
+
+ if (err_mode != stderr_mode::pass)
+ bpkg::dump_stderr (move (pr.in_efd));
+ }
+ catch (const io_error&)
+ {
+ // Not much we can do here.
+ }
+ };
+
+ // If HTTP status code needs to be retrieved, then open the passed stream
+ // and read out the status line(s) and headers.
+ //
+ // Note that this implementation is inspired by the bdep's
+ // http_service::post() function.
+ //
+ uint16_t sc (0);
+
+ if (out_is != nullptr)
+ try
+ {
+ ifdstream& is (*out_is);
+ is.open (move (pr.in_ofd), out_ism);
+ sc = curl::read_http_status (*out_is).code;
+ }
+ catch (const invalid_argument& e)
+ {
+ close_streams ();
+
+ fail << "unable to read HTTP response status line for " << url << ": "
+ << e;
+ }
+ catch (const io_error&)
+ {
+ close_streams ();
+
+ fail << "unable to read HTTP response status line for " << url;
+ }
+
+ // If the output file is specified and the HTTP status code needs to also
+ // be retrieved, then read out and save the file if the status code is 200
+ // and drop the HTTP response body otherwise.
+ //
+ bool io_read; // If true then io_error relates to a read operation.
+ if (fo && out_is != nullptr)
+ try
+ {
+ ifdstream& is (*out_is);
+
+ // Read and save the file if the HTTP status code is 200.
+ //
+ if (sc == 200)
+ {
+ io_read = false;
+ ofdstream os (out, fdopen_mode::binary);
+
+ bufstreambuf* buf (dynamic_cast<bufstreambuf*> (is.rdbuf ()));
+ assert (buf != nullptr);
+
+ for (io_read = true;
+ is.peek () != istream::traits_type::eof (); // Potentially reads.
+ io_read = true)
+ {
+ size_t n (buf->egptr () - buf->gptr ());
+
+ io_read = false;
+ os.write (buf->gptr (), n);
+
+ buf->gbump (static_cast<int> (n));
+ }
+
+ io_read = false;
+ os.close ();
+ }
+
+ // Close the stream, skipping the remaining content, if present.
+ //
+ io_read = true;
+ is.close ();
+ }
+ catch (const io_error& e)
+ {
+ close_streams ();
+
+ if (io_read)
+ fail << "unable to read fetched " << url << ": " << e;
+ else
+ fail << "unable to write to " << out << ": " << e;
+ }
+
+ return make_pair (move (pr), sc);
}
// fetch
@@ -410,12 +596,24 @@ namespace bpkg
return false;
}
- static process
+ // Note that there is no easy way to retrieve the HTTP status code for the
+ // fetch program and thus we always return 0.
+ //
+ // Also note that in the redirect* stderr modes we nevertheless redirect
+ // stderr to prevent the fetch program from interactively querying the user
+ // for the credentials. Thus, we also respect the redirect_quiet mode in
+ // contrast to start_wget().
+ //
+ static pair<process, uint16_t>
start_fetch (const path& prog,
const optional<size_t>& timeout,
+ bool progress,
bool no_progress,
+ stderr_mode err_mode,
const strings& ops,
const string& url,
+ ifdstream* out_is,
+ fdstream_mode out_ism,
const path& out,
const string& user_agent,
const string& http_proxy)
@@ -437,7 +635,8 @@ namespace bpkg
// Map verbosity level. If we are running quiet then run fetch quiet.
// If we are at level 1 and we are fetching into a file or we are at
// level 2 or 3, then run it at the default level (so it will display
- // the progress). Higher than that -- run it verbose.
+ // the progress). Higher than that -- run it verbose. Always show the
+ // progress bar if requested explicitly, even in the quiet mode.
//
// Note that the only way to suppress progress for the fetch program is to
// run it quiet (-q). However, it prints nothing but the progress by
@@ -446,20 +645,28 @@ namespace bpkg
// unless the verbosity level is greater than three, in which case we will
// run verbose (and with progress). That's the best we can do.
//
- if (verb < (fo ? 1 : 2))
- {
- args.push_back ("-q");
- no_progress = false; // Already suppressed with -q.
- }
- else if (verb > 3)
+ bool quiet (err_mode == stderr_mode::redirect_quiet);
+
+ if (!quiet)
{
- args.push_back ("-v");
- no_progress = false; // Don't be quiet in the verbose mode (see above).
+ if (verb < (fo ? 1 : 2))
+ {
+ if (!progress)
+ {
+ args.push_back ("-q");
+ no_progress = false; // Already suppressed with -q.
+ }
+ }
+ else if (verb > 3)
+ {
+ args.push_back ("-v");
+ no_progress = false; // Don't be quiet in the verbose mode (see above).
+ }
}
// Suppress progress.
//
- if (no_progress)
+ if (no_progress || quiet)
args.push_back ("-q");
// Set download timeout if requested.
@@ -501,6 +708,9 @@ namespace bpkg
env.vars = evars;
}
+ // Let's still print the command line in the quiet mode to ease the
+ // troubleshooting.
+ //
if (verb >= 2)
print_process (env, args);
@@ -509,12 +719,19 @@ namespace bpkg
// just the file name (rather than the whole path) in the progress
// report. Process exceptions must be handled by the caller.
//
- return fo
- ? process (pp, args.data (),
- 0, 1, 2,
- out.directory ().string ().c_str (),
- env.vars)
- : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars);
+ process pr (fo
+ ? process (pp, args.data (),
+ 0, 1, 2,
+ out.directory ().string ().c_str (),
+ env.vars)
+ : process (pp, args.data (),
+ 0, -1, err_mode == stderr_mode::pass ? 2 : -1,
+ nullptr /* cwd */, env.vars));
+
+ if (!fo && out_is != nullptr)
+ out_is->open (move (pr.in_ofd), out_ism);
+
+ return make_pair (move (pr), 0);
}
// The dispatcher.
@@ -522,7 +739,7 @@ namespace bpkg
// Cache the result of finding/testing the fetch program. Sometimes a simple
// global variable is really the right solution...
//
- enum class fetch_kind {wget, curl, fetch};
+ enum class fetch_kind {curl, wget, fetch};
static path path_;
static fetch_kind kind_;
@@ -542,20 +759,20 @@ namespace bpkg
const path& n (p.leaf ());
const string& s (n.string ());
- if (s.find ("wget") != string::npos)
- {
- if (!check_wget (p))
- fail << p << " does not appear to be the 'wget' program";
-
- kind_ = fetch_kind::wget;
- }
- else if (s.find ("curl") != string::npos)
+ if (s.find ("curl") != string::npos)
{
if (!check_curl (p))
fail << p << " does not appear to be the 'curl' program";
kind_ = fetch_kind::curl;
}
+ else if (s.find ("wget") != string::npos)
+ {
+ if (!check_wget (p))
+ fail << p << " does not appear to be the 'wget' program";
+
+ kind_ = fetch_kind::wget;
+ }
else if (s.find ("fetch") != string::npos)
{
if (!check_fetch (p))
@@ -566,15 +783,45 @@ namespace bpkg
else
fail << "unknown fetch program " << p;
}
+ else if (o.curl_specified ())
+ {
+ const path& p (path_ = o.curl ());
+
+ if (!check_curl (p))
+ fail << p << " does not appear to be the 'curl' program";
+
+ kind_ = fetch_kind::curl;
+ }
else
{
// See if any is available. The preference order is:
//
+ // curl
+ // wget
+ // fetch
+#if 1
+ if (check_curl (path_ = path ("curl")))
+ {
+ kind_ = fetch_kind::curl;
+ }
+ else if (check_wget (path_ = path ("wget")))
+ {
+ kind_ = fetch_kind::wget;
+ }
+#else
+ // Old preference order:
+ //
// wget 1.16 or up
// curl
// wget
// fetch
//
+ // We used to prefer wget 1.16 because it has --show-progress which
+ // results in nicer progress. But experience shows that wget is quite
+ // unreliable plus with bdep always using curl, it would be strange
+ // to use both curl and wget (and expecting the user to setup proxy,
+ // authentication, etc., for both).
+ //
bool wg (check_wget (path_ = path ("wget")));
if (wg && (wget_major > 1 || (wget_major == 1 && wget_minor >= 16)))
@@ -590,12 +837,13 @@ namespace bpkg
path_ = path ("wget");
kind_ = fetch_kind::wget;
}
+#endif
else if (check_fetch (path_ = path ("fetch")))
{
kind_ = fetch_kind::fetch;
}
else
- fail << "unable to find 'wget', 'curl', or 'fetch'" <<
+ fail << "unable to find 'curl', 'wget', or 'fetch'" <<
info << "use --fetch to specify the fetch program location";
if (verb >= 3)
@@ -606,26 +854,47 @@ namespace bpkg
return kind_;
}
- process
+ static pair<process, uint16_t>
start_fetch (const common_options& o,
const string& src,
+ ifdstream* out_is,
+ fdstream_mode out_ism,
+ stderr_mode err_mode,
const path& out,
const string& user_agent,
const url& proxy)
{
- process (*f) (const path&,
- const optional<size_t>&,
- bool,
- const strings&,
- const string&,
- const path&,
- const string&,
- const string&) = nullptr;
-
- switch (check (o))
+ // Currently, for the sake of simplicity, we don't support redirecting
+ // stderr if we fetch into a file.
+ //
+ assert (out.empty () || err_mode == stderr_mode::pass);
+
+ // If out_is is not NULL and out is not empty, then the former argument is
+ // unused by the caller and only indicates that the HTTP status code still
+ // needs to be retrieved while the requested file needs to be saved. In
+ // this case if the fetch program doesn't provide an easy way to retrieve
+ // the HTTP status code, then the respective start_*() function can just
+ // ignore the referred stream. Otherwise, it may or may not use it for
+ // convenience but should close it before returning if it does.
+ //
+ pair<process, uint16_t> (*f) (const path&,
+ const optional<size_t>&,
+ bool,
+ bool,
+ stderr_mode,
+ const strings&,
+ const string&,
+ ifdstream*,
+ fdstream_mode,
+ const path&,
+ const string&,
+ const string&) = nullptr;
+
+ fetch_kind fk (check (o));
+ switch (fk)
{
- case fetch_kind::wget: f = &start_wget; break;
case fetch_kind::curl: f = &start_curl; break;
+ case fetch_kind::wget: f = &start_wget; break;
case fetch_kind::fetch: f = &start_fetch; break;
}
@@ -698,11 +967,40 @@ namespace bpkg
}
}
+ // Note that the merge semantics here is not 100% accurate since we may
+ // override "later" --fetch-option with "earlier" --curl-option.
+ // However, this should be close enough for our use-case, which is
+ // bdep's --curl-option values overriding --fetch-option specified in
+ // the default options file. The situation that we will mis-handle is
+ // when both are specified on the command line, for example,
+ // --curl-option --max-time=2 --bpkg-option --fetch-option=--max-time=1,
+ // but that feels too far-fetched to complicate things here.
+ //
+ const strings& fos (o.fetch_option ());
+ const strings& cos (o.curl_option ());
+
+ const strings& os (
+ fk != fetch_kind::curl || cos.empty ()
+ ? fos
+ : (fos.empty ()
+ ? cos
+ : [&fos, &cos] ()
+ {
+ strings r (fos.begin (), fos.end ());
+ r.insert (r.end (), cos.begin (), cos.end ());
+ return r;
+ } ()));
+
+
return f (path_,
timeout,
+ o.progress (),
o.no_progress (),
- o.fetch_option (),
+ err_mode,
+ os,
!http_url.empty () ? http_url : src,
+ out_is,
+ out_ism,
out,
user_agent,
http_proxy);
@@ -717,4 +1015,61 @@ namespace bpkg
throw failed ();
}
}
+
+ process
+ start_fetch (const common_options& o,
+ const string& src,
+ const path& out,
+ const string& user_agent,
+ const url& proxy)
+ {
+ return start_fetch (o,
+ src,
+ nullptr /* out_is */,
+ fdstream_mode::none,
+ stderr_mode::pass,
+ out,
+ user_agent,
+ proxy).first;
+ }
+
+ pair<process, uint16_t>
+ start_fetch_http (const common_options& o,
+ const string& src,
+ ifdstream& out,
+ fdstream_mode out_mode,
+ stderr_mode err_mode,
+ const string& user_agent,
+ const url& proxy)
+ {
+ return start_fetch (o,
+ src,
+ &out,
+ out_mode,
+ err_mode,
+ path () /* out */,
+ user_agent,
+ proxy);
+ }
+
+ pair<process, uint16_t>
+ start_fetch_http (const common_options& o,
+ const string& src,
+ const path& out,
+ const string& user_agent,
+ const url& proxy)
+ {
+ assert (!out.empty ());
+
+ ifdstream is (ifdstream::badbit | ifdstream::failbit);
+
+ return start_fetch (o,
+ src,
+ &is,
+ fdstream_mode::skip | fdstream_mode::binary,
+ stderr_mode::pass,
+ out,
+ user_agent,
+ proxy);
+ }
}