aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2023-01-10 23:33:31 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2023-01-12 13:07:30 +0300
commit01c179eed3fcfccc7cdd262742935177dfcf5106 (patch)
treeb6044897e1e752d5454f6144618ef8d46d661438
parent14d987674aa01cfc41f7cda4932cb5808b3ef738 (diff)
Assume git repository protocol as smart if HTTP response code is 401
-rw-r--r--bpkg/fetch-git.cxx102
-rw-r--r--bpkg/fetch.cxx355
-rw-r--r--bpkg/fetch.hxx32
-rw-r--r--bpkg/utility.cxx19
-rw-r--r--bpkg/utility.hxx6
5 files changed, 434 insertions, 80 deletions
diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx
index 79567e6..dc1e16a 100644
--- a/bpkg/fetch-git.cxx
+++ b/bpkg/fetch-git.cxx
@@ -285,20 +285,7 @@ namespace bpkg
try
{
- ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit);
-
- // We could probably write something like this, instead:
- //
- // *diag_stream << is.rdbuf () << flush;
- //
- // However, it would never throw and we could potentially miss the
- // reading failure, unless we decide to additionally mess with the
- // diagnostics stream exception mask.
- //
- for (string l; !eof (getline (is, l)); )
- *diag_stream << l << endl;
-
- is.close ();
+ dump_stderr (move (pipe.in));
// Fall through.
}
@@ -549,7 +536,11 @@ namespace bpkg
// For HTTP(S) sense the protocol type by sending the first HTTP request of
// the fetch operation handshake and analyzing the first line of the
// response. Fail if connecting to the server failed, the response code
- // differs from 200, or reading the response body failed.
+ // differs from 200 and 401, or reading the response body failed. If the
+ // response code is 401 (requires authentication), then consider protocol as
+ // smart. The thinking here is that a git repository with support for
+ // authentication is likely one of the hosting places (like git{hub,lab})
+ // and is unlikely to be dumb.
//
// Note that, as a side-effect, this function checks the HTTP(S) server
// availability and so must be called prior to any git command that involves
@@ -574,12 +565,12 @@ namespace bpkg
static capabilities
sense_capabilities (const common_options& co,
- repository_url url,
+ const repository_url& repo_url,
const semantic_version& git_ver)
{
- assert (url.path);
+ assert (repo_url.path);
- switch (url.scheme)
+ switch (repo_url.scheme)
{
case repository_protocol::git:
case repository_protocol::ssh:
@@ -588,6 +579,9 @@ namespace bpkg
case repository_protocol::https: break; // Ask the server (see below).
}
+ // Craft the URL for sensing the capabilities.
+ //
+ repository_url url (repo_url);
path& up (*url.path);
if (!up.to_directory ())
@@ -601,19 +595,69 @@ namespace bpkg
url.query = "service=git-upload-pack";
string u (url.string ());
- process pr (start_fetch (co,
- u,
- path () /* out */,
- "git/" + git_ver.string ()));
+
+ // Start fetching, also trying to retrieve the HTTP status code.
+ //
+ // We unset failbit to properly handle an empty response (no refs) from
+ // the dumb server.
+ //
+ ifdstream is (ifdstream::badbit);
+
+ pair<process, uint16_t> ps (
+ start_fetch_http (co,
+ u,
+ is /* out */,
+ fdstream_mode::skip | fdstream_mode::binary,
+ true /* quiet */,
+ "git/" + git_ver.string ()));
+
+ process& pr (ps.first);
+
+ // If the fetch program stderr is redirected, then read it out and pass
+ // through.
+ //
+ auto dump_stderr = [&pr] ()
+ {
+ if (pr.in_efd != nullfd)
+ try
+ {
+ bpkg::dump_stderr (move (pr.in_efd));
+ }
+ catch (const io_error&)
+ {
+ // Not much we can do here.
+ }
+ };
try
{
- // We unset failbit to properly handle an empty response (no refs) from
- // the dumb server.
+ // If authentication is required (HTTP status code is 401), then
+ // consider the protocol as smart. Drop the diagnostics if that's the
+ // case and dump it otherwise.
//
- ifdstream is (move (pr.in_ofd),
- fdstream_mode::skip | fdstream_mode::binary,
- ifdstream::badbit);
+ if (ps.second == 401)
+ {
+ if (verb >= 2)
+ {
+ info << "smart git protocol assumed for repository " << repo_url
+ << " due to authentication requirement" <<
+ info << "use --git-capabilities to override or suppress this "
+ << "diagnostics";
+ }
+
+ // Note that we don't care about the process exit code here and just
+ // silently wait for the process completion in the process object
+ // destructor. We, however, close the stream (reading out the
+ // content), so that the process won't get blocked writing to it.
+ //
+ // Also note that we drop the potentially redirected process stderr
+ // stream content. We even don't read it out, since we assume it fully
+ // fits into the pipe buffer.
+ //
+ is.close ();
+
+ return capabilities::smart;
+ }
string l;
getline (is, l); // Is empty if no refs returned by the dumb server.
@@ -667,6 +711,8 @@ namespace bpkg
is.close ();
+ dump_stderr ();
+
if (pr.wait ())
return r;
@@ -674,6 +720,8 @@ namespace bpkg
}
catch (const io_error&)
{
+ dump_stderr ();
+
if (pr.wait ())
fail << "unable to read fetched " << url << endg;
diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx
index 44f1ba4..a7fe8c9 100644
--- a/bpkg/fetch.cxx
+++ b/bpkg/fetch.cxx
@@ -84,13 +84,21 @@ namespace bpkg
}
}
- static process
+ // Note that there is no easy way to retrieve the HTTP status code for wget
+ // (there is no reliable way to redirect the status line/headers to stdout)
+ // and thus we always return 0. Due to the status code unavailability there
+ // is no need to redirect stderr and thus we ignore the quiet mode.
+ //
+ static pair<process, uint16_t>
start_wget (const path& prog,
const optional<size_t>& timeout,
bool progress,
bool no_progress,
+ bool /* quiet */,
const strings& ops,
const string& url,
+ ifdstream* out_is,
+ fdstream_mode out_ism,
const path& out,
const string& user_agent,
const string& http_proxy)
@@ -108,8 +116,8 @@ namespace bpkg
};
// Wget 1.16 introduced the --show-progress option which in the quiet mode
- // shows a nice and tidy progress bar (if only it also showed errors, then
- // it would have been perfect).
+ // (-q) shows a nice and tidy progress bar (if only it also showed errors,
+ // then it would have been perfect).
//
bool has_show_progress (wget_major > 1 ||
(wget_major == 1 && wget_minor >= 16));
@@ -222,12 +230,19 @@ namespace bpkg
// just the file name (rather than the whole path) in the progress
// report. Process exceptions must be handled by the caller.
//
- return fo
- ? process (pp, args.data (),
- 0, 1, 2,
- out.directory ().string ().c_str (),
- env.vars)
- : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars);
+ process pr (fo
+ ? process (pp, args.data (),
+ 0, 1, 2,
+ out.directory ().string ().c_str (),
+ env.vars)
+ : process (pp, args.data (),
+ 0, -1, 2,
+ nullptr /* cwd */, env.vars));
+
+ if (out_is != nullptr)
+ out_is->open (move (pr.in_ofd), out_ism);
+
+ return make_pair (move (pr), 0);
}
// curl
@@ -275,13 +290,21 @@ namespace bpkg
return false;
}
- static process
+ // If HTTP status code needs to be retrieved, then open the passed stream
+ // and read out the status line(s) extracting the status code and the
+ // headers. Otherwise, return 0 indicating that the status code is not
+ // available. In the former case redirect stderr and respect the quiet mode.
+ //
+ static pair<process, uint16_t>
start_curl (const path& prog,
const optional<size_t>& timeout,
bool progress,
bool no_progress,
+ bool quiet,
const strings& ops,
const string& url,
+ ifdstream* out_is,
+ fdstream_mode out_ism,
const path& out,
const string& user_agent,
const string& http_proxy)
@@ -313,28 +336,31 @@ namespace bpkg
// Higher than that -- run it verbose. Always show the progress
// bar if requested explicitly, even in the quiet mode.
//
- if (verb < (fo ? 1 : 2))
+ if (!quiet)
{
- if (!progress)
+ if (verb < (fo ? 1 : 2))
{
- suppress_progress ();
- no_progress = false; // Already suppressed.
+ if (!progress)
+ {
+ suppress_progress ();
+ no_progress = false; // Already suppressed.
+ }
}
+ else if (fo && verb == 1)
+ {
+ if (!no_progress)
+ args.push_back ("--progress-bar");
+ }
+ else if (verb > 3)
+ args.push_back ("-v");
}
- else if (fo && verb == 1)
- {
- if (!no_progress)
- args.push_back ("--progress-bar");
- }
- else if (verb > 3)
- args.push_back ("-v");
// Suppress progress.
//
// Note: the `-v -s` options combination is valid and results in a verbose
// output without progress.
//
- if (no_progress)
+ if (no_progress || quiet)
suppress_progress ();
// Set download timeout if requested.
@@ -369,11 +395,23 @@ namespace bpkg
args.push_back (http_proxy.c_str ());
}
+ // Status code.
+ //
+ if (out_is != nullptr)
+ {
+ assert (!fo); // Currently unsupported (see start_fetch() for details).
+
+ args.push_back ("-i");
+ }
+
args.push_back (url.c_str ());
args.push_back (nullptr);
process_path pp (process::path_search (args[0]));
+ // Let's still print the command line in the quiet mode to ease the
+ // troubleshooting.
+ //
if (verb >= 2)
print_process (args);
else if (verb == 1 && fo && !no_progress)
@@ -386,9 +424,143 @@ namespace bpkg
// Process exceptions must be handled by the caller.
//
- return fo
- ? process (pp, args.data ())
- : process (pp, args.data (), 0, -1);
+ process pr (fo
+ ? process (pp, args.data ())
+ : process (pp, args.data (),
+ 0, -1, out_is != nullptr ? -1 : 2));
+
+ // Close the process stdout stream and read stderr stream out and dump.
+ //
+ // Needs to be called prior to failing, so that the process won't get
+ // blocked writing to stdout and so that stderr get dumped before the
+ // error message we issue.
+ //
+ auto close_streams = [&pr, out_is] ()
+ {
+ try
+ {
+ out_is->close ();
+
+ bpkg::dump_stderr (move (pr.in_efd));
+ }
+ catch (const io_error&)
+ {
+ // Not much we can do here.
+ }
+ };
+
+ // If HTTP status code needs to be retrieved, then open the passed stream
+ // and read out the status line(s) and headers.
+ //
+ // Note that this implementation is inspired by the bdep's
+ // http_service::post() function.
+ //
+ uint16_t sc (0);
+
+ if (out_is != nullptr)
+ try
+ {
+ // At this stage we will read until the empty line (containing just
+ // CRLF). Not being able to reach such a line is an error, which is the
+ // reason for the exception mask choice. When done, we will restore the
+ // original exception mask.
+ //
+ ifdstream::iostate es (out_is->exceptions ());
+
+ out_is->exceptions (
+ ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);
+
+ out_is->open (move (pr.in_ofd), out_ism);
+
+ // Parse and return the HTTP status code. Return 0 if the argument is
+ // invalid.
+ //
+ auto status_code = [] (const string& s)
+ {
+ char* e (nullptr);
+ unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw.
+ assert (e != nullptr);
+
+ return *e == '\0' && c >= 100 && c < 600
+ ? static_cast<uint16_t> (c)
+ : 0;
+ };
+
+ // Read the CRLF-terminated line from the stream stripping the trailing
+ // CRLF.
+ //
+ auto read_line = [out_is] ()
+ {
+ string l;
+ getline (*out_is, l); // Strips the trailing LF (0xA).
+
+ // Note that on POSIX CRLF is not automatically translated into LF, so
+ // we need to strip CR (0xD) manually.
+ //
+ if (!l.empty () && l.back () == '\r')
+ l.pop_back ();
+
+ return l;
+ };
+
+ auto read_status = [&read_line, &status_code, &url, &close_streams] ()
+ -> uint16_t
+ {
+ string l (read_line ());
+
+ for (;;) // Breakout loop.
+ {
+ if (l.compare (0, 5, "HTTP/") != 0)
+ break;
+
+ size_t p (l.find (' ', 5)); // The protocol end.
+ if (p == string::npos)
+ break;
+
+ p = l.find_first_not_of (' ', p + 1); // The code start.
+ if (p == string::npos)
+ break;
+
+ size_t e (l.find (' ', p + 1)); // The code end.
+ if (e == string::npos)
+ break;
+
+ uint16_t c (status_code (string (l, p, e - p)));
+ if (c == 0)
+ break;
+
+ return c;
+ }
+
+ close_streams ();
+
+ fail << "invalid HTTP response status line '" << l
+ << "' while fetching " << url;
+
+ assert (false); // Can't be here.
+ return 0;
+ };
+
+ sc = read_status ();
+
+ if (sc == 100)
+ {
+ while (!read_line ().empty ()) ; // Skips the interim response.
+ sc = read_status (); // Reads the final status code.
+ }
+
+ while (!read_line ().empty ()) ; // Skips headers.
+
+ out_is->exceptions (es);
+ }
+ catch (const io_error&)
+ {
+ close_streams ();
+
+ fail << "unable to read HTTP response status line for " << url;
+ }
+
+ return make_pair (move (pr), sc);
}
// fetch
@@ -438,13 +610,24 @@ namespace bpkg
return false;
}
- static process
+ // Note that there is no easy way to retrieve the HTTP status code for the
+ // fetch program and thus we always return 0.
+ //
+ // Also note that in the HTTP status code retrieval mode (out_is != NULL) we
+ // nevertheless redirect stderr to prevent the fetch program from
+ // interactively querying the user for the credentials. Thus, we also
+ // respect the quiet mode in contrast to start_wget().
+ //
+ static pair<process, uint16_t>
start_fetch (const path& prog,
const optional<size_t>& timeout,
bool progress,
bool no_progress,
+ bool quiet,
const strings& ops,
const string& url,
+ ifdstream* out_is,
+ fdstream_mode out_ism,
const path& out,
const string& user_agent,
const string& http_proxy)
@@ -476,23 +659,26 @@ namespace bpkg
// unless the verbosity level is greater than three, in which case we will
// run verbose (and with progress). That's the best we can do.
//
- if (verb < (fo ? 1 : 2))
+ if (!quiet)
{
- if (!progress)
+ if (verb < (fo ? 1 : 2))
{
- args.push_back ("-q");
- no_progress = false; // Already suppressed with -q.
+ if (!progress)
+ {
+ args.push_back ("-q");
+ no_progress = false; // Already suppressed with -q.
+ }
+ }
+ else if (verb > 3)
+ {
+ args.push_back ("-v");
+ no_progress = false; // Don't be quiet in the verbose mode (see above).
}
- }
- else if (verb > 3)
- {
- args.push_back ("-v");
- no_progress = false; // Don't be quiet in the verbose mode (see above).
}
// Suppress progress.
//
- if (no_progress)
+ if (no_progress || quiet)
args.push_back ("-q");
// Set download timeout if requested.
@@ -534,6 +720,9 @@ namespace bpkg
env.vars = evars;
}
+ // Let's still print the command line in the quiet mode to ease the
+ // troubleshooting.
+ //
if (verb >= 2)
print_process (env, args);
@@ -542,12 +731,19 @@ namespace bpkg
// just the file name (rather than the whole path) in the progress
// report. Process exceptions must be handled by the caller.
//
- return fo
- ? process (pp, args.data (),
- 0, 1, 2,
- out.directory ().string ().c_str (),
- env.vars)
- : process (pp, args.data (), 0, -1, 2, nullptr /* cwd */, env.vars);
+ process pr (fo
+ ? process (pp, args.data (),
+ 0, 1, 2,
+ out.directory ().string ().c_str (),
+ env.vars)
+ : process (pp, args.data (),
+ 0, -1, out_is != nullptr ? -1 : 2,
+ nullptr /* cwd */, env.vars));
+
+ if (out_is != nullptr)
+ out_is->open (move (pr.in_ofd), out_ism);
+
+ return make_pair (move (pr), 0);
}
// The dispatcher.
@@ -670,22 +866,38 @@ namespace bpkg
return kind_;
}
- process
+ static pair<process, uint16_t>
start_fetch (const common_options& o,
const string& src,
+ ifdstream* out_is,
+ fdstream_mode out_ism,
+ bool quiet,
const path& out,
const string& user_agent,
const url& proxy)
{
- process (*f) (const path&,
- const optional<size_t>&,
- bool,
- bool,
- const strings&,
- const string&,
- const path&,
- const string&,
- const string&) = nullptr;
+ // Currently, for the sake of simplicity, we don't support retrieving the
+ // HTTP status code if we fetch into a file.
+ //
+ assert (out.empty () || out_is == nullptr);
+
+ // Quiet mode is only meaningful if HTTP status code needs to be
+ // retrieved.
+ //
+ assert (!quiet || out_is != nullptr);
+
+ pair<process, uint16_t> (*f) (const path&,
+ const optional<size_t>&,
+ bool,
+ bool,
+ bool,
+ const strings&,
+ const string&,
+ ifdstream*,
+ fdstream_mode,
+ const path&,
+ const string&,
+ const string&) = nullptr;
fetch_kind fk (check (o));
switch (fk)
@@ -793,8 +1005,11 @@ namespace bpkg
timeout,
o.progress (),
o.no_progress (),
+ quiet,
os,
!http_url.empty () ? http_url : src,
+ out_is,
+ out_ism,
out,
user_agent,
http_proxy);
@@ -809,4 +1024,40 @@ namespace bpkg
throw failed ();
}
}
+
+ process
+ start_fetch (const common_options& o,
+ const string& src,
+ const path& out,
+ const string& user_agent,
+ const url& proxy)
+ {
+ return start_fetch (o,
+ src,
+ nullptr /* out_is */,
+ fdstream_mode::none,
+ false /* quiet */,
+ out,
+ user_agent,
+ proxy).first;
+ }
+
+ pair<process, uint16_t>
+ start_fetch_http (const common_options& o,
+ const string& src,
+ ifdstream& out,
+ fdstream_mode out_mode,
+ bool quiet,
+ const string& user_agent,
+ const url& proxy)
+ {
+ return start_fetch (o,
+ src,
+ &out,
+ out_mode,
+ quiet,
+ path () /* out */,
+ user_agent,
+ proxy);
+ }
}
diff --git a/bpkg/fetch.hxx b/bpkg/fetch.hxx
index c077079..78e77a7 100644
--- a/bpkg/fetch.hxx
+++ b/bpkg/fetch.hxx
@@ -138,11 +138,41 @@ namespace bpkg
// option for details).
//
process
- start_fetch (const common_options& o,
+ start_fetch (const common_options&,
const string& url,
const path& out = {},
const string& user_agent = {},
const butl::url& proxy = {});
+
+ // Similar to the above but can only be used for HTTP(S) URLs. Additionally
+ // return the HTTP status code, if the underlying fetch program provides an
+ // easy way to retrieve it, and 0 otherwise.
+ //
+ // In the former case redirect the fetch process stderr to a pipe, so that
+ // depending on the returned status code the caller can either drop or dump
+ // the fetch process diagnostics. May also redirect stderr in the latter
+ // case for some implementation-specific reasons (to prevent the underlying
+ // fetch program from interacting with the user, etc). The caller can detect
+ // whether stderr is redirected by checking process::in_efd.
+ //
+ // In contrast to start_fetch() always fetch to stdout, which can be read by
+ // the caller from the specified stream.
+ //
+ // If the quiet argument is true, then, if stderr is redirected, minimize
+ // the amount of diagnostics printed by the fetch program by only printing
+ // errors. That allows the caller to read stdout and stderr streams
+ // sequentially in the blocking mode by assuming that the diagnostics always
+ // fits into the pipe buffer. If stderr is not redirected, then the quiet
+ // argument is ignored in favor of the more informative diagnostics.
+ //
+ pair<process, uint16_t>
+ start_fetch_http (const common_options&,
+ const string& url,
+ ifdstream& out,
+ fdstream_mode out_mode,
+ bool quiet,
+ const string& user_agent = {},
+ const butl::url& proxy = {});
}
#endif // BPKG_FETCH_HXX
diff --git a/bpkg/utility.cxx b/bpkg/utility.cxx
index 467e01f..52114df 100644
--- a/bpkg/utility.cxx
+++ b/bpkg/utility.cxx
@@ -366,4 +366,23 @@ namespace bpkg
? co.build ().string ().c_str ()
: BPKG_EXE_PREFIX "b" BPKG_EXE_SUFFIX;
}
+
+ void
+ dump_stderr (auto_fd&& fd)
+ {
+ ifdstream is (move (fd), fdstream_mode::skip, ifdstream::badbit);
+
+ // We could probably write something like this, instead:
+ //
+ // *diag_stream << is.rdbuf () << flush;
+ //
+ // However, it would never throw and we could potentially miss the reading
+ // failure, unless we decide to additionally mess with the diagnostics
+ // stream exception mask.
+ //
+ for (string l; !eof (getline (is, l)); )
+ *diag_stream << l << endl;
+
+ is.close ();
+ }
}
diff --git a/bpkg/utility.hxx b/bpkg/utility.hxx
index 1f5c725..8e7260a 100644
--- a/bpkg/utility.hxx
+++ b/bpkg/utility.hxx
@@ -246,6 +246,12 @@ namespace bpkg
template <typename... A>
void
run_b (const common_options&, verb_b, A&&... args);
+
+ // Read out the data from the specified file descriptor and dump it to
+ // stderr. Throw io_error on the underlying OS errors.
+ //
+ void
+ dump_stderr (auto_fd&&);
}
#include <bpkg/utility.txx>