From 19a6eb42669c2d380d0bb045fe1240b1589e9ad2 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 8 Feb 2019 20:34:57 +0300 Subject: Fix github-based repository fetching for URLs missing .git extension --- bpkg/fetch-git.cxx | 22 +++++++++++++++++++--- bpkg/fetch.cxx | 45 ++++++++++++++++++++++++++++++++++----------- bpkg/fetch.hxx | 6 ++++-- 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx index d00690f..a1c420a 100644 --- a/bpkg/fetch-git.cxx +++ b/bpkg/fetch-git.cxx @@ -497,6 +497,14 @@ namespace bpkg // hanging indefinitely while trying to establish TCP/IP connection (see the // timeout_opts() function for the gory details). // + // Note that some smart HTTP(S) repositories are capable of adding the missing + // .git directory extension in the URL (see git-upload-pack(1) for details). + // Some of them, specifically hosted on GitHub, do that if the `git/...` value + // is specified for the User-Agent HTTP request header. We will pretend to + // be git while sensing the protocol capabilities to "fix-up" repository + // URLs, if possible. That's why the function requires the git version + // parameter. + // enum class capabilities { dumb, // No shallow clone support. @@ -505,7 +513,9 @@ namespace bpkg }; static capabilities - sense_capabilities (const common_options& co, repository_url url) + sense_capabilities (const common_options& co, + repository_url url, + const semantic_version& git_ver) { assert (url.path); @@ -531,7 +541,10 @@ namespace bpkg url.query = "service=git-upload-pack"; string u (url.string ()); - process pr (start_fetch (co, u)); + process pr (start_fetch (co, + u, + path () /* out */, + "git/" + git_ver.string ())); try { @@ -959,8 +972,11 @@ namespace bpkg auto caps = [&co, &url, &cap] () -> capabilities { + // Note that url() runs `git config --get remote.origin.url` command on + // the first call, and so the git version gets assigned (and checked). 
+ // if (!cap) - cap = sense_capabilities (co, url ()); + cap = sense_capabilities (co, url (), git_ver); return *cap; }; diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx index aaabffe..910bd3c 100644 --- a/bpkg/fetch.cxx +++ b/bpkg/fetch.cxx @@ -94,11 +94,15 @@ namespace bpkg bool no_progress, const strings& ops, const string& url, - const path& out) + const path& out, + const string& user_agent) { bool fo (!out.empty ()); // Output to file. - string ua (BPKG_USER_AGENT " wget/" + to_string (wget_major) + "." - + to_string (wget_minor)); + + const string& ua (user_agent.empty () + ? BPKG_USER_AGENT " wget/" + to_string (wget_major) + + "." + to_string (wget_minor) + : user_agent); cstrings args { prog.string ().c_str (), @@ -243,15 +247,20 @@ namespace bpkg bool no_progress, const strings& ops, const string& url, - const path& out) + const path& out, + const string& user_agent) { bool fo (!out.empty ()); // Output to file. + const string& ua (user_agent.empty () + ? string (BPKG_USER_AGENT " curl") + : user_agent); + cstrings args { prog.string ().c_str (), "-f", // Fail on HTTP errors (e.g., 404). "-L", // Follow redirects. - "-A", (BPKG_USER_AGENT " curl") + "-A", ua.c_str () }; auto suppress_progress = [&args] () @@ -387,13 +396,18 @@ namespace bpkg bool no_progress, const strings& ops, const string& url, - const path& out) + const path& out, + const string& user_agent) { bool fo (!out.empty ()); // Output to file. + const string& ua (user_agent.empty () + ? 
string (BPKG_USER_AGENT " fetch") + : user_agent); + cstrings args { prog.string ().c_str (), - "--user-agent", (BPKG_USER_AGENT " fetch") + "--user-agent", ua.c_str () }; if (fo) @@ -558,14 +572,18 @@ namespace bpkg } process - start_fetch (const common_options& o, const string& url, const path& out) + start_fetch (const common_options& o, + const string& url, + const path& out, + const string& user_agent) { process (*f) (const path&, const optional<size_t>&, bool, const strings&, const string&, - const path&) = nullptr; + const path&, + const string&) = nullptr; switch (check (o)) { @@ -580,8 +598,13 @@ namespace bpkg try { - return f ( - fetch_path, timeout, o.no_progress (), o.fetch_option (), url, out); + return f (fetch_path, + timeout, + o.no_progress (), + o.fetch_option (), + url, + out, + user_agent); } catch (const process_error& e) { diff --git a/bpkg/fetch.hxx b/bpkg/fetch.hxx index a8acbb7..2adbfe3 100644 --- a/bpkg/fetch.hxx +++ b/bpkg/fetch.hxx @@ -107,12 +107,14 @@ namespace bpkg // Start the process of fetching the specified URL. If out is empty, then // fetch to stdout. In this case also don't show any progress unless we are - // running verbose. + // running verbose. If user_agent is empty, then send the default (fetch + // program specific) User-Agent header value. // butl::process start_fetch (const common_options& o, const string& url, - const path& out = path ()); + const path& out = {}, + const string& user_agent = {}); } #endif // BPKG_FETCH_HXX -- cgit v1.1