diff options
Diffstat (limited to 'bpkg/fetch-git.cxx')
-rw-r--r-- | bpkg/fetch-git.cxx | 491 |
1 files changed, 419 insertions, 72 deletions
diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx index 3f6115f..d2c30a1 100644 --- a/bpkg/fetch-git.cxx +++ b/bpkg/fetch-git.cxx @@ -285,20 +285,7 @@ namespace bpkg try { - ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); - - // We could probably write something like this, instead: - // - // *diag_stream << is.rdbuf () << flush; - // - // However, it would never throw and we could potentially miss the - // reading failure, unless we decide to additionally mess with the - // diagnostics stream exception mask. - // - for (string l; !eof (getline (is, l)); ) - *diag_stream << l << endl; - - is.close (); + dump_stderr (move (pipe.in)); // Fall through. } @@ -549,7 +536,11 @@ namespace bpkg // For HTTP(S) sense the protocol type by sending the first HTTP request of // the fetch operation handshake and analyzing the first line of the // response. Fail if connecting to the server failed, the response code - // differs from 200, or reading the response body failed. + // differs from 200 and 401, or reading the response body failed. If the + // response code is 401 (requires authentication), then consider protocol as + // smart. The thinking here is that a git repository with support for + // authentication is likely one of the hosting places (like git{hub,lab}) + // and is unlikely to be dumb. // // Note that, as a side-effect, this function checks the HTTP(S) server // availability and so must be called prior to any git command that involves @@ -565,21 +556,16 @@ namespace bpkg // URLs, if possible. That's why the function requires the git version // parameter. // - enum class capabilities - { - dumb, // No shallow clone support. - smart, // Support for shallow clone, but not for unadvertised refs fetch. - unadv // Support for shallow clone and for unadvertised refs fetch. - }; + using capabilities = git_protocol_capabilities; static capabilities sense_capabilities (const common_options& co, - repository_url url, + const repository_url& repo_url, const semantic_version& git_ver) { - assert (url.path); + assert (repo_url.path); - switch (url.scheme) + switch (repo_url.scheme) { case repository_protocol::git: case repository_protocol::ssh: @@ -588,6 +574,9 @@ namespace bpkg case repository_protocol::https: break; // Ask the server (see below). } + // Craft the URL for sensing the capabilities. + // + repository_url url (repo_url); path& up (*url.path); if (!up.to_directory ()) @@ -601,19 +590,94 @@ namespace bpkg url.query = "service=git-upload-pack"; string u (url.string ()); - process pr (start_fetch (co, - u, - path () /* out */, - "git/" + git_ver.string ())); + + // Start fetching, also trying to retrieve the HTTP status code. + // + // We unset failbit to properly handle an empty response (no refs) from + // the dumb server. + // + ifdstream is (ifdstream::badbit); + + pair<process, uint16_t> ps ( + start_fetch_http (co, + u, + is /* out */, + fdstream_mode::skip | fdstream_mode::binary, + stderr_mode::redirect_quiet, + "git/" + git_ver.string ())); + + process& pr (ps.first); + + // If the fetch program stderr is redirected, then read it out and pass + // through. + // + auto dump_stderr = [&pr] () + { + if (pr.in_efd != nullfd) + try + { + bpkg::dump_stderr (move (pr.in_efd)); + } + catch (const io_error&) + { + // Not much we can do here. + } + }; try { - // We unset failbit to properly handle an empty response (no refs) from - // the dumb server. + // If authentication is required (HTTP status code is 401), then + // consider the protocol as smart. Drop the diagnostics if that's the + // case and dump it otherwise. // - ifdstream is (move (pr.in_ofd), - fdstream_mode::skip | fdstream_mode::binary, - ifdstream::badbit); + if (ps.second == 401) + { + if (verb >= 2) + { + info << "smart git protocol assumed for repository " << repo_url + << " due to authentication requirement" << + info << "use --git-capabilities to override or suppress this " + << "diagnostics"; + } + + // Note that we don't care about the process exit code here and just + // silently wait for the process completion in the process object + // destructor. We, however, close the stream (reading out the + // content), so that the process won't get blocked writing to it. + // + // Also note that we drop the potentially redirected process stderr + // stream content. We even don't read it out, since we assume it fully + // fits into the pipe buffer. + // + is.close (); + + return capabilities::smart; + } + + // Fail on any other HTTP error (e.g., 404). In the case of a success + // code other than 200 (e.g. 204 (No Content)) just let the capabilities + // detection to take its course. + // + if (ps.second != 0 && (ps.second < 200 || ps.second >= 300)) + { + // Note that we don't care about the process exit code here (see above + // for the reasoning). + // + is.close (); + + // Dump the potentially redirected process stderr stream content since + // it may be helpful to the user. + // + // Note, however, that we don't know if it really contains the error + // description since the fetch program may even exit successfully (see + // start_fetch_http() for details). Thus, we additionally print the + // HTTP status code in the diagnostics. + // + dump_stderr (); + + fail << "unable to fetch " << url << + info << "HTTP status code " << ps.second << endg; + } string l; getline (is, l); // Is empty if no refs returned by the dumb server. @@ -667,6 +731,8 @@ namespace bpkg is.close (); + dump_stderr (); + if (pr.wait ()) return r; @@ -674,6 +740,8 @@ namespace bpkg } catch (const io_error&) { + dump_stderr (); + if (pr.wait ()) fail << "unable to read fetched " << url << endg; @@ -877,24 +945,25 @@ namespace bpkg text << "querying " << url; refs rs; - fdpipe pipe (open_pipe ()); - - // Note: ls-remote doesn't print anything to stderr, so no progress - // suppression is required. - // - process pr (start_git (co, - pipe, 2 /* stderr */, - timeout_opts (co, url.scheme), - co.git_option (), - "ls-remote", - to_git_url (url))); - - // Shouldn't throw, unless something is severely damaged. - // - pipe.out.close (); for (;;) // Breakout loop. { + fdpipe pipe (open_pipe ()); + + // Note: ls-remote doesn't print anything to stderr, so no progress + // suppression is required. + // + process pr (start_git (co, + pipe, 2 /* stderr */, + timeout_opts (co, url.scheme), + co.git_option (), + "ls-remote", + to_git_url (url))); + + // Shouldn't throw, unless something is severely damaged. + // + pipe.out.close (); + try { ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); @@ -1083,7 +1152,25 @@ namespace bpkg // the first call, and so git version get assigned (and checked). // if (!cap) - cap = sense_capabilities (co, url (), git_ver); + { + const repository_url& u (url ()); + + // Check if the protocol capabilities are overridden for this + // repository. + // + const git_capabilities_map& gcs (co.git_capabilities ()); + + if (!gcs.empty () && u.scheme != repository_protocol::file) + { + auto i (gcs.find_sup (u.string ())); + + if (i != gcs.end ()) + cap = i->second; + } + + if (!cap) + cap = sense_capabilities (co, u, git_ver); + } return *cap; }; @@ -1701,6 +1788,255 @@ namespace bpkg submodule_failure (d, prefix, e); }; + // Use git-config to obtain the submodules names/paths and then + // git-ls-files to obtain their commits. + // + // Note that previously we used git-submodule--helper-list subcommand to + // obtain the submodules commits/paths and then git-submodule--helper-name + // to obtain their names. However, git 2.38 has removed these subcommands. + + // Obtain the submodules names/paths. + // + for (;;) // Breakout loop. + { + fdpipe pipe (open_pipe ()); + + process pr (start_git (co, + pipe, 2 /* stderr */, + co.git_option (), + "-C", dir, + "config", + "--list", + "--file", gitmodules_file, + "-z")); + + // Shouldn't throw, unless something is severely damaged. + // + pipe.out.close (); + + try + { + ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); + + for (string l; !eof (getline (is, l, '\0')); ) + { + auto bad = [&l] () + { + throw runtime_error ("invalid submodule option '" + l + '\''); + }; + + // The submodule configuration option line is NULL-terminated and + // has the following form: + // + // submodule.<submodule-name>.<option-name><NEWLINE><value> + // + // For example: + // + // submodule.style.path + // doc/style + // + l4 ([&]{trace << "submodule option: " << l;}); + + // If this is a submodule path option, then extract its name and + // path and add the entry to the resulting list. + // + size_t n (l.find ('\n')); + + if (n != string::npos && + n >= 15 && + l.compare (0, 10, "submodule.") == 0 && + l.compare (n - 5, 5, ".path") == 0) + { + string nm (l, 10, n - 15); + dir_path p (l, n + 1, l.size () - n - 1); + + // For good measure verify that the name and path are not empty. + // + if (nm.empty () || p.empty ()) + bad (); + + r.push_back (submodule {move (p), move (nm), empty_string}); + } + } + + is.close (); + + if (pr.wait ()) + break; + + // Fall through. + } + catch (const invalid_path& e) + { + if (pr.wait ()) + failure ("invalid submodule directory path '" + e.path + '\''); + + // Fall through. + } + catch (const io_error& e) + { + if (pr.wait ()) + failure ("unable to read submodule options", &e); + + // Fall through. + } + // Note that the io_error class inherits from the runtime_error class, + // so this catch-clause must go last. + // + catch (const runtime_error& e) + { + if (pr.wait ()) + failure (e.what ()); + + // Fall through. + } + + // We should only get here if the child exited with an error status. + // + assert (!pr.wait ()); + + failure ("unable to list submodule options"); + } + + // Note that we could potentially bail out here if the submodules list is + // empty. Let's however continue and verify that via git-ls-files, for + // good measure. + + // Complete the resulting submodules information with their commits. + // + for (;;) // Breakout loop. + { + fdpipe pipe (open_pipe ()); + + process pr (start_git (co, + pipe, 2 /* stderr */, + co.git_option (), + "-C", dir, + "ls-files", + "--stage", + "-z")); + + // Shouldn't throw, unless something is severely damaged. + // + pipe.out.close (); + + try + { + ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); + + for (string l; !eof (getline (is, l, '\0')); ) + { + auto bad = [&l] () + { + throw runtime_error ("invalid file description '" + l + '\''); + }; + + // The line describing a file is NULL-terminated and has the + // following form: + // + // <mode><SPACE><object><SPACE><stage><TAB><path> + // + // The mode is a 6-digit octal representation of the file type and + // permission bits mask. For a submodule directory it is 160000 (see + // git index format documentation for gitlink object type). For + // example: + // + // 160000 59dcc1bea3509e37b65905ac472f86f4c55eb510 0 doc/style + // + if (!(l.size () > 50 && l[48] == '0' && l[49] == '\t')) + bad (); + + // For submodules permission bits are always zero, so we can match + // the mode as a string. + // + if (l.compare (0, 6, "160000") == 0) + { + l4 ([&]{trace << "submodule: " << l;}); + + dir_path d (l, 50, l.size () - 50); + + auto i (find_if (r.begin (), r.end (), + [&d] (const submodule& sm) {return sm.path == d;})); + + if (i == r.end ()) + bad (); + + i->commit = string (l, 7, 40); + } + } + + is.close (); + + if (pr.wait ()) + break; + + // Fall through. + } + catch (const invalid_path& e) + { + if (pr.wait ()) + failure ("invalid submodule directory path '" + e.path + '\''); + + // Fall through. + } + catch (const io_error& e) + { + if (pr.wait ()) + failure ("unable to read repository file list", &e); + + // Fall through. + } + // Note that the io_error class inherits from the runtime_error class, + // so this catch-clause must go last. + // + catch (const runtime_error& e) + { + if (pr.wait ()) + failure (e.what ()); + + // Fall through. + } + + // We should only get here if the child exited with an error status. + // + assert (!pr.wait ()); + + failure ("unable to list repository files"); + } + + // Make sure that we have deduced commits for all the submodules. + // + for (const submodule& sm: r) + { + if (sm.commit.empty ()) + failure ("unable to deduce commit for submodule " + sm.name); + } + + return r; + } + + // @@ TMP Old, submodule--helper-{list,name} subcommands-based, + // implementation of find_submodules(). + // +#if 0 + static submodules + find_submodules (const common_options& co, + const dir_path& dir, + const dir_path& prefix, + bool gitmodules = true) + { + tracer trace ("find_submodules"); + + submodules r; + + if (gitmodules && !exists (dir / gitmodules_file)) + return r; + + auto failure = [&prefix] (const string& d, const exception* e = nullptr) + { + submodule_failure (d, prefix, e); + }; + fdpipe pipe (open_pipe ()); process pr (start_git (co, @@ -1730,7 +2066,7 @@ namespace bpkg l4 ([&]{trace << "submodule: " << l;}); if (!(l.size () > 50 && l[48] == '0' && l[49] == '\t')) - throw runtime_error ("invalid submodule description '" + l + "'"); + throw runtime_error ("invalid submodule description '" + l + '\''); dir_path d (string (l, 50)); @@ -1764,7 +2100,7 @@ namespace bpkg catch (const invalid_path& e) { if (pr.wait ()) - failure ("invalid submodule path '" + e.path + "'"); + failure ("invalid submodule path '" + e.path + '\''); // Fall through. } @@ -1792,6 +2128,7 @@ namespace bpkg submodule_failure ("unable to list submodules", prefix); } +#endif // Return commit id for the submodule directory or nullopt if the submodule // is not initialized (directory doesn't exist, doesn't contain .git entry, @@ -1839,13 +2176,15 @@ namespace bpkg co.git_option (), "-C", dir, - // Note that older git versions don't recognize the --super-prefix - // option but seem to behave correctly without any additional - // efforts when it is omitted. + // Note that git versions outside the [2.14.0 2.38.0) range don't + // recognize the --super-prefix option but seem to behave correctly + // without any additional efforts when it is omitted. // - !prefix.empty () && git_ver >= semantic_version {2, 14, 0} - ? strings ({"--super-prefix", prefix.posix_representation ()}) - : strings (), + (!prefix.empty () && + git_ver >= semantic_version {2, 14, 0} && + git_ver < semantic_version {2, 38, 0} + ? strings ({"--super-prefix", prefix.posix_representation ()}) + : strings ()), "submodule--helper", "init", verb < 2 ? "-q" : nullptr)) @@ -1958,7 +2297,7 @@ namespace bpkg catch (const invalid_path& e) { failure ("invalid submodule '" + sm.name + "' repository path '" + - e.path + "'"); + e.path + '\''); } catch (const invalid_argument& e) { @@ -2188,7 +2527,7 @@ namespace bpkg for (string l; !eof (getline (is, l, '\0')); ) { - // The line describing a file is NUL-terminated and has the following + // The line describing a file is NULL-terminated and has the following // form: // // <mode><SPACE><object><SPACE><stage><TAB><path> @@ -2198,16 +2537,18 @@ namespace bpkg // // 100644 165b42ec7a10fb6dd4a60b756fa1966c1065ef85 0 README // - l4 ([&]{trace << "file: " << l;}); - if (!(l.size () > 50 && l[48] == '0' && l[49] == '\t')) - throw runtime_error ("invalid file description '" + l + "'"); + throw runtime_error ("invalid file description '" + l + '\''); // For symlinks permission bits are always zero, so we can match the // mode as a string. // if (l.compare (0, 6, "120000") == 0) + { + l4 ([&]{trace << "symlink: " << l;}); + r.push_back (make_pair (path (string (l, 50)), string (l, 7, 40))); + } } is.close (); @@ -2220,7 +2561,7 @@ namespace bpkg catch (const invalid_path& e) { if (pr.wait ()) - failure ("invalid repository symlink path '" + e.path + "'"); + failure ("invalid repository symlink path '" + e.path + '\''); // Fall through. } @@ -2303,7 +2644,7 @@ namespace bpkg catch (const invalid_path& e) { failure ("invalid target path '" + e.path + "' for symlink '" + - lp.string () + "'", + lp.string () + '\'', &e); } @@ -2322,7 +2663,7 @@ namespace bpkg // if (tp.absolute ()) failure ("absolute target path '" + tp.string () + "' for symlink '" + - lp.string () + "'"); + lp.string () + '\''); // Verify that the symlink target path refers inside the top repository // directory. @@ -2351,6 +2692,9 @@ namespace bpkg void git_verify_symlinks (const common_options& co, const dir_path& dir) { + if ((verb && !co.no_progress ()) || co.progress ()) + text << "verifying symlinks..."; + verify_symlinks (co, dir, dir_path () /* prefix */); } @@ -2411,7 +2755,7 @@ namespace bpkg // if (r) failure ("unexpected real symlink in submodule '" + - sm.path.string () + "'"); + sm.path.string () + '\''); return nullopt; } @@ -2485,14 +2829,14 @@ namespace bpkg if (e.second.type == entry_type::symlink) { if (r) - failure ("unexpected real symlink '" + l.string () + "'"); + failure ("unexpected real symlink '" + l.string () + '\''); return nullopt; } } catch (const system_error& e) { - failure ("unable to stat symlink '" + l.string () + "'", &e); + failure ("unable to stat symlink '" + l.string () + '\'', &e); } // Read the symlink target path. @@ -2507,7 +2851,7 @@ namespace bpkg catch (const invalid_path& e) { failure ("invalid target path '" + e.path + "' for symlink '" + - l.string () + "'", + l.string () + '\'', &e); } catch (const io_error& e) @@ -2571,7 +2915,7 @@ namespace bpkg catch (const system_error& e) { failure ("unable to stat target '" + t.string () + - "' for symlink '" + l.string () + "'", + "' for symlink '" + l.string () + '\'', &e); } @@ -2589,7 +2933,7 @@ namespace bpkg { failure (string ("unable to create ") + (dir_target ? "junction" : "hardlink") + " '" + - l.string () + "' with target '" + t.string () + "'", + l.string () + "' with target '" + t.string () + '\'', &e); } @@ -2628,7 +2972,7 @@ namespace bpkg catch (const system_error& e) { failure ("unable to remove hardlink, symlink, or junction '" + - l.string () + "'", + l.string () + '\'', &e); } } @@ -2662,6 +3006,9 @@ namespace bpkg bool revert, bool ie) { + if (!revert && ((verb && !co.no_progress ()) || co.progress ())) + text << "fixing up symlinks..."; + try { optional<bool> r ( |