author     Karen Arutyunov <karen@codesynthesis.com>   2018-02-12 17:30:16 +0300
committer  Karen Arutyunov <karen@codesynthesis.com>   2018-02-12 17:34:55 +0300
commit     b7763416f8a1e4940a10336d3a8b9fbbb879f414 (patch)
tree       31f1bc7427181aa81984a639d998ca4e99058199 /bpkg
parent     ecb1efeebaa5597bee4cfdaab7bff4007b73127d (diff)
Clone and fetch git repositories
Diffstat (limited to 'bpkg')
-rw-r--r--  bpkg/auth.cxx        11
-rw-r--r--  bpkg/cfg-create.cxx   8
-rw-r--r--  bpkg/common.cli      41
-rw-r--r--  bpkg/fetch-bpkg.cxx 270
-rw-r--r--  bpkg/fetch-git.cxx  986
-rw-r--r--  bpkg/fetch.cxx      263
-rw-r--r--  bpkg/fetch.hxx       64
-rw-r--r--  bpkg/package.hxx     24
-rw-r--r--  bpkg/pkg-fetch.cxx    4
-rw-r--r--  bpkg/rep-create.cxx   4
-rw-r--r--  bpkg/rep-fetch.cxx  110
-rw-r--r--  bpkg/utility.cxx     39
-rw-r--r--  bpkg/utility.hxx     11
13 files changed, 1510 insertions, 325 deletions
diff --git a/bpkg/auth.cxx b/bpkg/auth.cxx
index b60a8ee..06555e2 100644
--- a/bpkg/auth.cxx
+++ b/bpkg/auth.cxx
@@ -68,7 +68,8 @@ namespace bpkg
// use the location rather than the name prefix.
//
if (rl.remote ())
- return repository_location (p.posix_string (), rl).canonical_name ();
+ return repository_location (
+ repository_url (p.posix_string ()), rl).canonical_name ();
else
return (path_cast<dir_path> (rl.path ()) / p).normalize ().string ();
}
@@ -555,11 +556,7 @@ namespace bpkg
//
if (pem)
{
- dir_path d (conf / certs_dir);
- if (!dir_exists (d))
- mk (d);
-
- path f (d / path (fp + ".pem"));
+ path f (conf / certs_dir / path (fp + ".pem"));
try
{
@@ -576,8 +573,6 @@ namespace bpkg
return cert;
}
- static const dir_path current_dir (".");
-
shared_ptr<const certificate>
authenticate_certificate (const common_options& co,
const dir_path* conf,
diff --git a/bpkg/cfg-create.cxx b/bpkg/cfg-create.cxx
index acd6bb9..1a21cfa 100644
--- a/bpkg/cfg-create.cxx
+++ b/bpkg/cfg-create.cxx
@@ -83,9 +83,13 @@ namespace bpkg
true,
vars);
- // Create .bpkg/.
+ // Create .bpkg/ and its subdirectories.
//
- mk (c / bpkg_dir);
+ {
+ mk (c / bpkg_dir);
+ mk (c / certs_dir);
+ mk (c / repos_dir);
+ }
// Initialize tmp directory.
//
diff --git a/bpkg/common.cli b/bpkg/common.cli
index 45d2c55..5c3de83 100644
--- a/bpkg/common.cli
+++ b/bpkg/common.cli
@@ -122,13 +122,20 @@ namespace bpkg
size_t --fetch-timeout
{
"<sec>",
- "The fetch program timeout. While the exact semantics of the value
- depends on the fetch program used, at a minimum it specifies in
- seconds the maximum time that can be spent without any network
- activity. Specifically, it is translated to the \cb{--max-time}
- option for \cb{curl} and to the \cb{--timeout} option for \cb{wget}
- and \cb{fetch}. See \cb{--fetch} for more information on the fetch
- program."
+ "The fetch and fetch-like (for example, \cb{git}) program timeout.
+ While the exact semantics of the value depends on the program used,
+ at a minimum it specifies in seconds the maximum time that can be
+ spent without any network activity.
+
+ Specifically, it is translated to the \cb{--max-time} option for
+ \cb{curl} and to the \cb{--timeout} option for \cb{wget} and
+ \cb{fetch}. For \cb{git} over HTTP/HTTPS this semantics is achieved
+ using the \cb{http.lowSpeedLimit}=\i{1} \cb{http.lowSpeedTime}=\i{sec}
+ configuration values (the \cb{git://} protocol currently does not
+ support timeouts).
+
+ See \cb{--fetch} and \cb{--git} for more information on the fetch
+ programs."
}
strings --fetch-option
@@ -139,6 +146,26 @@ namespace bpkg
specify multiple fetch options."
}
+ path --git = "git"
+ {
+ "<path>",
+ "The git program to be used to fetch git repositories. You can also
+ specify additional options that should be passed to the git program with
+ \cb{--git-option}.
+
+ If the git program is not explicitly specified, then \cb{bpkg} will use
+ \cb{git} by default."
+ }
+
+ strings --git-option
+ {
+ "<opt>",
+ "Additional common option to be passed to the git program. Note that
+ the common options are the ones that precede the \cb{git} command.
+ See \cb{--git} for more information on the git program. Repeat this
+ option to specify multiple git options."
+ }
+
path --sha256
{
"<path>",
diff --git a/bpkg/fetch-bpkg.cxx b/bpkg/fetch-bpkg.cxx
new file mode 100644
index 0000000..39f84f9
--- /dev/null
+++ b/bpkg/fetch-bpkg.cxx
@@ -0,0 +1,270 @@
+// file : bpkg/fetch-bpkg.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <bpkg/fetch.hxx>
+
+#include <sstream>
+
+#include <libbutl/fdstream.mxx>
+#include <libbutl/filesystem.mxx> // cpfile ()
+#include <libbutl/manifest-parser.mxx>
+
+#include <bpkg/checksum.hxx>
+#include <bpkg/diagnostics.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bpkg
+{
+ template <typename M>
+ static pair<M, string/*checksum*/>
+ fetch_manifest (const common_options& o,
+ const repository_url& u,
+ bool ignore_unknown)
+ {
+ string url (u.string ());
+ process pr (start_fetch (o, url));
+
+ try
+ {
+ // Unfortunately we cannot read from the original source twice as we do
+ // below for files. There doesn't seem to be anything better than reading
+ // the entire file into memory and then streaming it twice, once to
+ // calculate the checksum and the second time to actually parse. We need
+ // to read the original stream in the binary mode for the checksum
+ // calculation, then use the binary data to create the text stream for
+ // the manifest parsing.
+ //
+ ifdstream is (move (pr.in_ofd), fdstream_mode::binary);
+ stringstream bs (ios::in | ios::out | ios::binary);
+
+ // Note that the eof check is important: if the stream is at eof, write
+ // will fail.
+ //
+ if (is.peek () != ifdstream::traits_type::eof ())
+ bs << is.rdbuf ();
+
+ is.close ();
+
+ string s (bs.str ());
+ string sha256sum (sha256 (s.c_str (), s.size ()));
+
+ istringstream ts (s); // Text mode.
+
+ manifest_parser mp (ts, url);
+ M m (mp, ignore_unknown);
+
+ if (pr.wait ())
+ return make_pair (move (m), move (sha256sum));
+
+ // Child exited with an error, fall through.
+ }
+ // Ignore these exceptions if the child process exited with
+ // an error status since that's the source of the failure.
+ //
+ catch (const manifest_parsing& e)
+ {
+ if (pr.wait ())
+ fail (e.name, e.line, e.column) << e.description;
+ }
+ catch (const io_error&)
+ {
+ if (pr.wait ())
+ fail << "unable to read fetched " << url;
+ }
+
+ // We should only get here if the child exited with an error status.
+ //
+ assert (!pr.wait ());
+
+ // While it is reasonable to assume the child process issued
+ // diagnostics, some may not mention the URL.
+ //
+ fail << "unable to fetch " << url <<
+ info << "re-run with -v for more information" << endf;
+ }
+
+ static path
+ fetch_file (const common_options& o,
+ const repository_url& u,
+ const dir_path& d)
+ {
+ path r (d / u.path->leaf ());
+
+ if (exists (r))
+ fail << "file " << r << " already exists";
+
+ auto_rmfile arm (r);
+ process pr (start_fetch (o, u.string (), r));
+
+ if (!pr.wait ())
+ {
+ // While it is reasonable to assume the child process issued
+ // diagnostics, some may not mention the URL.
+ //
+ fail << "unable to fetch " << u <<
+ info << "re-run with -v for more information";
+ }
+
+ arm.cancel ();
+ return r;
+ }
+
+ static path
+ fetch_file (const path& f, const dir_path& d)
+ {
+ path r (d / f.leaf ());
+
+ try
+ {
+ cpfile (f, r);
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to copy " << f << " to " << r << ": " << e;
+ }
+
+ return r;
+ }
+
+ // If o is nullptr, then don't calculate the checksum.
+ //
+ template <typename M>
+ static pair<M, string/*checksum*/>
+ fetch_manifest (const common_options* o,
+ const path& f,
+ bool ignore_unknown)
+ {
+ if (!exists (f))
+ fail << "file " << f << " does not exist";
+
+ try
+ {
+ // We cannot use the same file stream for both calculating the checksum
+ // and reading the manifest. The file should be opened in the binary
+ // mode for the first operation and in the text mode for the second one.
+ //
+ string sha256sum;
+ if (o != nullptr)
+ sha256sum = sha256 (*o, f); // Read file in the binary mode.
+
+ ifdstream ifs (f); // Open file in the text mode.
+
+ manifest_parser mp (ifs, f.string ());
+ return make_pair (M (mp, ignore_unknown), move (sha256sum));
+ }
+ catch (const manifest_parsing& e)
+ {
+ fail (e.name, e.line, e.column) << e.description << endf;
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to read from " << f << ": " << e << endf;
+ }
+ }
+
+ static const path repositories ("repositories");
+
+ repository_manifests
+ bpkg_fetch_repositories (const dir_path& d, bool iu)
+ {
+ return fetch_manifest<repository_manifests> (
+ nullptr, d / repositories, iu).first;
+ }
+
+ pair<repository_manifests, string/*checksum*/>
+ bpkg_fetch_repositories (const common_options& o,
+ const repository_location& rl,
+ bool iu)
+ {
+ assert (rl.remote () || rl.absolute ());
+
+ repository_url u (rl.url ());
+
+ path& f (*u.path);
+ f /= repositories;
+
+ return rl.remote ()
+ ? fetch_manifest<repository_manifests> (o, u, iu)
+ : fetch_manifest<repository_manifests> (&o, f, iu);
+ }
+
+ static const path packages ("packages");
+
+ package_manifests
+ bpkg_fetch_packages (const dir_path& d, bool iu)
+ {
+ return fetch_manifest<package_manifests> (nullptr, d / packages, iu).first;
+ }
+
+ pair<package_manifests, string/*checksum*/>
+ bpkg_fetch_packages (const common_options& o,
+ const repository_location& rl,
+ bool iu)
+ {
+ assert (rl.remote () || rl.absolute ());
+
+ repository_url u (rl.url ());
+
+ path& f (*u.path);
+ f /= packages;
+
+ return rl.remote ()
+ ? fetch_manifest<package_manifests> (o, u, iu)
+ : fetch_manifest<package_manifests> (&o, f, iu);
+ }
+
+ static const path signature ("signature");
+
+ signature_manifest
+ bpkg_fetch_signature (const common_options& o,
+ const repository_location& rl,
+ bool iu)
+ {
+ assert (rl.remote () || rl.absolute ());
+
+ repository_url u (rl.url ());
+
+ path& f (*u.path);
+ f /= signature;
+
+ return rl.remote ()
+ ? fetch_manifest<signature_manifest> (o, u, iu).first
+ : fetch_manifest<signature_manifest> (nullptr, f, iu).first;
+ }
+
+ path
+ bpkg_fetch_archive (const common_options& o,
+ const repository_location& rl,
+ const path& a,
+ const dir_path& d)
+ {
+ assert (!a.empty () && a.relative ());
+ assert (rl.remote () || rl.absolute ());
+
+ repository_url u (rl.url ());
+
+ path& f (*u.path);
+ f /= a;
+
+ auto bad_loc = [&u] () {fail << "invalid archive location " << u;};
+
+ try
+ {
+ f.normalize ();
+
+ if (*f.begin () == "..") // Can be the case for the remote location.
+ bad_loc ();
+ }
+ catch (const invalid_path&)
+ {
+ bad_loc ();
+ }
+
+ return rl.remote ()
+ ? fetch_file (o, u, d)
+ : fetch_file (f, d);
+ }
+}
diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx
new file mode 100644
index 0000000..1194178
--- /dev/null
+++ b/bpkg/fetch-git.cxx
@@ -0,0 +1,986 @@
+// file : bpkg/fetch-git.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <bpkg/fetch.hxx>
+
+#ifdef _WIN32
+# include <algorithm> // replace()
+#endif
+
+#include <libbutl/process.mxx>
+#include <libbutl/fdstream.mxx>
+
+#include <bpkg/diagnostics.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bpkg
+{
+ struct fail_git
+ {
+ [[noreturn]] void
+ operator() (const diag_record& r) const
+ {
+ if (verb < 2)
+ r << info << "re-run with -v for more information";
+
+ r << endf;
+ }
+ };
+
+ static const diag_noreturn_end<fail_git> endg;
+
+ static fdpipe
+ open_pipe ()
+ {
+ try
+ {
+ return fdopen_pipe ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to open pipe: " << e << endf;
+ }
+ }
+
+ static auto_fd
+ open_dev_null ()
+ {
+ try
+ {
+ return fdnull ();
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to open null device: " << e << endf;
+ }
+ }
+
+ using opt = optional<const char*>; // Program option.
+
+ static strings
+ timeout_opts (const common_options& co, repository_protocol proto)
+ {
+ if (!co.fetch_timeout_specified ())
+ return strings ();
+
+ switch (proto)
+ {
+ case repository_protocol::http:
+ case repository_protocol::https:
+ {
+ // Git doesn't support a connection timeout option. The options we
+ // use instead are only an approximation that, in particular, doesn't
+ // cover connection establishment. Sensing the HTTP(S) smart vs dumb
+ // protocol using a fetch utility prior to running git (see below) will
+ // probably mitigate this somewhat.
+ //
+ return strings ({
+ "-c", "http.lowSpeedLimit=1",
+ "-c", "http.lowSpeedTime=" + to_string (co.fetch_timeout ())});
+ }
+ case repository_protocol::git:
+ {
+ warn << "--fetch-timeout is not supported by the git protocol";
+ break;
+ }
+ case repository_protocol::file: return strings (); // Local communications.
+ }
+
+ assert (false); // Can't be here.
+ return strings ();
+ }
+
+ // Start git process.
+ //
+ // Note that git is executed in the "sanitized" environment, with the
+ // environment variables that are local to the repository unset (all
+ // except GIT_CONFIG_PARAMETERS). This matches what the git-submodule
+ // script does for commands executed for submodules, though we do it for
+ // all commands (including the ones related to the top repository).
+ //
+ static optional<strings> unset_vars;
+
+ template <typename O, typename E, typename... A>
+ static process
+ start_git (const common_options& co,
+ O&& out,
+ E&& err,
+ A&&... args)
+ {
+ try
+ {
+ if (!unset_vars)
+ {
+ unset_vars = strings ();
+
+ for (;;) // Breakout loop.
+ {
+ fdpipe pipe (open_pipe ());
+
+ // We assume that the non-sanitized git environment can't harm this call.
+ //
+ process pr (start_git (co,
+ pipe, 2 /* stderr */,
+ co.git_option (),
+ "rev-parse",
+ "--local-env-vars"));
+
+ // Shouldn't throw, unless something is severely damaged.
+ //
+ pipe.out.close ();
+
+ try
+ {
+ ifdstream is (move (pipe.in), fdstream_mode::skip);
+
+ while (is.peek () != ifdstream::traits_type::eof ())
+ {
+ string v;
+ getline (is, v);
+
+ if (v != "GIT_CONFIG_PARAMETERS")
+ unset_vars->push_back (move (v));
+ }
+
+ is.close ();
+
+ if (pr.wait ())
+ break;
+
+ // Fall through.
+ }
+ catch (const io_error&)
+ {
+ if (pr.wait ())
+ fail << "unable to read git local environment variables" << endg;
+
+ // Fall through.
+ }
+
+ // We should only get here if the child exited with an error status.
+ //
+ assert (!pr.wait ());
+
+ fail << "unable to list git local environment variables" << endg;
+ }
+ }
+
+ return process_start_callback ([] (const char* const args[], size_t n)
+ {
+ if (verb >= 2)
+ print_process (args, n);
+ },
+ 0 /* stdin */, out, err,
+ process_env (co.git (), *unset_vars),
+ forward<A> (args)...);
+ }
+ catch (const process_error& e)
+ {
+ fail << "unable to execute " << co.git () << ": " << e << endg;
+ }
+ }
+
+ // Run git process.
+ //
+ template <typename... A>
+ static process_exit
+ run_git (const common_options& co, A&&... args)
+ {
+ process pr (start_git (co, 1, 2, forward<A> (args)...));
+ pr.wait ();
+ return *pr.exit;
+ }
+
+ // Run git process and return its output as a string. Fail if the output
+ // doesn't contain a single line.
+ //
+ template <typename... A>
+ static string
+ git_string (const common_options& co, const char* what, A&&... args)
+ {
+ fdpipe pipe (open_pipe ());
+ process pr (start_git (co, pipe, 2 /* stderr */, forward<A> (args)...));
+ pipe.out.close (); // Shouldn't throw, unless something is severely damaged.
+
+ try
+ {
+ ifdstream is (move (pipe.in), fdstream_mode::skip);
+
+ optional<string> r;
+ if (is.peek () != ifdstream::traits_type::eof ())
+ {
+ string s;
+ getline (is, s);
+
+ if (!is.eof () && is.peek () == ifdstream::traits_type::eof ())
+ r = move (s);
+ }
+
+ is.close ();
+
+ if (pr.wait ())
+ {
+ if (r)
+ return *r;
+
+ fail << "invalid " << what << endg;
+ }
+
+ // Fall through.
+ }
+ catch (const io_error&)
+ {
+ if (pr.wait ())
+ fail << "unable to read " << what << endg;
+
+ // Fall through.
+ }
+
+ // We should only get here if the child exited with an error status.
+ //
+ assert (!pr.wait ());
+
+ fail << "unable to obtain " << what << endg;
+ }
+
+ // Convert the URL object to a string representation that is usable in git
+ // commands. This, in particular, means using the file:// (rather than
+ // local path) notation for local URLs.
+ //
+ // Note that cloning a local git repository using the local path notation
+ // disregards the --depth option (and issues a warning), creating a full
+ // copy of the source repository (copying some files and hard-linking
+ // others if possible). Using the --no-local option overrides this
+ // unwanted behavior. However, this option cannot be propagated to
+ // submodule--helper's clone command that we use to clone submodules. So
+ // to truncate local submodule histories we will use the file URL notation
+ // for local repositories.
+ //
+ static string
+ git_url (const repository_url& url)
+ {
+ if (url.scheme != repository_protocol::file)
+ return url.string ();
+
+#ifndef _WIN32
+ // Enforce the 'file://' notation for local URLs (see libbpkg/manifest.hxx).
+ //
+ repository_url u (url.scheme,
+ repository_url::authority_type (),
+ url.path,
+ url.query);
+
+ return u.string ();
+#else
+ // On Windows the appropriate file notations are:
+ //
+ // file://c:/...
+ // file://c:\...
+ //
+ // Note that none of them conforms to RFC3986. The proper one should be:
+ //
+ // file:///c:/...
+ //
+ // We choose to convert it to the "most conformant" (the first)
+ // representation to ease the fix-up before creating the URL object from
+ // it, when required.
+ //
+ string p (url.path->string ());
+ replace (p.begin (), p.end (), '\\', '/');
+ return "file://" + p;
+#endif
+ }
+
+ // Sense the git protocol capabilities for a specified URL.
+ //
+ // Protocols other than HTTP(S) are considered smart but without the
+ // unadvertised refs (note that this is a pessimistic assumption for
+ // git://).
+ //
+ // For HTTP(S) sense the protocol type by sending the first HTTP request of
+ // the fetch operation handshake and analyzing the first line of the
+ // response. Fail if connecting to the server failed, the response code
+ // differs from 200, or reading the response body failed.
+ //
+ // Note that, as a side-effect, this function checks the HTTP(S) server
+ // availability and so must be called prior to any git command that involves
+ // communication to the remote server. Not doing so may result in the command
+ // hanging indefinitely while trying to establish TCP/IP connection (see the
+ // timeout_opts() function for the gory details).
+ //
+ enum class capabilities
+ {
+ dumb, // No shallow clone support.
+ smart, // Support for shallow clone, but not for unadvertised refs fetch.
+ unadv // Support for shallow clone and for unadvertised refs fetch.
+ };
+
+ static capabilities
+ sense_capabilities (const common_options& co, repository_url url)
+ {
+ assert (url.path);
+
+ switch (url.scheme)
+ {
+ case repository_protocol::git:
+ case repository_protocol::file: return capabilities::smart;
+ case repository_protocol::http:
+ case repository_protocol::https: break; // Ask the server (see below).
+ }
+
+ path& up (*url.path);
+
+ if (!up.to_directory ())
+ up = path_cast<dir_path> (move (up));
+
+ up /= path ("info/refs");
+
+ if (url.query)
+ *url.query += "&service=git-upload-pack";
+ else
+ url.query = "service=git-upload-pack";
+
+ string u (url.string ());
+ process pr (start_fetch (co, u));
+
+ try
+ {
+ // We unset failbit to properly handle an empty response (no refs) from
+ // the dumb server.
+ //
+ ifdstream is (move (pr.in_ofd),
+ fdstream_mode::skip | fdstream_mode::binary,
+ ifdstream::badbit);
+
+ string l;
+ getline (is, l); // Is empty if no refs returned by the dumb server.
+
+ // If the first response line has the following form:
+ //
+ // XXXX# service=git-upload-pack
+ //
+ // where XXXX is a sequence of 4 hex digits, then the server implements
+ // the smart protocol.
+ //
+ // Note that to consider the server to be "smart" it would make sense
+ // to also check that the response Content-Type header value is
+ // 'application/x-git-upload-pack-advertisement'. However, we will skip
+ // this check in order to not complicate the fetch API.
+ //
+ size_t n (l.size ());
+
+ capabilities r (
+ n >= 4 &&
+ xdigit (l[0]) && xdigit (l[1]) && xdigit (l[2]) && xdigit (l[3]) &&
+ l.compare (4, n - 4, "# service=git-upload-pack") == 0
+ ? capabilities::smart
+ : capabilities::dumb);
+
+ // If the transport is smart, let's see if the server also supports
+ // unadvertised refs fetch.
+ //
+ if (r == capabilities::smart && !is.eof ())
+ {
+ getline (is, l);
+
+ // Parse the space-separated list of capabilities that follows the
+ // NULL character.
+ //
+ for (size_t p (l.find ('\0')); p != string::npos; )
+ {
+ size_t e (l.find (' ', ++p));
+ size_t n (e != string::npos ? e - p : e);
+
+ if (l.compare (p, n, "allow-reachable-sha1-in-want") == 0 ||
+ l.compare (p, n, "allow-tip-sha1-in-want") == 0)
+ {
+ r = capabilities::unadv;
+ break;
+ }
+
+ p = e;
+ }
+ }
+
+ is.close ();
+
+ if (pr.wait ())
+ return r;
+
+ // Fall through.
+ }
+ catch (const io_error&)
+ {
+ if (pr.wait ())
+ fail << "unable to read fetched " << url << endg;
+
+ // Fall through.
+ }
+
+ // We should only get here if the child exited with an error status.
+ //
+ assert (!pr.wait ());
+
+ fail << "unable to fetch " << url << endg;
+ }
+
+ // Return true if a commit is advertised by the remote repository. It is
+ // assumed that sense_capabilities() function was already called for the URL.
+ //
+ static bool
+ commit_advertized (const common_options& co,
+ const repository_url& url,
+ const string& commit)
+ {
+ tracer trace ("commit_advertized");
+
+ fdpipe pipe (open_pipe ());
+
+ process pr (start_git (co,
+ pipe, 2 /* stderr */,
+ timeout_opts (co, url.scheme),
+ co.git_option (),
+ "ls-remote",
+ "--refs",
+ git_url (url)));
+
+ pipe.out.close (); // Shouldn't throw, unless something is severely damaged.
+
+ try
+ {
+ bool r (false);
+ ifdstream is (move (pipe.in), fdstream_mode::skip);
+
+ while (is.peek () != ifdstream::traits_type::eof ())
+ {
+ string s;
+ getline (is, s);
+
+ l4 ([&]{trace << "ref: " << s;});
+
+ if (s.compare (0, commit.size (), commit) == 0)
+ {
+ r = true;
+ break;
+ }
+ }
+
+ is.close ();
+
+ if (pr.wait ())
+ return r;
+
+ // Fall through.
+ }
+ catch (const io_error&)
+ {
+ if (pr.wait ())
+ fail << "unable to read references for " << url << endg;
+
+ // Fall through.
+ }
+
+ // We should only get here if the child exited with an error status.
+ //
+ assert (!pr.wait ());
+
+ fail << "unable to list references for " << url << endg;
+ }
+
+ // Return true if the shallow fetch is possible for the reference.
+ //
+ static bool
+ shallow_fetch (const common_options& co,
+ const repository_url& url,
+ capabilities cap,
+ const git_reference& ref)
+ {
+ switch (cap)
+ {
+ case capabilities::dumb:
+ {
+ return false;
+ }
+ case capabilities::smart:
+ {
+ return !ref.commit || commit_advertized (co, url, *ref.commit);
+ }
+ case capabilities::unadv:
+ {
+ return true;
+ }
+ }
+
+ assert (false); // Can't be here.
+ return false;
+ }
+
+ // Return true if a commit is reachable from the tip(s).
+ //
+ // Can be used to avoid redundant fetches.
+ //
+ // Note that the git-submodule script implements this check, so it is
+ // probably an important optimization.
+ //
+ static bool
+ commit_reachable (const common_options& co,
+ const dir_path& dir,
+ const string& commit)
+ {
+ fdpipe pipe (open_pipe ());
+ auto_fd dev_null (open_dev_null ());
+
+ process pr (start_git (co,
+ pipe,
+ dev_null,
+ co.git_option (),
+ "-C", dir,
+ "rev-list",
+ "-n", "1",
+ commit,
+ "--not",
+ "--all"));
+
+ // Shouldn't throw, unless something is severely damaged.
+ //
+ pipe.out.close ();
+ dev_null.close ();
+
+ try
+ {
+ ifdstream is (move (pipe.in), fdstream_mode::skip);
+
+ string s;
+ if (is.peek () != ifdstream::traits_type::eof ())
+ getline (is, s);
+
+ is.close ();
+ return pr.wait () && s.empty ();
+ }
+ catch (const io_error&) {}
+ return false;
+ }
+
+ // Print warnings about non-shallow fetching.
+ //
+ static void
+ fetch_warn (capabilities cap,
+ const char* what,
+ const dir_path& submodule = dir_path ())
+ {
+ {
+ diag_record dr (warn);
+ dr << "fetching whole " << what << " history";
+
+ if (!submodule.empty ())
+ dr << " for submodule '" << submodule.posix_string () << "'";
+
+ dr << " ("
+ << (cap == capabilities::dumb
+ ? "dumb HTTP"
+ : "unadvertised commit") // There are no other reasons so far.
+ << ')';
+
+ }
+
+ if (cap == capabilities::dumb)
+ warn << "fetching over dumb HTTP, no progress will be displayed";
+ }
+
+ // Update git index and working tree to match the reference. Fetch if
+ // necessary.
+ //
+ static void
+ update_tree (const common_options& co,
+ const dir_path& dir,
+ const dir_path& submodule, // Is relative to the top project.
+ const git_reference& ref,
+ capabilities cap,
+ bool shallow,
+ const strings& to)
+ {
+ // Don't fetch if the reference is a commit that is reachable from the
+ // tip(s).
+ //
+ if (!(ref.commit && commit_reachable (co, dir, *ref.commit)))
+ {
+ if (!shallow)
+ fetch_warn (cap, ref.commit ? "repository" : "branch", submodule);
+
+ // The clone command prints the following line prior to the progress
+ // lines:
+ //
+ // Cloning into '<dir>'...
+ //
+ // The fetch command doesn't print anything similar, for some reason.
+ // This makes it hard to understand which superproject/submodule is
+ // currently being fetched. Let's fix that.
+ //
+ if (verb != 0)
+ text << "Fetching in '" << dir.posix_string () << "'...";
+
+ // Note that we suppress the (too detailed) fetch command output if the
+ // verbosity level is 1. However, we still want to see the progress in
+ // this case, unless STDERR is not directed to a terminal.
+ //
+ // Also note that we don't need to specify the --refmap option since we
+ // can rely on the clone command that properly sets the
+ // remote.origin.fetch configuration option.
+ //
+ if (!run_git (co,
+ to,
+ co.git_option (),
+ "-C", dir,
+ "fetch",
+ "--no-recurse-submodules",
+ shallow ? cstrings ({"--depth", "1"}) : cstrings (),
+ verb == 1 && fdterm (2) ? opt ( "--progress") : nullopt,
+ verb < 2 ? opt ("-q") : verb > 3 ? opt ("-v") : nullopt,
+ "origin",
+ ref.commit ? *ref.commit : *ref.branch))
+ fail << "unable to fetch " << dir << endg;
+ }
+
+ const string& commit (ref.commit ? *ref.commit : string ("FETCH_HEAD"));
+
+ // For some (probably valid) reason the hard reset command doesn't remove
+ // a submodule directory that is not plugged into the project anymore. It
+ // also prints a non-suppressible warning like this:
+ //
+ // warning: unable to rmdir libbar: Directory not empty
+ //
+ // That's why we run the clean command afterwards. It may also be helpful
+ // if we produce any untracked files in the tree between fetches down the
+ // road.
+ //
+ if (!run_git (
+ co,
+ co.git_option (),
+ "-C", dir,
+ "reset",
+ "--hard",
+ verb < 2 ? opt ("-q") : nullopt,
+ commit))
+ fail << "unable to reset to " << commit << endg;
+
+ if (!run_git (
+ co,
+ co.git_option (),
+ "-C", dir,
+ "clean",
+ "-d",
+ "-x",
+ "-ff",
+ verb < 2 ? opt ("-q") : nullopt))
+ fail << "unable to clean " << dir << endg;
+ }
+
+ static void
+ update_submodules (const common_options& co,
+ const dir_path& dir,
+ const dir_path& prefix)
+ {
+ tracer trace ("update_submodules");
+
+ auto failure = [&prefix] (const char* desc)
+ {
+ diag_record dr (fail);
+ dr << desc;
+
+ if (!prefix.empty ())
+ // Strips the trailing slash.
+ //
+ dr << " for submodule '" << prefix.string () << "'";
+
+ dr << endg;
+ };
+
+ // Initialize submodules.
+ //
+ if (!run_git (
+ co,
+ co.git_option (),
+ "-C", dir,
+
+ !prefix.empty ()
+ ? strings ({"--super-prefix", prefix.posix_representation ()})
+ : strings (),
+
+ "submodule--helper", "init",
+ verb < 1 ? opt ("-q") : nullopt))
+ failure ("unable to initialize submodules");
+
+ // Iterate over the registered submodules cloning/fetching them and
+ // recursively updating their submodules.
+ //
+ // Note that we don't expect submodule nesting to be too deep and so recurse
+ // while reading the git process output.
+ //
+ fdpipe pipe (open_pipe ());
+
+ process pr (start_git (co,
+ pipe, 2 /* stderr */,
+ co.git_option (),
+ "-C", dir,
+ "submodule--helper", "list"));
+
+ pipe.out.close (); // Shouldn't throw, unless something is severely damaged.
+
+ try
+ {
+ ifdstream is (move (pipe.in), fdstream_mode::skip);
+
+ while (is.peek () != ifdstream::traits_type::eof ())
+ {
+ // The line describing a submodule has the following form:
+ //
+ // <mode><SPACE><commit><SPACE><stage><TAB><path>
+ //
+ // For example:
+ //
+ // 160000 658436a9522b5a0d016c3da0253708093607f95d 0 doc/style
+ //
+ string s;
+ getline (is, s);
+
+ l4 ([&]{trace << "submodule: " << s;});
+
+ if (!(s.size () > 50 && s[48] == '0' && s[49] == '\t'))
+ failure ("invalid submodule description");
+
+ string commit (s.substr (7, 40));
+
+ // Submodule directory path, relative to the containing project.
+ //
+ dir_path sdir (s.substr (50));
+
+ // Submodule directory path, relative to the top project.
+ //
+ dir_path psdir (prefix / sdir);
+ string psd (psdir.posix_string ()); // For use in the diagnostics.
+
+ string name (git_string (co, "submodule name",
+ co.git_option (),
+ "-C", dir,
+ "submodule--helper", "name",
+ sdir));
+
+ repository_url url;
+
+ try
+ {
+ string u (git_string (co, "submodule URL",
+ co.git_option (),
+ "-C", dir,
+ "config",
+ "--get",
+ "submodule." + name + ".url"));
+
+ // Fix-up the broken Windows file URL notation (see the git_url()
+ // function for details).
+ //
+#ifdef _WIN32
+ if (casecmp (u, "file://", 7) == 0 && u[7] != '/')
+ u.insert (7, 1, '/');
+#endif
+ url = repository_url (u);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "invalid repository URL for submodule '" << psd << "': "
+ << e << endg;
+ }
+
+ l4 ([&]{trace << "name: " << name << ", URL: " << url;});
+
+ dir_path fsdir (dir / sdir);
+ bool cloned (exists (fsdir / path (".git")));
+
+ // If the submodule is already cloned and its commit didn't change
+ // then we skip it.
+ //
+ // Note that the git-submodule script still recurses into it for some
+ // unclear reason.
+ //
+ if (cloned && git_string (co, "submodule commit",
+ co.git_option (),
+ "-C", fsdir,
+ "rev-parse",
+ "--verify",
+ "HEAD") == commit)
+ continue;
+
+ git_reference ref {nullopt, commit};
+ capabilities cap (sense_capabilities (co, url));
+ bool shallow (shallow_fetch (co, url, cap, ref));
+ strings to (timeout_opts (co, url.scheme));
+
+ // Clone new submodule.
+ //
+ if (!cloned)
+ {
+ if (!shallow)
+ fetch_warn (cap, "repository", psdir);
+
+ if (!run_git (co,
+ to,
+ co.git_option (),
+ "-C", dir,
+ "submodule--helper", "clone",
+
+ "--name", name,
+ "--path", sdir,
+ "--url", git_url (url),
+ shallow
+ ? cstrings ({"--depth", "1"})
+ : cstrings (),
+ verb < 1 ? opt ("-q") : nullopt))
+ fail << "unable to clone submodule '" << psd << "'" << endg;
+ }
+
+ update_tree (co, fsdir, psdir, ref, cap, shallow, to);
+
+ // Not quite a checkout, but let's make the message match the
+ // git-submodule script output.
+ //
+ if (verb > 0)
+ text << "Submodule path '" << psd << "': checked out '" << commit
+ << "'";
+
+ // Recurse.
+ //
+ // Can throw the failed exception that we don't catch here, relying on
+ // the fact that the process destructor will wait for the process
+ // completion.
+ //
+ update_submodules (co, fsdir, psdir);
+ }
+
+ is.close ();
+
+ if (pr.wait ())
+ return;
+
+ // Fall through.
+ }
+ catch (const io_error&)
+ {
+ if (pr.wait ())
+ failure ("unable to read submodules list");
+
+ // Fall through.
+ }
+
+ // We should only get here if the child exited with an error status.
+ //
+ assert (!pr.wait ());
+
+ failure ("unable to list submodules");
+ }
+
+ // Extract the git reference from the repository URL fragment. Set the URL
+ // fragment to nullopt.
+ //
+ static git_reference
+ parse_reference (repository_url& url, const char* what)
+ {
+ try
+ {
+ git_reference r (git_reference (url.fragment));
+ url.fragment = nullopt;
+ return r;
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "unable to " << what << ' ' << url << ": " << e << endf;
+ }
+ }
+
+ void
+ git_clone (const common_options& co,
+ const repository_location& rl,
+ const dir_path& destdir)
+ {
+ repository_url url (rl.url ());
+ git_reference ref (parse_reference (url, "clone"));
+
+ // All protocols support single branch cloning, so we will always be
+ // cloning a single branch if the branch is specified.
+ //
+ bool single_branch (ref.branch);
+ capabilities cap (sense_capabilities (co, url));
+ bool shallow (shallow_fetch (co, url, cap, ref));
+
+ if (shallow)
+ single_branch = false; // Is implied for shallow cloning.
+ else
+ fetch_warn (cap, single_branch ? "branch" : "repository");
+
+ dir_path d (destdir);
+ d /= dir_path (ref.branch ? *ref.branch : *ref.commit);
+
+ strings to (timeout_opts (co, url.scheme));
+
+ if (!run_git (
+ co,
+ to,
+ "-c", "advice.detachedHead=false",
+ co.git_option (),
+ "clone",
+
+ ref.branch ? strings ({"--branch", *ref.branch}) : strings (),
+ single_branch ? opt ("--single-branch") : nullopt,
+ shallow ? strings ({"--depth", "1"}) : strings (),
+ ref.commit ? opt ("--no-checkout") : nullopt,
+
+ verb < 1 ? opt ("-q") : verb > 3 ? opt ("-v") : nullopt,
+ git_url (url),
+ d))
+ fail << "unable to clone " << url << endg;
+
+ if (ref.commit)
+ update_tree (co, d, dir_path (), ref, cap, shallow, to);
+
+ update_submodules (co, d, dir_path ());
+ }
+
+ void
+ git_fetch (const common_options& co,
+ const repository_location& rl,
+ const dir_path& destdir)
+ {
+ repository_url url (rl.url ());
+ git_reference ref (parse_reference (url, "fetch"));
+
+ // Fetch is a noop if the specific commit is checked out.
+ //
+ // What if the user replaces the repository URL with one that has a new
+ // branch/tag/commit? These are not part of the repository name, which
+ // means such a repository will have the same hash. But then, when we
+ // remove the repository, we will also clean up its state. So it seems
+ // like this should work correctly automatically.
+ //
+ if (ref.commit)
+ return;
+
+ assert (ref.branch);
+
+ capabilities cap (sense_capabilities (co, url));
+ bool shallow (shallow_fetch (co, url, cap, ref));
+
+ dir_path d (destdir);
+ d /= dir_path (*ref.branch);
+
+ update_tree (co,
+ d,
+ dir_path (),
+ ref,
+ cap,
+ shallow,
+ timeout_opts (co, url.scheme));
+
+ update_submodules (co, d, dir_path ());
+ }
+}
diff --git a/bpkg/fetch.cxx b/bpkg/fetch.cxx
index 5464c4a..c5366e3 100644
--- a/bpkg/fetch.cxx
+++ b/bpkg/fetch.cxx
@@ -4,14 +4,8 @@
#include <bpkg/fetch.hxx>
-#include <sstream>
-
-#include <libbutl/process.mxx>
#include <libbutl/fdstream.mxx>
-#include <libbutl/filesystem.mxx>
-#include <libbutl/manifest-parser.mxx>
-#include <bpkg/checksum.hxx>
#include <bpkg/diagnostics.hxx>
using namespace std;
@@ -510,11 +504,8 @@ namespace bpkg
return fetch_kind;
}
- // If out is empty, then fetch to STDOUT. In this case also don't
- // show any progress unless we are running verbose.
- //
- static process
- start (const common_options& o, const string& url, const path& out = path ())
+ process
+ start_fetch (const common_options& o, const string& url, const path& out)
{
process (*f) (const path&,
const optional<size_t>&,
@@ -547,254 +538,4 @@ namespace bpkg
throw failed ();
}
}
-
- static path
- fetch_file (const common_options& o,
- const repository_url& u,
- const dir_path& d)
- {
- path r (d / u.path->leaf ());
-
- if (exists (r))
- fail << "file " << r << " already exists";
-
- auto_rmfile arm (r);
- process pr (start (o, u.string (), r));
-
- if (!pr.wait ())
- {
- // While it is reasonable to assuming the child process issued
- // diagnostics, some may not mention the URL.
- //
- fail << "unable to fetch " << u <<
- info << "re-run with -v for more information";
- }
-
- arm.cancel ();
- return r;
- }
-
- template <typename M>
- static pair<M, string/*checksum*/>
- fetch_manifest (const common_options& o,
- const repository_url& u,
- bool ignore_unknown)
- {
- string url (u.string ());
- process pr (start (o, url));
-
- try
- {
- // Unfortunately we cannot read from the original source twice as we do
- // below for files. There doesn't seem to be anything better than reading
- // the entire file into memory and then streaming it twice, once to
- // calculate the checksum and the second time to actually parse. We need
- // to read the original stream in the binary mode for the checksum
- // calculation, then use the binary data to create the text stream for
- // the manifest parsing.
- //
- ifdstream is (move (pr.in_ofd), fdstream_mode::binary);
- stringstream bs (ios::in | ios::out | ios::binary);
-
- // Note that the eof check is important: if the stream is at eof, write
- // will fail.
- //
- if (is.peek () != ifdstream::traits_type::eof ())
- bs << is.rdbuf ();
-
- is.close ();
-
- string s (bs.str ());
- string sha256sum (sha256 (s.c_str (), s.size ()));
-
- istringstream ts (s); // Text mode.
-
- manifest_parser mp (ts, url);
- M m (mp, ignore_unknown);
-
- if (pr.wait ())
- return make_pair (move (m), move (sha256sum));
-
- // Child existed with an error, fall through.
- }
- // Ignore these exceptions if the child process exited with
- // an error status since that's the source of the failure.
- //
- catch (const manifest_parsing& e)
- {
- if (pr.wait ())
- fail (e.name, e.line, e.column) << e.description;
- }
- catch (const io_error&)
- {
- if (pr.wait ())
- fail << "unable to read fetched " << url;
- }
-
- // We should only get here if the child exited with an error status.
- //
- assert (!pr.wait ());
-
- // While it is reasonable to assuming the child process issued
- // diagnostics, some may not mention the URL.
- //
- fail << "unable to fetch " << url <<
- info << "re-run with -v for more information" << endf;
- }
-
- static path
- fetch_file (const path& f, const dir_path& d)
- {
- path r (d / f.leaf ());
-
- try
- {
- cpfile (f, r);
- }
- catch (const system_error& e)
- {
- fail << "unable to copy " << f << " to " << r << ": " << e;
- }
-
- return r;
- }
-
- // If o is nullptr, then don't calculate the checksum.
- //
- template <typename M>
- static pair<M, string/*checksum*/>
- fetch_manifest (const common_options* o,
- const path& f,
- bool ignore_unknown)
- {
- if (!exists (f))
- fail << "file " << f << " does not exist";
-
- try
- {
- // We can not use the same file stream for both calculating the checksum
- // and reading the manifest. The file should be opened in the binary
- // mode for the first operation and in the text mode for the second one.
- //
- string sha256sum;
- if (o != nullptr)
- sha256sum = sha256 (*o, f); // Read file in the binary mode.
-
- ifdstream ifs (f); // Open file in the text mode.
-
- manifest_parser mp (ifs, f.string ());
- return make_pair (M (mp, ignore_unknown), move (sha256sum));
- }
- catch (const manifest_parsing& e)
- {
- fail (e.name, e.line, e.column) << e.description << endf;
- }
- catch (const io_error& e)
- {
- fail << "unable to read from " << f << ": " << e << endf;
- }
- }
-
- static const path repositories ("repositories");
-
- repository_manifests
- fetch_repositories (const dir_path& d, bool iu)
- {
- return fetch_manifest<repository_manifests> (
- nullptr, d / repositories, iu).first;
- }
-
- pair<repository_manifests, string/*checksum*/>
- fetch_repositories (const common_options& o,
- const repository_location& rl,
- bool iu)
- {
- assert (rl.remote () || rl.absolute ());
-
- repository_url u (rl.url ());
-
- path& f (*u.path);
- f /= repositories;
-
- return rl.remote ()
- ? fetch_manifest<repository_manifests> (o, u, iu)
- : fetch_manifest<repository_manifests> (&o, f, iu);
- }
-
- static const path packages ("packages");
-
- package_manifests
- fetch_packages (const dir_path& d, bool iu)
- {
- return fetch_manifest<package_manifests> (nullptr, d / packages, iu).first;
- }
-
- pair<package_manifests, string/*checksum*/>
- fetch_packages (const common_options& o,
- const repository_location& rl,
- bool iu)
- {
- assert (rl.remote () || rl.absolute ());
-
- repository_url u (rl.url ());
-
- path& f (*u.path);
- f /= packages;
-
- return rl.remote ()
- ? fetch_manifest<package_manifests> (o, u, iu)
- : fetch_manifest<package_manifests> (&o, f, iu);
- }
-
- static const path signature ("signature");
-
- signature_manifest
- fetch_signature (const common_options& o,
- const repository_location& rl,
- bool iu)
- {
- assert (rl.remote () || rl.absolute ());
-
- repository_url u (rl.url ());
-
- path& f (*u.path);
- f /= signature;
-
- return rl.remote ()
- ? fetch_manifest<signature_manifest> (o, u, iu).first
- : fetch_manifest<signature_manifest> (nullptr, f, iu).first;
- }
-
- path
- fetch_archive (const common_options& o,
- const repository_location& rl,
- const path& a,
- const dir_path& d)
- {
- assert (!a.empty () && a.relative ());
- assert (rl.remote () || rl.absolute ());
-
- repository_url u (rl.url ());
-
- path& f (*u.path);
- f /= a;
-
- auto bad_loc = [&u] () {fail << "invalid archive location " << u;};
-
- try
- {
- f.normalize ();
-
- if (*f.begin () == "..") // Can be the case for the remote location.
- bad_loc ();
- }
- catch (const invalid_path&)
- {
- bad_loc ();
- }
-
- return rl.remote ()
- ? fetch_file (o, u, d)
- : fetch_file (f, d);
- }
}
diff --git a/bpkg/fetch.hxx b/bpkg/fetch.hxx
index 4e7b271..49e144c 100644
--- a/bpkg/fetch.hxx
+++ b/bpkg/fetch.hxx
@@ -5,6 +5,8 @@
#ifndef BPKG_FETCH_HXX
#define BPKG_FETCH_HXX
+#include <libbutl/process.mxx>
+
#include <libbpkg/manifest.hxx>
#include <bpkg/types.hxx>
@@ -14,32 +16,64 @@
namespace bpkg
{
+ // Repository type bpkg (fetch-bpkg.cxx).
+ //
+
repository_manifests
- fetch_repositories (const dir_path&, bool ignore_unknown);
+ bpkg_fetch_repositories (const dir_path&, bool ignore_unknown);
pair<repository_manifests, string /* checksum */>
- fetch_repositories (const common_options&,
- const repository_location&,
- bool ignore_unknown);
+ bpkg_fetch_repositories (const common_options&,
+ const repository_location&,
+ bool ignore_unknown);
package_manifests
- fetch_packages (const dir_path&, bool ignore_unknown);
+ bpkg_fetch_packages (const dir_path&, bool ignore_unknown);
pair<package_manifests, string /* checksum */>
- fetch_packages (const common_options&,
- const repository_location&,
- bool ignore_unknown);
+ bpkg_fetch_packages (const common_options&,
+ const repository_location&,
+ bool ignore_unknown);
signature_manifest
- fetch_signature (const common_options&,
- const repository_location&,
- bool ignore_unknown);
+ bpkg_fetch_signature (const common_options&,
+ const repository_location&,
+ bool ignore_unknown);
path
- fetch_archive (const common_options&,
- const repository_location&,
- const path& archive,
- const dir_path& destdir);
+ bpkg_fetch_archive (const common_options&,
+ const repository_location&,
+ const path& archive,
+ const dir_path& destdir);
+
+ // Repository type git (fetch-git.cxx).
+ //
+
+ // Clone git repository into destdir/<fragment>/.
+ //
+ void
+ git_clone (const common_options&,
+ const repository_location&,
+ const dir_path& destdir);
+
+ // Fetch git repository in destdir/<fragment>/.
+ //
+ void
+ git_fetch (const common_options&,
+ const repository_location&,
+ const dir_path& destdir);
+
+ // Low-level fetch API (fetch.cxx).
+ //
+
+ // Start the process of fetching the specified URL. If out is empty, then
+ // fetch to STDOUT. In this case also don't show any progress unless we are
+ // running verbose.
+ //
+ butl::process
+ start_fetch (const common_options& o,
+ const string& url,
+ const path& out = path ());
}
#endif // BPKG_FETCH_HXX
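
For illustration only (not part of the commit), a minimal usage sketch of the new git API declared above: fetch if the repository was already cloned into the destination directory and clone otherwise. The clone_or_fetch() helper is hypothetical, and it assumes bpkg/utility.hxx provides the exists() overload for directories that the rest of this commit uses; rep-fetch.cxx below shows the real call sites, which additionally stage the directory via the temporary directory.

#include <bpkg/fetch.hxx>
#include <bpkg/utility.hxx> // exists()

namespace bpkg
{
  static void
  clone_or_fetch (const common_options& co,
                  const repository_location& rl,
                  const dir_path& destdir)
  {
    // Both functions work in destdir/<fragment>/ (branch, tag, or commit).
    //
    if (exists (destdir))
      git_fetch (co, rl, destdir);
    else
      git_clone (co, rl, destdir);
  }
}
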
diff --git a/bpkg/package.hxx b/bpkg/package.hxx
index 00a54d6..322fb2b 100644
--- a/bpkg/package.hxx
+++ b/bpkg/package.hxx
@@ -200,25 +200,28 @@ namespace bpkg
// repository_location
//
-
#pragma db value
struct _repository_location
{
- string url;
+ repository_url url;
repository_type type;
};
- // Note that the type() call fails for an empty repository location.
- //
- #pragma db map type(repository_location) as(_repository_location) \
- to({(?).string (), \
- (?).empty () ? bpkg::repository_type::bpkg : (?).type ()}) \
- from(bpkg::repository_location ((?).url, (?).type))
+ #pragma db map type(repository_url) as(string) \
+ to((?).string ()) \
+ from(bpkg::repository_url (?))
#pragma db map type(repository_type) as(string) \
to(to_string (?)) \
from(bpkg::to_repository_type (?))
+ // Note that the type() call fails for an empty repository location.
+ //
+ #pragma db map type(repository_location) as(_repository_location) \
+ to({(?).url (), \
+ (?).empty () ? bpkg::repository_type::bpkg : (?).type ()}) \
+ from(bpkg::repository_location (std::move ((?).url), (?).type))
+
// repository
//
#pragma db object pointer(shared_ptr) session
@@ -629,8 +632,9 @@ namespace bpkg
// certificate
//
// Information extracted from a repository X.509 certificate. The actual
- // certificate is stored on disk as .bpkg/certs/<fingerprint>.pem (we have
- // to store it as a file because that's the only way to pass it to openssl).
+ // certificate is stored on disk as .bpkg/certificates/<fingerprint>.pem (we
+ // have to store it as a file because that's the only way to pass it to
+ // openssl).
//
// If a repository is not authenticated (has no certificate/signature,
// called unauth from now on), then we ask for the user's confirmation and
diff --git a/bpkg/pkg-fetch.cxx b/bpkg/pkg-fetch.cxx
index c2a6644..26f17d2 100644
--- a/bpkg/pkg-fetch.cxx
+++ b/bpkg/pkg-fetch.cxx
@@ -216,7 +216,9 @@ namespace bpkg
text << "fetching " << pl->location.leaf () << " "
<< "from " << pl->repository->name;
- path a (fetch_archive (co, pl->repository->location, pl->location, c));
+ path a (
+ bpkg_fetch_archive (co, pl->repository->location, pl->location, c));
+
auto_rmfile arm (a);
// We can't be fetching an archive for a transient object.
diff --git a/bpkg/rep-create.cxx b/bpkg/rep-create.cxx
index 173e2b0..60f7f59 100644
--- a/bpkg/rep-create.cxx
+++ b/bpkg/rep-create.cxx
@@ -185,7 +185,9 @@ namespace bpkg
// Load the 'repositories' file to make sure it is there and
// is valid.
//
- repository_manifests rms (fetch_repositories (d, o.ignore_unknown ()));
+ repository_manifests rms (
+ bpkg_fetch_repositories (d, o.ignore_unknown ()));
+
l4 ([&]{trace << rms.size () - 1 << " prerequisite repository(s)";});
// While we could have serialized as we go along, the order of
diff --git a/bpkg/rep-fetch.cxx b/bpkg/rep-fetch.cxx
index f53919f..5566114 100644
--- a/bpkg/rep-fetch.cxx
+++ b/bpkg/rep-fetch.cxx
@@ -4,6 +4,8 @@
#include <bpkg/rep-fetch.hxx>
+#include <libbutl/sha256.mxx>
+
#include <bpkg/auth.hxx>
#include <bpkg/fetch.hxx>
#include <bpkg/package.hxx>
@@ -26,7 +28,7 @@ namespace bpkg
// certificate.
//
pair<repository_manifests, string /* checksum */> rmc (
- fetch_repositories (co, rl, ignore_unknown));
+ bpkg_fetch_repositories (co, rl, ignore_unknown));
repository_manifests& rms (rmc.first);
@@ -46,7 +48,7 @@ namespace bpkg
// we just fetched.
//
pair<package_manifests, string /* checksum */> pmc (
- fetch_packages (co, rl, ignore_unknown));
+ bpkg_fetch_packages (co, rl, ignore_unknown));
package_manifests& pms (pmc.first);
@@ -58,7 +60,7 @@ namespace bpkg
if (a)
{
signature_manifest sm (
- fetch_signature (co, rl, true /* ignore_unknown */));
+ bpkg_fetch_signature (co, rl, true /* ignore_unknown */));
if (sm.sha256sum != pmc.second)
fail << "packages manifest file checksum mismatch for "
@@ -73,12 +75,104 @@ namespace bpkg
}
static rep_fetch_data
- rep_fetch_git (const common_options&,
- const dir_path*,
- const repository_location&,
- bool)
+ rep_fetch_git (const common_options& co,
+ const dir_path* conf,
+ const repository_location& rl,
+ bool /* ignore_unknown */)
{
- fail << "not implemented" << endf;
+ // Plan:
+ //
+ // 1. Check repos_dir/<hash>/:
+ //
+ // 1.a If it does not exist, git-clone into temp_dir/<hash>/.
+ //
+ // 1.b Otherwise, move to temp_dir/<hash>/ and git-fetch.
+ //
+ // 2. Move from temp_dir/<hash>/ to repos_dir/<hash>/
+ //
+ // 3. Load manifest from repos_dir/<hash>/<fragment>/
+ //
+ // 4. Run 'b info' in repos_dir/<hash>/<fragment>/ and fix-up
+ // package version.
+ //
+ // 5. Synthesize repository manifest.
+ //
+ // 6. Return repository and package manifest (certificate is NULL).
+ //
+ // Notes:
+ //
+ // - Should we truncate sha256 hash? Maybe to 16 chars (this is what we
+ // use for abbreviated git commit id in the version module). Also in
+ // auth? Add abbreviated_string(size_t) to sha1 and sha256 classes?
+ //
+ // @@ If we truncate the hash for auth, we would still need to store the
+ // full fingerprint in the certificate object as rep-info needs it to
+ // print. Leaving the certificate unchanged and truncating the
+ // fingerprint on the fly for the file naming seems wrong (good to have
+ // the certificate file name match the id). Probably it makes sense to
+ // make the certificate as follows:
+ //
+ // class certificate
+ // {
+ // public:
+ // string id; // SHA256 fingerprint truncated to 16 characters.
+ //
+ // string fingerprint; // Fingerprint canonical representation.
+ // ...
+ // };
+ //
+ // Yes, sounds good.
+ //
+ //
+ //
+
+ if (conf != nullptr && conf->empty ())
+ conf = dir_exists (bpkg_dir) ? &current_dir : nullptr;
+
+ assert (conf == nullptr || !conf->empty ());
+
+ dir_path h (sha256 (rl.canonical_name ()).abbreviated_string (16));
+
+ auto_rmdir rm (temp_dir / h);
+ dir_path& td (rm.path);
+
+ if (exists (td))
+ rm_r (td);
+
+ // If the git repository directory already exists, then we are fetching
+ // an already cloned repository. Move it to the temporary directory.
+ //
+ dir_path rd;
+ bool fetch (false);
+ if (conf != nullptr)
+ {
+ rd = *conf / repos_dir / h;
+
+ if (exists (rd))
+ {
+ mv (rd, td);
+ fetch = true;
+ }
+ }
+
+ if (fetch)
+ git_fetch (co, rl, td);
+ else
+ git_clone (co, rl, td);
+
+ if (!rd.empty ())
+ mv (td, rd);
+ else
+ // If there is no configuration directory then we leave the repository
+ // in the temporary directory.
+ //
+ rd = move (td);
+
+ rm.cancel ();
+
+ // @@ TODO
+ //
+ return rep_fetch_data ();
}
rep_fetch_data
diff --git a/bpkg/utility.cxx b/bpkg/utility.cxx
index f32daa0..75ba102 100644
--- a/bpkg/utility.cxx
+++ b/bpkg/utility.cxx
@@ -22,22 +22,25 @@ namespace bpkg
const dir_path empty_dir_path;
const dir_path bpkg_dir (".bpkg");
- const dir_path certs_dir (dir_path (bpkg_dir) /= "certs");
+ const dir_path certs_dir (dir_path (bpkg_dir) /= "certificates");
+ const dir_path repos_dir (dir_path (bpkg_dir) /= "repositories");
- static dir_path tmp_dir_;
+ const dir_path current_dir (".");
+
+ dir_path temp_dir;
auto_rmfile
tmp_file (const string& p)
{
- assert (!tmp_dir_.empty ());
- return auto_rmfile (tmp_dir_ / path::traits::temp_name (p));
+ assert (!temp_dir.empty ());
+ return auto_rmfile (temp_dir / path::traits::temp_name (p));
}
auto_rmdir
tmp_dir (const string& p)
{
- assert (!tmp_dir_.empty ());
- return auto_rmdir (tmp_dir_ / dir_path (path::traits::temp_name (p)));
+ assert (!temp_dir.empty ());
+ return auto_rmdir (temp_dir / dir_path (path::traits::temp_name (p)));
}
void
@@ -56,16 +59,16 @@ namespace bpkg
mk (d); // We shouldn't need mk_p().
- tmp_dir_ = move (d);
+ temp_dir = move (d);
}
void
clean_tmp (bool ignore_error)
{
- if (!tmp_dir_.empty ())
+ if (!temp_dir.empty ())
{
- rm_r (tmp_dir_, true /* dir_itself */, 3, ignore_error);
- tmp_dir_.clear ();
+ rm_r (temp_dir, true /* dir_itself */, 3, ignore_error);
+ temp_dir.clear ();
}
}
@@ -213,6 +216,22 @@ namespace bpkg
}
}
+ void
+ mv (const dir_path& from, const dir_path& to)
+ {
+ if (verb >= 3)
+ text << "mv " << from << " to " << to; // Prints trailing slashes.
+
+ try
+ {
+ mvdir (from, to);
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to move directory " << from << " to " << to << ": " << e;
+ }
+ }
+
dir_path exec_dir;
void
diff --git a/bpkg/utility.hxx b/bpkg/utility.hxx
index f4f8690..05835b0 100644
--- a/bpkg/utility.hxx
+++ b/bpkg/utility.hxx
@@ -52,8 +52,10 @@ namespace bpkg
// Widely-used paths.
//
- extern const dir_path bpkg_dir; // .bpkg/
- extern const dir_path certs_dir; // .bpkg/certs/
+ extern const dir_path bpkg_dir; // .bpkg/
+ extern const dir_path certs_dir; // .bpkg/certificates/
+ extern const dir_path repos_dir; // .bpkg/repositories/
+ extern const dir_path current_dir; // ./
// Temporary directory facility.
//
@@ -63,6 +65,8 @@ namespace bpkg
// you don't need to call init_tmp() explicitly except for certain special
// commands (like cfg-create).
//
+ extern dir_path temp_dir;
+
auto_rmfile
tmp_file (const string& prefix);
@@ -109,6 +113,9 @@ namespace bpkg
uint16_t verbosity = 3,
bool ignore_error = false);
+ void
+ mv (const dir_path& from, const dir_path& to);
+
// Process.
//
// By default the process command line is printed for verbosity >= 2
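
For illustration only (not part of the commit), a minimal sketch of the staging pattern enabled by the now-global temp_dir and the new mv() helper: prepare a directory under the temporary directory and then move it into its final location, the way rep-fetch.cxx above does for cloned git repositories. The stage_into() helper and the "staging" directory name are hypothetical; auto_rmdir, mk(), mv(), and temp_dir are the facilities used elsewhere in this commit.

#include <bpkg/types.hxx>   // dir_path, auto_rmdir
#include <bpkg/utility.hxx> // temp_dir, mk(), mv()

namespace bpkg
{
  static void
  stage_into (const dir_path& dst)
  {
    auto_rmdir rm (temp_dir / dir_path ("staging"));
    const dir_path& td (rm.path);

    mk (td);       // Create and populate the directory under temp_dir...
    mv (td, dst);  // ...then move it into its final location.

    rm.cancel ();  // Nothing left to clean up on success.
  }
}
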