From f2f08e0758243a820fe47128ffabaa474c0e86e7 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 13 Apr 2018 23:38:12 +0300 Subject: Implement git repository handling transition (phase 0) --- bpkg/fetch-git.cxx | 1084 ++++++++++++++++++++++++++++++++----------------- bpkg/fetch.hxx | 42 +- bpkg/package.cxx | 6 + bpkg/package.hxx | 4 +- bpkg/pkg-checkout.cxx | 39 +- bpkg/rep-add.cli | 53 ++- bpkg/rep-fetch.cxx | 156 ++++--- bpkg/rep-remove.cxx | 9 +- 8 files changed, 918 insertions(+), 475 deletions(-) (limited to 'bpkg') diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx index c22bafb..fba1c8e 100644 --- a/bpkg/fetch-git.cxx +++ b/bpkg/fetch-git.cxx @@ -4,9 +4,8 @@ #include -#ifdef _WIN32 -# include // replace() -#endif +#include +#include // find(), find_if(), replace() #include // digit(), xdigit() #include @@ -340,7 +339,81 @@ namespace bpkg u.insert (7, 1, '/'); #endif - return repository_url (u); + repository_url r (u); + + path& up (*r.path); + + if (!up.to_directory ()) + up = path_cast (move (up)); + + return r; + } + + // Get/set the repository configuration option. + // + inline static string + config_get (const common_options& co, + const dir_path& dir, + const string& key, + const char* what) + { + return git_string (co, + what, + co.git_option (), + "-C", dir, + "config", + "--get", + key); + } + + inline static void + config_set (const common_options& co, + const dir_path& dir, + const string& key, + const string& value) + { + run_git (co, co.git_option (), "-C", dir, "config", key, value); + } + + // Get option from the specified configuration file. + // + inline static string + config_get (const common_options& co, + const path& file, + const string& key, + const char* what) + { + return git_string (co, + what, + co.git_option (), + "config", + "--file", file, + "--get", + key); + } + + // Get/set the repository remote URL. + // + static repository_url + origin_url (const common_options& co, const dir_path& dir) + { + try + { + return from_git_url ( + config_get (co, dir, "remote.origin.url", "repository remote URL")); + } + catch (const invalid_argument& e) + { + fail << "invalid remote.origin.url configuration value: " << e << endg; + } + } + + inline static void + origin_url (const common_options& co, + const dir_path& dir, + const repository_url& url) + { + config_set (co, dir, "remote.origin.url", to_git_url (url)); } // Sense the git protocol capabilities for a specified URL. @@ -476,16 +549,121 @@ namespace bpkg fail << "unable to fetch " << url << endg; } - // Return true if a commit is advertised by the remote repository. It is - // assumed that sense_capabilities() function was already called for the URL. + // A git ref (tag, branch, etc) and its commit id (i.e., one line of the + // git-ls-remote output). // - static bool - commit_advertized (const common_options& co, - const repository_url& url, - const string& commit) + struct ref + { + string name; // Note: without the peel marker ('^{}'). + string commit; + bool peeled; // True for '...^{}' references. + }; + + // List of all refs and their commit ids advertized by a repository (i.e., + // the git-ls-remote output). + // + class refs: public vector + { + public: + // Resolve a git refname (tag, branch, etc) returning NULL if not found. + // + const ref* + find_name (const string& n, bool abbr_commit) const + { + auto find = [this] (const string& n) -> const ref* + { + auto i (find_if ( + begin (), end (), + [&n] (const ref& i) + { + return !i.peeled && i.name == n; // Note: skip peeled. + })); + + return i != end () ? &*i : nullptr; + }; + + // Let's search in the order refs are disambiguated by git (see + // gitrevisions(7) for details). + // + // What should we do if the name already contains a slash, for example, + // tags/v1.2.3? Note that while it most likely is relative to refs/, it + // can also be relative to other subdirectories (say releases/v1.2.3 in + // tags/releases/v1.2.3). So we just check everywhere. + + // This handles symbolic references like HEAD. + // + if (const ref* r = find (n)) + return r; + + if (const ref* r = find ("refs/" + n)) + return r; + + if (const ref* r = find ("refs/tags/" + n)) + return r; + + if (const ref* r = find ("refs/heads/" + n)) + return r; + + // See if this is an abbreviated commit id. We do this check last since + // this can be ambiguous. We also don't bother checking strings shorter + // than 7 characters (git default). + // + return abbr_commit && n.size () >= 7 ? find_commit (n) : nullptr; + } + + // Resolve (potentially abbreviated) commit id returning NULL if not + // found and failing if the resolution is ambiguous. + // + const ref* + find_commit (const string& c) const + { + const ref* r (nullptr); + size_t n (c.size ()); + + for (const ref& rf: *this) + { + if (rf.commit.compare (0, n, c) == 0) + { + if (r == nullptr) + r = &rf; + + // Note that different names can refer to the same commit. + // + else if (r->commit != rf.commit) + fail << "abbreviated commit id " << c << " is ambiguous" << + info << "candidate: " << r->commit << + info << "candidate: " << rf.commit; + } + } + + return r; + } + }; + + // Map of repository URLs to their advertized refs/commits. + // + using repository_refs_map = map; + + static repository_refs_map repository_refs; + + // It is assumed that sense_capabilities() function was already called for + // the URL. + // + static const refs& + load_refs (const common_options& co, const repository_url& url) { - tracer trace ("commit_advertized"); + tracer trace ("load_refs"); + string u (url.string ()); + auto i (repository_refs.find (u)); + + if (i != repository_refs.end ()) + return i->second; + + if (verb) + text << "querying " << url; + + refs rs; fdpipe pipe (open_pipe ()); process pr (start_git (co, @@ -493,47 +671,128 @@ namespace bpkg timeout_opts (co, url.scheme), co.git_option (), "ls-remote", - "--refs", to_git_url (url))); - pipe.out.close (); // Shouldn't throw, unless something is severely damaged. + // Shouldn't throw, unless something is severely damaged. + // + pipe.out.close (); - try + for (;;) // Breakout loop. { - bool r (false); - ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); - - for (string l; !eof (getline (is, l)); ) + try { - l4 ([&]{trace << "ref: " << l;}); + ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); - if (l.compare (0, commit.size (), commit) == 0) + for (string l; !eof (getline (is, l)); ) { - r = true; - break; + l4 ([&]{trace << "ref line: " << l;}); + + size_t n (l.find ('\t')); + + if (n == string::npos) + fail << "unable to parse references for " << url << endg; + + string cm (l, 0, n); + string nm (l, n + 1); + + n = nm.rfind ("^{"); + bool peeled (n != string::npos); + + if (peeled) + nm.resize (n); // Strip the peel operation ('^{...}'). + + rs.push_back (ref {move (nm), move (cm), peeled}); } + + is.close (); + + if (pr.wait ()) + break; + + // Fall through. } + catch (const io_error&) + { + if (pr.wait ()) + fail << "unable to read references for " << url << endg; - is.close (); + // Fall through. + } - if (pr.wait ()) - return r; + // We should only get here if the child exited with an error status. + // + assert (!pr.wait ()); - // Fall through. + fail << "unable to list references for " << url << endg; } - catch (const io_error&) - { - if (pr.wait ()) - fail << "unable to read references for " << url << endg; - // Fall through. - } + return repository_refs.emplace (move (u), move (rs)).first->second; + } - // We should only get here if the child exited with an error status. + // Return true if a commit is advertised by the remote repository. It is + // assumed that sense_capabilities() function was already called for the URL. + // + static bool + commit_advertized (const common_options& co, + const repository_url& url, + const string& commit) + { + return load_refs (co, url).find_commit (commit) != nullptr; + } + + // Return true if a commit is already fetched. + // + static bool + commit_fetched (const common_options& co, + const dir_path& dir, + const string& commit) + { + auto_fd dev_null (open_dev_null ()); + + process pr (start_git (co, + 1, // The output is suppressed by -e. + dev_null, + co.git_option (), + "-C", dir, + "cat-file", + "-e", + commit + "^{commit}")); + + // Shouldn't throw, unless something is severely damaged. // - assert (!pr.wait ()); + dev_null.close (); + return pr.wait (); + } + + // Create an empty repository and configure the remote origin URL and the + // default fetch refspec. If requested, use a separate git directory, + // creating it if absent. + // + static void + init (const common_options& co, + const dir_path& dir, + const repository_url& url, + const dir_path& git_dir = dir_path ()) + { + if (!run_git ( + co, + co.git_option (), + "init", + + !git_dir.empty () + ? strings ({"--separate-git-dir=" + git_dir.string ()}) + : strings (), + + verb < 2 ? opt ("-q") : nullopt, + dir)) + fail << "unable to init " << dir << endg; + + origin_url (co, dir, url); - fail << "unable to list references for " << url << endg; + config_set (co, + dir, + "remote.origin.fetch", + "+refs/heads/*:refs/remotes/origin/*"); } // Return true if the shallow fetch is possible for the reference. @@ -542,7 +801,7 @@ namespace bpkg shallow_fetch (const common_options& co, const repository_url& url, capabilities cap, - const git_reference& ref) + const git_ref_filter& rf) { switch (cap) { @@ -552,7 +811,7 @@ namespace bpkg } case capabilities::smart: { - return !ref.commit || commit_advertized (co, url, *ref.commit); + return !rf.commit || commit_advertized (co, url, *rf.commit); } case capabilities::unadv: { @@ -564,123 +823,177 @@ namespace bpkg return false; } - // Return true if a commit is reachable from the tip(s). + // @@ Move to libbpkg when adding support for multiple fragments. // - // Can be used to avoid redundant fetches. - // - // Note that git-submodule script implements this check, so it is probably an - // important optimization. + using git_ref_filters = vector; + + // Fetch the references and return their commit ids. If there are no + // reference filters specified, then fetch all the tags and branches. // - static bool - commit_reachable (const common_options& co, - const dir_path& dir, - const string& commit) + static strings + fetch (const common_options& co, + const dir_path& dir, + const dir_path& submodule, // Used only for diagnostics. + const git_ref_filters& rfs) { - fdpipe pipe (open_pipe ()); - auto_fd dev_null (open_dev_null ()); + strings r; + capabilities cap; + repository_url url; - process pr (start_git (co, - pipe, - dev_null, - co.git_option (), - "-C", dir, - "rev-list", - "-n", "1", - commit, - "--not", - "--all")); + strings scs; // Shallow fetch commits. + strings dcs; // Deep fetch commits. - // Shouldn't throw, unless something is severely damaged. - // - pipe.out.close (); - dev_null.close (); + bool fetch_repo (false); - try + // Translate a name to the corresponding commit. Fail if the name is not + // known by the remote repository. + // + auto commit = [&co, &dir, &url] (const string& nm, bool abbr_commit) + -> const string& { - ifdstream is (move (pipe.in), fdstream_mode::skip); + if (url.empty ()) + url = origin_url (co, dir); - string s; - if (is.peek () != ifdstream::traits_type::eof ()) - getline (is, s); + const ref* r (load_refs (co, url).find_name (nm, abbr_commit)); - is.close (); - return pr.wait () && s.empty (); - } - catch (const io_error&) {} - return false; - } + if (r == nullptr) + fail << "unable to fetch " << nm << " from " << url << + info << "name is not recognized" << endg; - // Print warnings about non-shallow fetching. - // - static void - fetch_warn (capabilities cap, - const char* what, - const dir_path& submodule = dir_path ()) - { + return r->commit; + }; + + // Add a commit to the list, suppressing duplicates. + // + auto add = [] (const string& c, strings& cs) { - diag_record dr (warn); - dr << "fetching whole " << what << " history"; + if (find (cs.begin (), cs.end (), c) == cs.end ()) + cs.push_back (c); + }; - if (!submodule.empty ()) - dr << " for submodule '" << submodule.posix_string () << "'"; + if (rfs.empty ()) + { + url = origin_url (co, dir); - dr << " (" - << (cap == capabilities::dumb - ? "dumb HTTP" - : "unadvertised commit") // There are no other reasons so far. - << ')'; + for (const ref& rf: load_refs (co, url)) + { + // Skip everything other than tags and branches. + // + if (rf.peeled || + (rf.name.compare (0, 11, "refs/heads/") != 0 && + rf.name.compare (0, 10, "refs/tags/") != 0)) + continue; - } + // Add an already fetched commit to the resulting list. + // + if (commit_fetched (co, dir, rf.commit)) + { + add (rf.commit, r); + continue; + } - if (cap == capabilities::dumb) - warn << "fetching over dumb HTTP, no progress will be shown"; - } + // Evaluate if the commit can be obtained with the shallow fetch and + // add it to the proper list. + // + if (scs.empty () && dcs.empty ()) + cap = sense_capabilities (co, url); - // Update git index and working tree to match the reference. Fetch if - // necessary. - // - static void - update_tree (const common_options& co, - const dir_path& dir, - const dir_path& submodule, // Is relative to the top project. - const git_reference& ref, - capabilities cap, - bool shallow, - const strings& to) - { - // Don't fetch it the reference is a commit that is reachable from the - // tip(s). - // - if (!(ref.commit && commit_reachable (co, dir, *ref.commit))) + // The commit is advertised, so we can fetch shallow, unless the + // protocol is dumb. + // + add (rf.commit, cap != capabilities::dumb ? scs : dcs); + } + } + else { - if (!shallow) - fetch_warn (cap, ref.commit ? "repository" : "branch", submodule); + for (const git_ref_filter& rf: rfs) + { + // Add an already fetched commit to the resulting list. + // + if (rf.commit && commit_fetched (co, dir, *rf.commit)) + { + add (*rf.commit, r); + continue; + } - // The clone command prints the following line prior to the progress - // lines: - // - // Cloning into ''... - // - // The fetch command doesn't print anything similar, for some reason. - // This makes it hard to understand which superproject/submodule is - // currently being fetched. Let's fix that. - // - // Note that we have "fixed" that capital letter nonsense (hoping that - // git-clone will do the same at some point). - // - if (verb != 0) - text << "fetching in '" << dir.posix_string () << "'..."; + if (!rf.commit) + { + assert (rf.name); + const string& c (commit (*rf.name, true /* abbr_commit */)); + + if (commit_fetched (co, dir, c)) + { + add (c, r); + continue; + } + } + + // Evaluate if the commit can be obtained with the shallow fetch and + // add it to the proper list. + // + if (scs.empty () && dcs.empty ()) + { + if (url.empty ()) // Can already be assigned by commit() lambda. + url = origin_url (co, dir); + + cap = sense_capabilities (co, url); + } + + bool shallow (shallow_fetch (co, url, cap, rf)); + strings& commits (shallow ? scs : dcs); + + // If commit is not specified, then we fetch the commit the refname + // translates to. + // + if (!rf.commit) + { + assert (rf.name); + + add (commit (*rf.name, true /* abbr_commit */), commits); + } + + // If commit is specified and the shallow fetch is possible, then we + // fetch the commit. + // + else if (shallow) + add (*rf.commit, commits); + + // If commit is specified and the shallow fetch is not possible, but + // the refname containing the commit is specified, then we fetch the + // whole refname history. + // + else if (rf.name) + add (commit (*rf.name, false /* abbr_commit */), commits); + + // Otherwise, if the refname is not specified and the commit is not + // advertised, we have to fetch the whole repository history. + // + else + { + add (*rf.commit, commits); + fetch_repo = !commit_advertized (co, url, *rf.commit); + } + } + } + + // Bail out if all commits are already fetched. + // + if (scs.empty () && dcs.empty ()) + return r; + + auto fetch = [&co, &url, &dir] (const strings& refspecs, bool shallow) + { // Note that we suppress the (too detailed) fetch command output if the // verbosity level is 1. However, we still want to see the progress in // this case, unless STDERR is not directed to a terminal. // // Also note that we don't need to specify --refmap option since we can - // rely on the clone command that properly set the remote.origin.fetch - // configuration option. + // rely on the init() function that properly sets the + // remote.origin.fetch configuration option. // if (!run_git (co, - to, + timeout_opts (co, url.scheme), co.git_option (), "-C", dir, "fetch", @@ -689,50 +1002,113 @@ namespace bpkg verb == 1 && fdterm (2) ? opt ( "--progress") : nullopt, verb < 2 ? opt ("-q") : verb > 3 ? opt ("-v") : nullopt, "origin", - ref.commit ? *ref.commit : *ref.branch)) + refspecs)) fail << "unable to fetch " << dir << endg; + }; + + // Print warnings prior to the deep fetching. + // + if (!dcs.empty ()) + { + { + diag_record dr (warn); + dr << "fetching whole " << (fetch_repo ? "repository" : "reference") + << " history"; + + if (!submodule.empty ()) + dr << " for submodule '" << submodule.posix_string () << "'"; + + dr << " (" + << (cap == capabilities::dumb + ? "dumb HTTP" + : "unadvertised commit") // There are no other reasons so far. + << ')'; + } + + if (cap == capabilities::dumb) + warn << "fetching over dumb HTTP, no progress will be shown"; } - const string& commit (ref.commit ? *ref.commit : string ("FETCH_HEAD")); + // Print the progress indicator. + // + // Note that the clone command prints the following line prior to the + // progress lines: + // + // Cloning into ''... + // + // The fetch command doesn't print anything similar, for some reason. + // This makes it hard to understand which superproject/submodule is + // currently being fetched. Let's fix that. + // + // Also note that we have "fixed" that capital letter nonsense and stripped + // the trailing '...'. + // + if (verb) + { + diag_record dr (text); + dr << "fetching "; + + if (!submodule.empty ()) + dr << "submodule '" << submodule.posix_string () << "' "; - // For some (probably valid) reason the hard reset command doesn't remove - // a submodule directory that is not plugged into the project anymore. It - // also prints the non-suppressible warning like this: + dr << "from " << url; + + if (verb >= 2) + dr << " in '" << dir.posix_string () << "'"; // Is used by tests. + } + + // Note that the shallow, deep and the resulting lists don't overlap. + // Thus, we will be moving commits between the lists not caring about + // suppressing duplicates. // - // warning: unable to rmdir libbar: Directory not empty + + // First, we fetch deep commits. // - // That's why we run the clean command afterwards. It may also be helpful - // if we produce any untracked files in the tree between fetches down the - // road. + if (!dcs.empty ()) + { + fetch (!fetch_repo ? dcs : strings (), false); + + r.insert (r.end (), + make_move_iterator (dcs.begin ()), + make_move_iterator (dcs.end ())); + } + + // After the deep fetching some of the shallow commits might also be + // fetched, so we move them to the resulting list. // - if (!run_git ( - co, - co.git_option (), - "-C", dir, - "reset", - "--hard", - verb < 2 ? opt ("-q") : nullopt, - commit)) - fail << "unable to reset to " << commit << endg; + strings cs; + for (auto& c: scs) + (commit_fetched (co, dir, c) ? r : cs).push_back (move (c)); - if (!run_git ( - co, - co.git_option (), - "-C", dir, - "clean", - "-d", - "-x", - "-ff", - verb < 2 ? opt ("-q") : nullopt)) - fail << "unable to clean " << dir << endg; + // Finally, fetch shallow commits. + // + if (!cs.empty ()) + { + fetch (cs, true); + + r.insert (r.end (), + make_move_iterator (cs.begin ()), + make_move_iterator (cs.end ())); + } + + return r; } + // Checkout the repository submodules (see git_checkout_submodules() + // description for details). + // static void - update_submodules (const common_options& co, - const dir_path& dir, - const dir_path& prefix) + checkout_submodules (const common_options& co, + const dir_path& dir, + const dir_path& git_dir, + const dir_path& prefix) { - tracer trace ("update_submodules"); + tracer trace ("checkout_submodules"); + + path mf (dir / path (".gitmodules")); + + if (!exists (mf)) + return; auto failure = [&prefix] (const char* desc) { @@ -759,15 +1135,20 @@ namespace bpkg : strings (), "submodule--helper", "init", - verb < 1 ? opt ("-q") : nullopt)) + verb < 2 ? opt ("-q") : nullopt)) failure ("unable to initialize submodules"); - // Iterate over the registered submodules cloning/fetching them and - // recursively updating their submodules. + repository_url orig_url (origin_url (co, dir)); + + // Iterate over the registered submodules initializing/fetching them and + // recursively checking them out. // // Note that we don't expect submodules nesting be too deep and so recurse // while reading the git process output. // + // Also note that we don't catch the failed exception here, relying on the + // fact that the process destructor will wait for the process completion. + // fdpipe pipe (open_pipe ()); process pr (start_git (co, @@ -776,7 +1157,9 @@ namespace bpkg "-C", dir, "submodule--helper", "list")); - pipe.out.close (); // Shouldn't throw, unless something is severely damaged. + // Shouldn't throw, unless something is severely damaged. + // + pipe.out.close (); try { @@ -808,92 +1191,119 @@ namespace bpkg dir_path psdir (prefix / sdir); string psd (psdir.posix_string ()); // For use in the diagnostics. - string name (git_string (co, "submodule name", - co.git_option (), - "-C", dir, - "submodule--helper", "name", - sdir)); + string nm (git_string (co, "submodule name", + co.git_option (), + "-C", dir, + "submodule--helper", "name", + sdir)); + + string uo ("submodule." + nm + ".url"); + string uv (config_get (co, dir, uo, "submodule URL")); + l4 ([&]{trace << "name: " << nm << ", URL: " << uv;}); + + dir_path fsdir (dir / sdir); + bool initialized (exists (fsdir / path (".git"))); + + // If the submodule is already initialized and its commit didn't + // change then we skip it. + // + if (initialized && git_string (co, "submodule commit", + co.git_option (), + "-C", fsdir, + "rev-parse", + "--verify", + "HEAD") == commit) + continue; + + // Note that the "submodule--helper init" command (see above) doesn't + // sync the submodule URL in .git/config file with the one in + // .gitmodules file, that is a primary URL source. Thus, we always + // calculate the URL using .gitmodules and update it in .git/config, if + // necessary. + // repository_url url; try { - url = from_git_url (git_string (co, "submodule URL", - co.git_option (), - "-C", dir, - "config", - "--get", - "submodule." + name + ".url")); + url = from_git_url ( + config_get (co, mf, uo, "submodule original URL")); + + // Complete the relative submodule URL against the containing + // repository origin URL. + // + if (url.scheme == repository_protocol::file && url.path->relative ()) + { + repository_url u (orig_url); + *u.path /= *url.path; + + // Note that we need to collapse 'example.com/a/..' to + // 'example.com/', rather than to 'example.com/.'. + // + u.path->normalize ( + false /* actual */, + orig_url.scheme != repository_protocol::file /* cur_empty */); + + url = move (u); + } + + // Fix-up submodule URL in .git/config file, if required. + // + if (url != from_git_url (move (uv))) + { + config_set (co, dir, uo, to_git_url (url)); + + // We also need to fix-up submodule's origin URL, if its + // repository is already initialized. + // + if (initialized) + origin_url (co, fsdir, url); + } } catch (const invalid_argument& e) { fail << "invalid repository URL for submodule '" << psd << "': " << e << endg; } + catch (const invalid_path& e) + { + fail << "invalid repository path for submodule '" << psd << "': " + << e << endg; + } - l4 ([&]{trace << "name: " << name << ", URL: " << url;}); - - dir_path fsdir (dir / sdir); - bool cloned (exists (fsdir / path (".git"))); - - // If the submodule is already cloned and it's commit didn't change - // then we skip it. + // Initialize the submodule repository. // - // Note that git-submodule script still recurse into it for some - // unclear reason. + // Note that we initialize the submodule repository git directory out + // of the working tree, the same way as "submodule--helper clone" + // does. This prevents us from loosing the fetched data when switching + // the containing repository between revisions, that potentially + // contain different sets of submodules. // - if (cloned && git_string (co, "submodule commit", - co.git_option (), - "-C", fsdir, - "rev-parse", - "--verify", - "HEAD") == commit) - continue; + dir_path gdir (git_dir / dir_path ("modules") / sdir); - git_reference ref {nullopt, commit}; - capabilities cap (sense_capabilities (co, url)); - bool shallow (shallow_fetch (co, url, cap, ref)); - strings to (timeout_opts (co, url.scheme)); - - // Clone new submodule. - // - if (!cloned) + if (!initialized) { - if (!shallow) - fetch_warn (cap, "repository", psdir); - - if (!run_git (co, - to, - co.git_option (), - "-C", dir, - "submodule--helper", "clone", - - "--name", name, - "--path", sdir, - "--url", to_git_url (url), - shallow - ? cstrings ({"--depth", "1"}) - : cstrings (), - verb < 1 ? opt ("-q") : nullopt)) - fail << "unable to clone submodule '" << psd << "'" << endg; + mk_p (gdir); + init (co, fsdir, url, gdir); } - update_tree (co, fsdir, psdir, ref, cap, shallow, to); + // Fetch and checkout the submodule. + // + git_ref_filters rfs {git_ref_filter {nullopt, commit}}; + fetch (co, fsdir, psdir, rfs); + + git_checkout (co, fsdir, commit); - // Not quite a checkout, but let's make the message match the - // git-submodule script output (again, except for capitalization). + // Let's make the message match the git-submodule script output + // (again, except for capitalization). // - if (verb > 0) + if (verb) text << "submodule path '" << psd << "': checked out '" << commit << "'"; - // Recurse. + // Check out the submodule submodules, recursively. // - // Can throw the failed exception that we don't catch here, relying on - // the fact that the process destructor will wait for the process - // completion. - // - update_submodules (co, fsdir, psdir); + checkout_submodules (co, fsdir, gdir, psdir); } is.close (); @@ -918,169 +1328,107 @@ namespace bpkg failure ("unable to list submodules"); } - // Extract the git reference from the repository URL fragment. Set the URL - // fragment to nullopt. - // - static git_reference - parse_reference (repository_url& url, const char* what) + void + git_init (const common_options& co, + const repository_location& rl, + const dir_path& dir) + { + repository_url url (rl.url ()); + url.fragment = nullopt; + + init (co, dir, url); + } + + strings + git_fetch (const common_options& co, + const repository_location& rl, + const dir_path& dir) { + git_ref_filters refs; + repository_url url (rl.url ()); + + if (url.fragment) try { - git_reference r (git_reference (url.fragment)); + refs.emplace_back (*url.fragment); url.fragment = nullopt; - return r; } catch (const invalid_argument& e) { - fail << "unable to " << what << ' ' << url << ": " << e << endf; + fail << "unable to fetch " << url << ": " << e; } - } - - // Produce a repository directory name for the specified git reference. - // - // Truncate commit id-based directory names to shorten absolute directory - // paths, lowering the probability of hitting the limit on Windows. - // - // Note that we can't truncate them for branches/tags as chances to clash - // would be way higher than for commit ids. Though such names are normally - // short anyway. - // - static inline dir_path - repository_dir (const git_reference& ref) - { - return dir_path (ref.commit ? ref.commit->substr (0, 16) : *ref.branch); - } - - dir_path - git_clone (const common_options& co, - const repository_location& rl, - const dir_path& destdir) - { - repository_url url (rl.url ()); - git_reference ref (parse_reference (url, "clone")); - // All protocols support single branch cloning, so we will always be - // cloning a single branch if the branch is specified. + // Update the repository URL, if changed. // - bool single_branch (ref.branch); - capabilities cap (sense_capabilities (co, url)); - bool shallow (shallow_fetch (co, url, cap, ref)); - - if (shallow) - single_branch = false; // Is implied for shallow cloning. - else - fetch_warn (cap, single_branch ? "branch" : "repository"); - - dir_path r (repository_dir (ref)); - dir_path d (destdir / r); + repository_url u (origin_url (co, dir)); - strings to (timeout_opts (co, url.scheme)); - - if (!run_git ( - co, - to, - "-c", "advice.detachedHead=false", - co.git_option (), - "clone", - - ref.branch ? strings ({"--branch", *ref.branch}) : strings (), - single_branch ? opt ("--single-branch") : nullopt, - shallow ? strings ({"--depth", "1"}) : strings (), - ref.commit ? opt ("--no-checkout") : nullopt, + if (url != u) + { + // Note that the repository canonical name can not change under the + // legal scenarios that lead to the location change. Changed canonical + // name means that the repository was manually amended. We could fix-up + // such repositories as well but want to leave the backdoor for tests. + // + u.fragment = rl.url ().fragment; // Restore the fragment. + repository_location l (u, rl.type ()); - verb < 1 ? opt ("-q") : verb > 3 ? opt ("-v") : nullopt, - to_git_url (url), - d)) - fail << "unable to clone " << url << endg; + if (rl.canonical_name () == l.canonical_name ()) + { + if (verb) + info << "location changed for " << rl.canonical_name () << + info << "new location " << rl << + info << "old location " << l; - if (ref.commit) - update_tree (co, d, dir_path (), ref, cap, shallow, to); + origin_url (co, dir, url); + } + } - update_submodules (co, d, dir_path ()); - return r; + return fetch (co, dir, dir_path () /* submodule */, refs); } - dir_path - git_fetch (const common_options& co, - const repository_location& rl, - const dir_path& destdir) + void + git_checkout (const common_options& co, + const dir_path& dir, + const string& commit) { - repository_url url (rl.url ()); - git_reference ref (parse_reference (url, "fetch")); - - dir_path r (repository_dir (ref)); - - // Fetch is noop if the specific commit is checked out. - // - // What if the user replaces the repository URL with a one with a new - // branch/tag/commit? These are not part of the repository name which - // means such a repository will have the same hash. But then when we - // remove the repository, we will also clean up its state. So seems like - // this should work correctly automatically. + // For some (probably valid) reason the hard reset command doesn't remove + // a submodule directory that is not plugged into the project anymore. It + // also prints the non-suppressible warning like this: // - if (ref.commit) - return r; - - assert (ref.branch); - - dir_path d (destdir); - d /= dir_path (*ref.branch); - - // If the repository location differs from the one that was used to clone - // the repository then we re-clone it from the new location. + // warning: unable to rmdir libbar: Directory not empty // - // Another (more hairy) way of doing this would be fixing up the remote - // origin URLs recursively prior to fetching. + // That's why we run the clean command afterwards. It may also be helpful + // if we produce any untracked files in the tree between checkouts down + // the road. // - try - { - repository_url u (from_git_url (git_string (co, "remote repository URL", - co.git_option (), - "-C", d, - "config", - "--get", - "remote.origin.url"))); - if (u != url) - { - // Note that the repository canonical name can not change under the - // legal scenarios that lead to the location change. Changed canonical - // name means that the repository was manually amended. We could - // re-clone such repositories as well but want to leave the backdoor - // for tests. - // - u.fragment = rl.url ().fragment; // Restore the fragment. - repository_location l (u, rl.type ()); - - if (rl.canonical_name () == l.canonical_name ()) - { - if (verb) - info << "re-cloning " << rl.canonical_name () - << " due to location change" << - info << "new location " << rl << - info << "old location " << l; - - rm_r (d); - return git_clone (co, rl, destdir); - } - } - } - catch (const invalid_argument& e) - { - fail << "invalid remote.origin.url configuration value: " << e << endg; - } - - capabilities cap (sense_capabilities (co, url)); - bool shallow (shallow_fetch (co, url, cap, ref)); + if (!run_git ( + co, + co.git_option (), + "-C", dir, + "reset", + "--hard", + verb < 2 ? opt ("-q") : nullopt, + commit)) + fail << "unable to reset to " << commit << endg; - update_tree (co, - d, - dir_path (), - ref, - cap, - shallow, - timeout_opts (co, url.scheme)); + if (!run_git ( + co, + co.git_option (), + "-C", dir, + "clean", + "-d", + "-x", + "-ff", + verb < 2 ? opt ("-q") : nullopt)) + fail << "unable to clean " << dir << endg; + } - update_submodules (co, d, dir_path ()); - return r; + void + git_checkout_submodules (const common_options& co, const dir_path& dir) + { + checkout_submodules (co, + dir, + dir / dir_path (".git"), + dir_path () /* prefix */); } } diff --git a/bpkg/fetch.hxx b/bpkg/fetch.hxx index e784cb6..9530455 100644 --- a/bpkg/fetch.hxx +++ b/bpkg/fetch.hxx @@ -49,21 +49,41 @@ namespace bpkg // Repository type git (fetch-git.cxx). // - // Clone git repository into destdir//. Return the cloned repository - // directory name that was deduced from the repository URL fragment. + // Create a git repository in the specified directory and prepare it for + // fetching from the specified repository location. Note that the repository + // URL fragment is neither used nor validated. // - dir_path - git_clone (const common_options&, - const repository_location&, - const dir_path& destdir); - - // Fetch git repository in destdir//. Return the fetched repository - // directory name that was deduced from the repository URL fragment. + void + git_init (const common_options&, + const repository_location&, + const dir_path&); + + // Fetch a git repository in the specifid directory (previously created by + // git_init()) for the references obtained from the repository URL fragment + // returning commit ids these references resolve to. After fetching the + // repository working tree state is unspecified (see git_checkout ()). // - dir_path + // Note that submodules are not fetched. + // + strings git_fetch (const common_options&, const repository_location&, - const dir_path& destdir); + const dir_path&); + + // Checkout the specified commit previously fetched by git_fetch(). + // + // Note that submodules are not checked out. + // + void + git_checkout (const common_options&, + const dir_path&, + const string& commit); + + // Fetch (if necessary) and checkout submodules, recursively, in a working + // tree previously checked out by git_checkout(). + // + void + git_checkout_submodules (const common_options&, const dir_path&); // Low-level fetch API (fetch.cxx). // diff --git a/bpkg/package.cxx b/bpkg/package.cxx index 08e71f9..2880368 100644 --- a/bpkg/package.cxx +++ b/bpkg/package.cxx @@ -65,6 +65,12 @@ namespace bpkg const auto& ps (r->prerequisites); const auto& cs (r->complements); + // @@ The same repository can be present in the location set multiple times + // with different fragment values. Given that we may traverse the same + // repository tree multiple times, which is inefficient but harmless. + // Let's leave it this way for now as it likely to be changed with + // adding support for repository fragment objects. + // for (const package_location& pl: ap->locations) { const lazy_shared_ptr& lr (pl.repository); diff --git a/bpkg/package.hxx b/bpkg/package.hxx index f0bb6fd..5b0db8c 100644 --- a/bpkg/package.hxx +++ b/bpkg/package.hxx @@ -935,7 +935,7 @@ namespace bpkg // Return a list of packages available from this repository. // - #pragma db view object(repository) \ + #pragma db view object(repository) query(distinct) \ table("available_package_locations" = "pl" inner: \ "pl.repository = " + repository::name) \ object(available_package = package inner: \ @@ -956,7 +956,7 @@ namespace bpkg // Return a list of repositories the packages come from. // - #pragma db view object(repository) \ + #pragma db view object(repository) query(distinct) \ table("available_package_locations" = "pl" inner: \ "pl.repository = " + repository::name) \ object(available_package = package inner: \ diff --git a/bpkg/pkg-checkout.cxx b/bpkg/pkg-checkout.cxx index 6524730..1759991 100644 --- a/bpkg/pkg-checkout.cxx +++ b/bpkg/pkg-checkout.cxx @@ -6,6 +6,7 @@ #include +#include // git_checkout*() #include #include #include @@ -22,6 +23,38 @@ using namespace butl; namespace bpkg { + static void + checkout (const common_options& o, + const repository_location& rl, + const dir_path& dir, + const string& fragment, + const shared_ptr& ap) + { + switch (rl.type ()) + { + case repository_type::git: + { + git_checkout (o, dir, fragment); + + if (exists (dir / path (".gitmodules"))) + { + // Print the progress indicator to attribute the possible fetching + // progress. + // + if (verb) + text << "checking out " + << package_string (ap->id.name, ap->version); + + git_checkout_submodules (o, dir); + } + + break; + } + case repository_type::pkg: + case repository_type::dir: assert (false); break; + } + } + shared_ptr pkg_checkout (const common_options& o, const dir_path& c, @@ -106,10 +139,10 @@ namespace bpkg // working tree so all we need to do is distribute it to the package // directory. // - dir_path sd (c / repos_dir); + dir_path sd (c / repos_dir / repository_state (rl)); + + checkout (o, rl, sd, pl->fragment, ap); - sd /= repository_state (rl); - sd /= dir_path (pl->fragment); sd /= path_cast (pl->location); // Verify the package prerequisites are all configured since the dist diff --git a/bpkg/rep-add.cli b/bpkg/rep-add.cli index fc69f77..a5ede84 100644 --- a/bpkg/rep-add.cli +++ b/bpkg/rep-add.cli @@ -72,36 +72,45 @@ namespace bpkg as there are commits. Practically, however, we are normally only interested in a small subset of them while fetching and processing the necessary information for all of them could be prohibitively expensive. - As a result, a \cb{git} repository URL must include the fragment - component that restricts the set of versions to consider as available. - While in the future it will be possible to specify multiple available - versions, currently the fragment must identify a single version using one - of the following forms: + As a result, by default, only advertised tags (\cb{refs/tags/*}) and + branches (\cb{refs/heads/*}) are considered to be sources of useful + package versions (see \cb{git-ls-remote(1)} for details). + + It is also possible to narrow down (or expand) the set of available + versions by specifying one or more references and/or commit ids in the + repository URL fragment. For example: \ - # - # - # - #@ + https://example.com/foo.git#v1.2.3 + https://example.com/foo.git#master + https://example.com/foo.git#af234f56 + https://example.com/foo.git#HEAD,tags/v1.2.3,heads/feature1 \ - In the last form is only used to minimize the amount of data to - fetch. If specified, then the commit must belong to its history. + A \cb{git} repository URL fragment must be a comma-separated list of + reference filters in the following form: - No abbreviated commit ids are supported and must be the - complete, 40-characters SHA1 string. A 40-characters fragment that - consists of only hexadecimal digits is assumed to be a commit id and a - branch/tag otherwise. In an unlikely case this produces an incorrect - result, the last form with omitted can be used. + \c{[\i{refname}][\b{@}\i{commit}]} - Below are some examples of \cb{git} repository URLs: + Either \ci{refname}, \ci{commit}, or both must be specified. If both are + specified then \ci{refname} is only used to minimize the amount of data + to fetch and \ci{commit} must belong to its history. For example: \ - https://example.com/foo.git#v1.2.3 - https://example.com/foo.git#master - https://example.com/foo.git#48fba3625d65941bb85a39061bcf795d4949c778 - https://example.com/foo.git#bar@48fba3625d65941bb85a39061bcf795d4949c778 - https://example.com/foo.git#deadbeefdeadbeefdeadbeefdeadbeefdeadbeef@ + .../foo.git#master@48fba3625d65941bb85a39061bcf795d4949c778 + \ + + The \ci{refname} part can be a tag, branch, an abbreviated commit id, or + some other advertised reference under \cb{refs/}. While \ci{commit} must + be the complete, 40-characters SHA1 that need not be advertised. For + convenience, a 40-characters filter that consists of only hexadecimal + digits is assumed to be \ci{commit} even if not prefixed with \cb{@}. In + an unlikely event this produces an incorrect result, the \cb{@}-form with + omitted \ci{commit} can be used. For example: + + \ + .../foo.git#48fba3625d65941bb85a39061bcf795d4949c778 (commit id) + .../foo.git#deadbeefdeadbeefdeadbeefdeadbeefdeadbeef@ (reference) \ A \cb{git} repository has the same structure and manifest files as the diff --git a/bpkg/rep-fetch.cxx b/bpkg/rep-fetch.cxx index 6d37f9f..26a1dec 100644 --- a/bpkg/rep-fetch.cxx +++ b/bpkg/rep-fetch.cxx @@ -281,45 +281,11 @@ namespace bpkg const repository_location& rl, bool ignore_unknown) { - // Plan: - // - // 1. Check repos_dir//: - // - // 1.a If does not exist, git-clone into temp_dir///. - // - // 1.a Otherwise, move as temp_dir// and git-fetch. - // - // 2. Move from temp_dir// to repos_dir/// - // - // 3. Check if repos_dir///repositories.manifest exists: - // - // 3.a If exists, load. - // - // 3.b Otherwise, synthesize repository list with base repository. - // - // 4. Check if repos_dir///packages.manifest exists: - // - // 4.a If exists, load. (into "skeleton" packages list to be filled?) - // - // 4.b Otherwise, synthesize as if single 'location: ./'. - // - // 5. For each package location obtained on step 4: - // - // 5.a Load repos_dir////manifest. - // - // 5.b Run 'b info: repos_dir////' and fix-up - // package version. - // - // 6. Return repository and package manifests (certificate is NULL). - // - if (conf != nullptr && conf->empty ()) conf = dir_exists (bpkg_dir) ? ¤t_dir : nullptr; assert (conf == nullptr || !conf->empty ()); - // Clone or fetch the repository. - // dir_path sd (repository_state (rl)); auto_rmdir rm (temp_dir / sd); @@ -329,17 +295,19 @@ namespace bpkg rm_r (td); // If the git repository directory already exists, then we are fetching - // an already cloned repository. Move it to the temporary directory. + // an already existing repository, moved to the temporary directory first. + // Otherwise, we initialize the repository in the temporary directory. // - // In this case also set the filesystem_state_changed flag since we are - // modifying the repository filesystem state. + // In the first case also set the filesystem_state_changed flag since we + // are modifying the repository filesystem state. // // In the future we can probably do something smarter about the flag, // keeping it unset unless the repository state directory is really // changed. // dir_path rd; - bool fetch (false); + bool init (true); + if (conf != nullptr) { rd = *conf / repos_dir / sd; @@ -348,34 +316,95 @@ namespace bpkg { mv (rd, td); filesystem_state_changed = true; - fetch = true; + init = false; } } - dir_path nm (fetch ? git_fetch (co, rl, td) : git_clone (co, rl, td)); - dir_path fd (td / nm); // Full directory path. + // Initialize a new repository in the temporary directory. + // + if (init) + git_init (co, rl, td); - // Parse manifests. + // Fetch the repository in the temporary directory. // - git_repository_manifests rms ( - parse_repository_manifests ( - fd / repositories_file, - ignore_unknown, - rl)); + strings commits (git_fetch (co, rl, td)); + assert (!commits.empty ()); - git_package_manifests pms ( - parse_directory_manifests ( - fd / packages_file, - ignore_unknown, - rl)); + // Go through fetched commits, checking them out and collecting the + // prerequisite repositories and packages. + // + // For each checked out commit: + // + // - If repositories.manifest file doesn't exist, then synthesize the + // repository list with just the base repository. + // + // - Save the repositories into the resulting list. + // + // @@ Currently we just save ones from the first commit, assuming them + // to be the same for others. However, this is not very practical + // and must be fixed. + // + // - If packages.manifest file exists then load it into the "skeleton" + // packages list. Otherwise, synthesize it with the single: + // + // location: ./ + // + // - If any of the package locations point to non-existent directory, then + // assume it to be in a submodule and checkout submodules, recursively. + // + // - For each package location parse the package manifest and add it to + // the resulting list. + // + git_repository_manifests rms; + vector fps; - vector fps ( - parse_package_manifests (co, - fd, - nm.string (), - move (pms), - ignore_unknown, - rl)); + for (const string& c: commits) + { + git_checkout (co, td, c); + + // Parse repository manifests. + // + if (rms.empty ()) + rms = parse_repository_manifests ( + td / repositories_file, + ignore_unknown, + rl); + + // Parse package skeleton manifests. + // + git_package_manifests pms ( + parse_directory_manifests ( + td / packages_file, + ignore_unknown, + rl)); + + // Checkout submodules, if required. + // + for (const package_manifest& sm: pms) + { + dir_path d (td / path_cast (*sm.location)); + + if (!exists (d) || empty (d)) + { + git_checkout_submodules (co, td); + break; + } + } + + // Parse package manifests. + // + vector cps ( + parse_package_manifests (co, + td, + c, + move (pms), + ignore_unknown, + rl)); + + fps.insert (fps.end (), + make_move_iterator (cps.begin ()), + make_move_iterator (cps.end ())); + } // Move the state directory to its proper place. // @@ -389,7 +418,7 @@ namespace bpkg filesystem_state_changed = true; } - return rep_fetch_data {move (rms), move (fps), nullptr}; + return rep_fetch_data {move (rms), move (fps), nullptr /* certificate */}; } rep_fetch_data @@ -740,9 +769,8 @@ namespace bpkg } } - // This repository shouldn't already be in the location set since - // that would mean it has already been loaded and we shouldn't be - // here. + // Note that the repository may already be present in the location set + // multiple times with different fragments. // p->locations.push_back ( package_location {lazy_shared_ptr (db, r), @@ -799,7 +827,7 @@ namespace bpkg rep_remove (conf, t, r); // Finally, make sure that the external packages are available from a - // single repository. + // single directory-based repository. // // Sort the packages by name and version. This way the external packages // with the same upstream version and revision will be adjacent. diff --git a/bpkg/rep-remove.cxx b/bpkg/rep-remove.cxx index f161c71..fafe5a8 100644 --- a/bpkg/rep-remove.cxx +++ b/bpkg/rep-remove.cxx @@ -82,13 +82,12 @@ namespace bpkg const shared_ptr& p (rp); vector& ls (p->locations); - for (auto i (ls.cbegin ()), e (ls.cend ()); i != e; ++i) + for (auto i (ls.cbegin ()); i != ls.cend (); ) { if (i->repository.object_id () == name) - { - ls.erase (i); - break; - } + i = ls.erase (i); + else + ++i; } if (ls.empty ()) -- cgit v1.1