From 780290277a51853b2e515b16898ca0fcfa1e9e71 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 26 Feb 2018 22:00:22 +0300 Subject: Update rep-fetch --- bpkg/rep-fetch.cxx | 315 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 238 insertions(+), 77 deletions(-) (limited to 'bpkg/rep-fetch.cxx') diff --git a/bpkg/rep-fetch.cxx b/bpkg/rep-fetch.cxx index 3956c5a..3bede3c 100644 --- a/bpkg/rep-fetch.cxx +++ b/bpkg/rep-fetch.cxx @@ -4,15 +4,19 @@ #include +#include + #include #include // operator<<(ostream, process_path) #include #include #include +#include #include #include #include +#include #include #include @@ -21,6 +25,22 @@ using namespace butl; namespace bpkg { + // The fetch operation failure may result in mismatch of the (rolled back) + // repository database state and the repository filesystem state. Restoring + // the filesystem state on failure would require making copies which seems + // unnecessarily pessimistic. So instead, we will revert the repository + // state to the clean state as if repositories were added but never fetched + // (see rep_remove_clean() for more details). + // + // The following flag is set by the rep_fetch_*() functions when they are + // about to change the repository filesystem state. That, in particular, + // means that the flag will be set even if the subsequent fetch operation + // fails, and so the caller can rely on it while handling the thrown + // exception. The flag must be reset by such a caller prior to the + // rep_fetch_*() call. + // + static bool filesystem_state_changed; + static rep_fetch_data rep_fetch_bpkg (const common_options& co, const dir_path* conf, @@ -157,6 +177,14 @@ namespace bpkg auto_rmdir rm (temp_dir / sd); dir_path& td (rm.path); + // We are about to modify the repository filesystem state. + // + // In the future we can probably do something smarter about the flag, + // keeping it unset unless the repository state directory is really + // changed. 
+ // + filesystem_state_changed = true; + if (exists (td)) rm_r (td); @@ -389,21 +417,40 @@ namespace bpkg return rep_fetch_data (); } + using repositories = set>; + static void - rep_fetch (const configuration_options& co, - transaction& t, + rep_fetch (const common_options& co, + const dir_path& conf, + database& db, const shared_ptr& r, - const shared_ptr& root, - const string& reason) + repositories& fetched, + repositories& removed, + const string& reason = string ()) { tracer trace ("rep_fetch(rep)"); - database& db (t.database ()); tracer_guard tg (db, trace); + // Check that the repository is not fetched yet and register it as fetched + // otherwise. + // + // Note that we can end up with a repository dependency cycle via + // prerequisites. Thus we register the repository before recursing into its + // dependencies. + // + if (!fetched.insert (r).second) // Is already fetched. + return; + const repository_location& rl (r->location); l4 ([&]{trace << r->name << " " << rl;}); - assert (rl.absolute () || rl.remote ()); + + // Cancel the repository removal. + // + // Note that this is an optimization as the rep_remove() function checks + // for reachability of the repository being removed. + // + removed.erase (r); // The fetch_*() functions below will be quiet at level 1, which // can be quite confusing if the download hangs. @@ -414,23 +461,45 @@ namespace bpkg dr << "fetching " << r->name; - const auto& ua (root->complements); - - if (ua.find (lazy_shared_ptr (db, r)) == ua.end ()) - { - assert (!reason.empty ()); + if (!reason.empty ()) dr << " (" << reason << ")"; - } } - r->fetched = true; // Mark as being fetched. + // Register complements and prerequisites for potential removal unless + // they are fetched. Clear repository dependency sets afterwards. 
+ // + auto remove = [&fetched, &removed] (const lazy_shared_ptr& rp) + { + shared_ptr r (rp.load ()); + if (fetched.find (r) == fetched.end ()) + removed.insert (move (r)); + }; - // Load the repositories and packages and use it to populate the + for (const lazy_shared_ptr& cr: r->complements) + { + // Remove the complement unless it is the root repository (see + // rep_fetch() for details). + // + if (cr.object_id () != "") + remove (cr); + } + + for (const lazy_weak_ptr& pr: r->prerequisites) + remove (lazy_shared_ptr (pr)); + + r->complements.clear (); + r->prerequisites.clear (); + + // Remove this repository from locations of the available packages it + // contains. + // + rep_remove_package_locations (db, r->name); + + // Load the repository and package manifests and use them to populate the // prerequisite and complement repository sets as well as available // packages. // - rep_fetch_data rfd ( - rep_fetch (co, &co.directory (), rl, true /* ignore_unknow */)); + rep_fetch_data rfd (rep_fetch (co, &conf, rl, true /* ignore_unknow */)); for (repository_manifest& rm: rfd.repositories) { @@ -439,51 +508,53 @@ namespace bpkg if (rr == repository_role::base) continue; // Entry for this repository. + repository_location& l (rm.location); + // If the location is relative, complete it using this repository // as a base. // - if (rm.location.relative ()) + if (l.relative ()) { try { - rm.location = repository_location (rm.location, rl); + l = repository_location (l, rl); } catch (const invalid_argument& e) { - fail << "invalid relative repository location '" << rm.location + fail << "invalid relative repository location '" << l << "': " << e << info << "base repository location is " << rl; } } - // We might already have this repository in the database. + // Create the new repository if it is not in the database yet. Otherwise + // update its location. 
// - shared_ptr pr ( - db.find ( - rm.location.canonical_name ())); + shared_ptr pr (db.find (l.canonical_name ())); if (pr == nullptr) { - pr = make_shared (move (rm.location)); + pr = make_shared (move (l)); db.persist (pr); // Enter into session, important if recursive. } + else if (pr->location.url () != l.url ()) + { + pr->location = move (l); + db.update (r); + } - // Load the prerequisite repository unless it has already been - // (or is already being) fetched. + // Load the prerequisite repository. // - if (!pr->fetched) + string reason; + switch (rr) { - string reason; - switch (rr) - { - case repository_role::complement: reason = "complements "; break; - case repository_role::prerequisite: reason = "prerequisite of "; break; - case repository_role::base: assert (false); - } - reason += r->name; - - rep_fetch (co, t, pr, root, reason); + case repository_role::complement: reason = "complements "; break; + case repository_role::prerequisite: reason = "prerequisite of "; break; + case repository_role::base: assert (false); } + reason += r->name; + + rep_fetch (co, conf, db, pr, fetched, removed, reason); // @@ What if we have duplicated? Ideally, we would like to check // this once and as early as possible. The original idea was to @@ -530,7 +601,11 @@ namespace bpkg if (rl.type () == repository_type::git && r->complements.empty () && r->prerequisites.empty ()) - r->complements.insert (lazy_shared_ptr (db, root)); + r->complements.insert (lazy_shared_ptr (db, string ())); + + // Save the changes to the repository object. + // + db.update (r); // "Suspend" session while persisting packages to reduce memory // consumption. @@ -600,70 +675,156 @@ namespace bpkg } session::current (s); // "Resume". + } - // Save the changes to the repository object. 
+ static void + rep_fetch (const common_options& o, + const dir_path& conf, + transaction& t, + const vector>& repos) + { + database& db (t.database ()); + + // As a first step we fetch repositories recursively building the list of + // the former prerequisites and complements to be considered for removal. // - db.update (r); + // We delay the actual removal until we fetch all the required repositories + // as a dependency dropped by one repository can appear for another one. + // + try + { + // If fetch fails and the repository filesystem state is changed, then + // the configuration is broken, and we have to take some drastic + // measures (see below). + // + filesystem_state_changed = false; + + repositories fetched; + repositories removed; + + for (const lazy_shared_ptr& r: repos) + rep_fetch (o, conf, db, r.load (), fetched, removed); + + // Finally, remove dangling repositories. + // + for (const shared_ptr& r: removed) + rep_remove (conf, db, r); + } + catch (const failed&) + { + t.rollback (); + + if (filesystem_state_changed) + { + // Warn prior to the cleanup operation that potentially can also fail. + // Note that we assume that the diagnostics has already been issued. + // + warn << "repository state is now broken and will be cleaned up" << + info << "run 'bpkg rep-fetch' to update"; + + rep_remove_clean (conf, db); + } + + throw; + } + } + + void + rep_fetch (const common_options& o, + const dir_path& conf, + database& db, + const vector& rls) + { + vector> repos; + repos.reserve (rls.size ()); + + transaction t (db.begin ()); + + shared_ptr root (db.load ("")); + repository::complements_type& ua (root->complements); // User-added repos. + + for (const repository_location& rl: rls) + { + lazy_shared_ptr r (db, rl.canonical_name ()); + + // Add the repository, unless it is already a top-level one and has the + // same location. 
+ // + if (ua.find (r) == ua.end () || r.load ()->location.url () != rl.url ()) + rep_add (db, rl); + + repos.emplace_back (r); + } + + rep_fetch (o, conf, t, repos); + + t.commit (); } int - rep_fetch (const rep_fetch_options& o, cli::scanner&) + rep_fetch (const rep_fetch_options& o, cli::scanner& args) { tracer trace ("rep_fetch"); dir_path c (o.directory ()); l4 ([&]{trace << "configuration: " << c;}); + // Build the list of repositories the user wants to fetch. + // + vector> repos; + database db (open (c, trace)); transaction t (db.begin ()); session s; // Repository dependencies can have cycles. shared_ptr root (db.load ("")); - const auto& ua (root->complements); // User-added repositories. + repository::complements_type& ua (root->complements); // User-added repos. - if (ua.empty ()) - fail << "configuration " << c << " has no repositories" << - info << "use 'bpkg rep-add' to add a repository"; - - // Clean repositories and available packages. At the end only - // repositories that were explicitly added by the user and the - // special root repository should remain. - // - db.erase_query (); + if (!args.more ()) + { + if (ua.empty ()) + fail << "configuration " << c << " has no repositories" << + info << "use 'bpkg rep-add' to add a repository"; - for (shared_ptr r: pointer_result (db.query ())) + for (const lazy_shared_ptr& r: ua) + repos.push_back (r); + } + else { - if (r == root) - { - l5 ([&]{trace << "skipping root";}); - } - else if (ua.find (lazy_shared_ptr (db, r)) != ua.end ()) + while (args.more ()) { - l4 ([&]{trace << "cleaning " << r->name;}); + // Try to map the argument to a user-added repository. + // + // If this is a repository name then it must be present in the + // configuration. If this is a repository location then we add it to + // the configuration. 
+ // + lazy_shared_ptr r; + string a (args.next ()); - r->complements.clear (); - r->prerequisites.clear (); - r->fetched = false; - db.update (r); - } - else - { - l4 ([&]{trace << "erasing " << r->name;}); - db.erase (r); - } - } + if (repository_name (a)) + { + lazy_shared_ptr rp (db, a); - // Now recursively fetch prerequisite/complement repositories and - // their packages. - // - for (const lazy_shared_ptr& lp: ua) - { - shared_ptr r (lp.load ()); + if (ua.find (rp) != ua.end ()) + r = move (rp); + else + fail << "repository '" << a << "' does not exist in this " + << "configuration"; + } + else + //@@ TODO: check if exists in root & same location and avoid + // calling rep_add. Get rid of quiet mode. + // + r = lazy_shared_ptr ( + db, rep_add (db, parse_location (a, nullopt /* type */))); - if (!r->fetched) // Can already be loaded as a prerequisite/complement. - rep_fetch (o, t, r, root, ""); // No reason (user-added). + repos.emplace_back (move (r)); + } } + rep_fetch (o, c, t, repos); + size_t rcount (0), pcount (0); if (verb) { -- cgit v1.1