aboutsummaryrefslogtreecommitdiff
path: root/bbot/agent/agent.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'bbot/agent/agent.cxx')
-rw-r--r-- bbot/agent/agent.cxx 78
1 files changed, 60 insertions, 18 deletions
diff --git a/bbot/agent/agent.cxx b/bbot/agent/agent.cxx
index 09d520f..24e3f81 100644
--- a/bbot/agent/agent.cxx
+++ b/bbot/agent/agent.cxx
@@ -670,13 +670,20 @@ snapshot_path (const dir_path& tp)
to_string (inst));
}
-// Return available machines, (re-)bootstrapping them if necessary.
+// Return the global toolchain lock and the list of available machines,
+// (re-)bootstrapping them if necessary.
+//
+// Note that this function returns both the machines that this process managed
+// to lock and the machines locked by other processes (except those that are
+// being bootstrapped), in case the caller needs to interrupt one of them for
+// a higher-priority task. In the latter case, the manifest only contains the
+// machine_manifest information.
//
struct bootstrapped_machine
{
dir_path path;
- bootstrapped_machine_manifest manifest;
machine_lock lock;
+ bootstrapped_machine_manifest manifest;
};
using bootstrapped_machines = vector<bootstrapped_machine>;
@@ -712,6 +719,7 @@ try
r.push_back (
bootstrapped_machine {
dir_path (ops.machines ()) /= mh.name, // For diagnostics.
+ machine_lock (),
bootstrapped_machine_manifest {
machine_manifest {
move (mh.id),
@@ -722,8 +730,7 @@ try
nullopt,
strings ()},
toolchain_manifest {tc_id},
- bootstrap_manifest {}},
- machine_lock ()});
+ bootstrap_manifest {}}});
return pr;
}
@@ -821,22 +828,52 @@ try
none = none && sp.empty ();
- // Try to lock the machine, skipping it if already locked.
+ // Try to lock the machine, skipping it if being bootstrapped.
//
machine_lock ml (lock_machine (tl, tp));
if (!ml.locked ())
{
- if (verb >= 1) // @@ TMP: restore l4 tracing.
+ // @@ TMP: restore l4 tracing.
+
+ if (!ml.prio) // Being bootstrapped.
{
- diag_record dr (trace);
- dr << "skipping " << md << ": locked by " << ml.pid
- << " with priority ";
- if (ml.prio)
- dr << *ml.prio;
- else
- dr << "<bootstrap>";
+ l1 ([&]{trace << "skipping " << md << ": being bootstrapped "
+ << "by " << ml.pid;});
+ break;
+ }
+
+ // Get the machine manifest (subset of the steps performed for
+ // the locked case below).
+ //
+ // Note that it's possible the machine we get is not what was
+ // originally locked by the other process (e.g., it has been
+ // upgraded since). It's also possible that if and when we
+ // interrupt and lock this machine, it will be a different
+ // machine (e.g., it has been upgraded since we read this
+ // machine manifest). To deal with all of that we will be
+ // reloading this information if/when we acquire the lock to
+ // this machine.
+ //
+ if (sp.empty ())
+ {
+ l3 ([&]{trace << "skipping " << md << ": no subvolume link";});
+ break;
}
+
+ l1 ([&]{trace << "keeping " << md << ": locked by " << ml.pid
+ << " with priority " << *ml.prio;});
+
+ auto mm (
+ parse_manifest<machine_manifest> (sp / "manifest", "machine"));
+
+ // Add the machine to the lists and bail out.
+ //
+ r.push_back (bootstrapped_machine {
+ move (tp),
+ move (ml),
+ bootstrapped_machine_manifest {move (mm), {}, {}}});
+
break;
}
@@ -1007,7 +1044,7 @@ try
// Add the machine to the lists.
//
r.push_back (
- bootstrapped_machine {move (tp), move (*bmm), move (ml)});
+ bootstrapped_machine {move (tp), move (ml), move (*bmm)});
break;
} // Retry loop.
@@ -1731,9 +1768,14 @@ try
// Note: do not assume tq.machines.size () == ms.size ().
//
for (const bootstrapped_machine& m: ms)
- tq.machines.emplace_back (m.manifest.machine.id,
- m.manifest.machine.name,
- m.manifest.machine.summary);
+ {
+ // @@ For now skip machines locked by other processes.
+ //
+ if (ops.fake_machine_specified () || m.lock.locked ())
+ tq.machines.emplace_back (m.manifest.machine.id,
+ m.manifest.machine.name,
+ m.manifest.machine.summary);
+ }
if (ops.dump_machines ())
{
@@ -1898,7 +1940,7 @@ try
{
for (bootstrapped_machine& m: ms)
{
- if (mh.id == m.manifest.machine.id)
+ if (mh.name == m.manifest.machine.name)
{
if (!ops.fake_machine_specified ())
m.lock.write (tl, 1234 /* prio */);