aboutsummaryrefslogtreecommitdiff
path: root/bbot/agent.cxx
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2017-04-08 14:14:26 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2017-04-08 14:14:26 +0200
commit36e0c88e7a3912c8a2e6594841172adb9c14525b (patch)
tree909a269ded721a0201a01d3493af6fc11dd75292 /bbot/agent.cxx
parentcfd31379be5eefb22a72b5ee90ce8fd17a0802b7 (diff)
Implement machine enumeration
Diffstat (limited to 'bbot/agent.cxx')
-rw-r--r--bbot/agent.cxx486
1 files changed, 263 insertions, 223 deletions
diff --git a/bbot/agent.cxx b/bbot/agent.cxx
index 3e4f8dc..76c3a86 100644
--- a/bbot/agent.cxx
+++ b/bbot/agent.cxx
@@ -9,12 +9,8 @@
#include <iostream>
#include <butl/pager>
-#include <butl/fdstream>
#include <butl/filesystem> // dir_iterator
-#include <butl/manifest-parser>
-#include <butl/manifest-serializer>
-
#include <bbot/manifest>
#include <bbot/types>
@@ -29,50 +25,64 @@ using namespace std;
using namespace butl;
using namespace bbot;
+// The btrfs tool likes to print informational messages, like "Created
+// snapshot such and such". Luckily, it writes them to stdout while proper
+// diagnostics to stderr.
+//
+template <typename... A>
+inline void
+btrfs (tracer& t, A&&... a)
+{
+ if (verb >= 3)
+ run (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...);
+ else
+ run (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...);
+}
+
+template <typename... A>
+inline butl::process_exit::code_type
+btrfs_exit (tracer& t, A&&... a)
+{
+ return verb >= 3
+ ? run_exit (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...)
+ : run_exit (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...);
+}
+
+agent_options ops;
+
const string bs_prot ("1"); // Bootstrap protocol version.
string tc_name; // Toolchain name.
string tc_num; // Toolchain number.
string tc_id; // Toolchain id.
-template <typename T>
-static T
-parse_manifest (const path& f, const char* what, bool ignore_unknown = true)
+static bootstrapped_machine_manifest
+bootstrap_machine (const dir_path& md, const machine_manifest& mm)
{
- try
- {
- if (!file_exists (f))
- fail << what << " manifest file " << f << " does not exist";
+ bootstrapped_machine_manifest r {
+ mm,
+ toolchain_manifest {tc_id},
+ bootstrap_manifest {
+ bootstrap_manifest::versions_type {
+ {"bbot", BBOT_VERSION},
+ {"libbbot", LIBBBOT_VERSION},
+ {"libbpkg", LIBBPKG_VERSION},
+ {"libbutl", LIBBUTL_VERSION}
+ }
+ }
+ };
- ifdstream ifs (f);
- manifest_parser mp (ifs, f.string ());
- return T (mp, ignore_unknown);
- }
- catch (const manifest_parsing& e)
- {
- fail << "invalid " << what << " manifest: "
- << f << ':' << e.line << ':' << e.column << ": " << e.description
- << endf;
- }
- catch (const io_error& e)
- {
- fail << "unable to read " << what << " manifest " << f << ": " << e
- << endf;
- }
- catch (const system_error& e) // EACCES, etc.
+ if (!ops.fake_bootstrap ())
{
- fail << "unable to access " << what << " manifest " << f << ": " << e
- << endf;
}
-}
-
-/*
-static bootstrapped_machine_manifest
-bootstrap_machine (const dir_path&);
+ serialize_manifest (r, md / "manifest", "bootstrapped machine");
+ return r;
+}
static machine_manifests
enumerate_machines (const dir_path& rd)
+try
{
tracer trace ("enumerate_machines");
@@ -93,231 +103,242 @@ enumerate_machines (const dir_path& rd)
// Inside we have machines.
//
- for (const dir_entry& me: dir_iterator (vd))
+ try
{
- const string mn (me.path ().string ());
-
- if (me.type () != entry_type::directory || mn[0] == '.')
- continue;
-
- const dir_path md (dir_path (vd) /= mn);
-
- // Our endgoal here is to obtain a bootstrapped snapshot of this machine
- // while watching out for potential race conditions (machines being
- // added/upgraded/removed; see the manual for details).
- //
- // So here is our overall plan:
- //
- // 1. Resolve current subvolume link for our bootstrap protocol.
- //
- // 2. If there is no link, cleanup and ignore this machine.
- //
- // 3. Try to create a snapshot of current subvolume (this operation is
- // atomic). If failed (e.g., someone changed the link and removed the
- // subvolume in the meantime), retry from #1.
- //
- // 4. Compare the snapshot to the already bootstrapped version (if any)
- // and see if we need to re-bootstrap. If so, use the snapshot as a
- // starting point. Rename to bootstrapped at the end (atomic).
- //
- const dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P>
- const dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<toolc...>
- bool te (dir_exists (tp));
-
- auto delete_t = [&tp] ()
+ for (const dir_entry& me: dir_iterator (vd))
{
- // btrfs property set -ts $tp ro false
- // btrfs subvolume delete $tp
- };
+ const string mn (me.path ().string ());
- for (size_t retry (0);; ++retry)
- {
- if (retry != 0)
- sleep (1);
+ if (me.type () != entry_type::directory || mn[0] == '.')
+ continue;
+
+ const dir_path md (dir_path (vd) /= mn);
- // Resolve the link to subvolume path.
+ // Our end goal here is to obtain a bootstrapped snapshot of this
+ // machine while watching out for potential race conditions (machines
+ // being added/upgraded/removed; see the manual for details).
//
- dir_path sp; // <name>-<P>.<R>
- try
+ // So here is our overall plan:
+ //
+ // 1. Resolve current subvolume link for our bootstrap protocol.
+ //
+ // 2. If there is no link, cleanup and ignore this machine.
+ //
+ // 3. Try to create a snapshot of current subvolume (this operation is
+ // atomic). If failed (e.g., someone changed the link and removed
+ // the subvolume in the meantime), retry from #1.
+ //
+ // 4. Compare the snapshot to the already bootstrapped version (if
+ // any) and see if we need to re-bootstrap. If so, use the snapshot
+ // as a starting point. Rename to bootstrapped at the end (atomic).
+ //
+ const dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P>
+ const dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<too...>
+ bool te (dir_exists (tp));
+
+ auto delete_t = [&tp, &trace] ()
+ {
+ btrfs (trace, "property", "set", "-ts", tp, "ro", "false");
+ btrfs (trace, "subvolume", "delete", tp);
+ };
+
+ for (size_t retry (0);; ++retry)
{
- char b [PATH_MAX + 1];
- ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b)));
+ if (retry != 0)
+ sleep (1);
- if (r == -1)
+ // Resolve the link to subvolume path.
+ //
+ dir_path sp; // <name>-<P>.<R>
+ try
{
- if (errno != ENOENT)
- throw_generic_error (errno);
+ char b [PATH_MAX + 1];
+ ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b)));
+
+ if (r == -1)
+ {
+ if (errno != ENOENT)
+ throw_generic_error (errno);
+ }
+ else if (static_cast<size_t> (r) >= sizeof (b))
+ throw_generic_error (EINVAL);
+ else
+ {
+ b[r] = '\0';
+ sp = dir_path (b);
+ if (sp.relative ())
+ sp = md / sp;
+ }
}
- else if (static_cast<size_t> (r) >= sizeof (b))
- throw_generic_error (EINVAL);
- else
+ catch (const system_error& e)
{
- b[r] = '\0';
- sp = dir_path (b);
- if (sp.relative ())
- sp = md / sp;
+ fail << "unable to read subvolume link " << lp << ": " << e;
}
- }
- catch (const system_error& e)
- {
- fail << "unable to read subvolume link " << lp << ": " << e;
- }
- // If the resolution fails, then this means there is no current
- // machine subvolume (for this bootstrap protocol). In this case we
- // clean up our toolchain subvolume (<name>-<toolchain>) and ignore
- // this machine.
- //
- if (sp.empty ())
- {
- if (te)
- delete_t ();
+ // If the resolution fails, then this means there is no current
+ // machine subvolume (for this bootstrap protocol). In this case we
+ // clean up our toolchain subvolume (<name>-<toolchain>) and ignore
+ // this machine.
+ //
+ if (sp.empty ())
+ {
+ if (te)
+ delete_t ();
- break;
- }
+ l2 ([&]{trace << "skipping " << md << ": no subvolume link";});
+ break;
+ }
- // <name>-<toolchain>-<xxx>
- //
- const dir_path xp (dir_path (md) /=
- path::traits::temp_name (mn + '-' + tc_name));
+ // <name>-<toolchain>-<xxx>
+ //
+ const dir_path xp (dir_path (md) /=
+ path::traits::temp_name (mn + '-' + tc_name));
- // btrfs subvolume snapshot $sp $xp
- if (false)
- {
- if (retry >= 10)
- fail << "unable to snapshot subvolume " << sp;
+ if (btrfs_exit (trace, "subvolume", "snapshot", sp, xp) != 0)
+ {
+ if (retry >= 10)
+ fail << "unable to snapshot subvolume " << sp;
- continue;
- }
+ continue;
+ }
- // Load the (original) machine manifest.
- //
- auto mm (
- parse_manifest<machine_manifest> (sp / "manifest", "machine"));
+ // Load the (original) machine manifest.
+ //
+ auto mm (
+ parse_manifest<machine_manifest> (sp / "manifest", "machine"));
- // If we already have <name>-<toolchain>, see if it needs to be re-
- // bootstrapped. Things that render it obsolete:
- //
- // 1. New machine revision (compare machine ids).
- // 2. New toolchain (compare toolchain ids).
- // 3. New bbot/libbbot (compare versions).
- //
- // The last case has a complication: what should we do if we have
- // bootstrapped a newer version of bbot? This would mean that we are
- // about to be stopped and upgraded (and the upgraded version will
- // probably be able to use the result). So we simply ignore this
- // machine for this run.
+ // If we already have <name>-<toolchain>, see if it needs to be re-
+ // bootstrapped. Things that render it obsolete:
+ //
+ // 1. New machine revision (compare machine ids).
+ // 2. New toolchain (compare toolchain ids).
+ // 3. New bbot/libbbot (compare versions).
+ //
+ // The last case has a complication: what should we do if we have
+ // bootstrapped a newer version of bbot? This would mean that we are
+ // about to be stopped and upgraded (and the upgraded version will
+ // probably be able to use the result). So we simply ignore this
+ // machine for this run.
- // Return -1 if older, 0 if the same, and +1 if newer.
- //
- auto compare_bbot = [] (const bootstrap_manifest& m) -> int
- {
- auto cmp = [&m] (const string& n, uint64_t v) -> int
+ // Return -1 if older, 0 if the same, and +1 if newer.
+ //
+ auto compare_bbot = [] (const bootstrap_manifest& m) -> int
{
- auto i = m.versions.find (n);
+ auto cmp = [&m] (const string& n, uint64_t v) -> int
+ {
+ auto i = m.versions.find (n);
+ return
+ i == m.versions.end () || i->second < v
+ ? -1
+ : i->second > v ? 1 : 0;
+ };
+
+ // Start from the top assuming a new dependency cannot be added
+ // without changing the dependent's version.
+ //
+ int r;
return
- i == m.versions.end () || i->second < v
- ? -1
- : i->second > v ? 1 : 0;
+ (r = cmp ("bbot", BBOT_VERSION)) != 0 ? r :
+ (r = cmp ("libbbot", LIBBBOT_VERSION)) != 0 ? r :
+ (r = cmp ("libbpkg", LIBBPKG_VERSION)) != 0 ? r :
+ (r = cmp ("libbutl", LIBBUTL_VERSION)) != 0 ? r : 0;
};
- // Start from the top assuming a new dependency cannot be added
- // without changing the dependent's version.
- //
- int r;
- return
- (r = cmp ("bbot", BBOT_VERSION)) != 0 ? r :
- (r = cmp ("libbbot", LIBBBOT_VERSION)) != 0 ? r :
- (r = cmp ("libbpkg", LIBBPKG_VERSION)) != 0 ? r :
- (r = cmp ("libbutl", LIBBUTL_VERSION)) != 0 ? r : 0;
- };
-
- if (te)
- {
- auto bmm (
- parse_manifest<bootstrapped_machine_manifest> (
- tp / "manifest",
- "bootstrapped machine"));
-
- if (bmm.machine.id != mm.id)
+ if (te)
{
- trace << "re-bootstrapping " << tp << ": new machine";
- te = false;
- }
+ auto bmm (
+ parse_manifest<bootstrapped_machine_manifest> (
+ tp / "manifest",
+ "bootstrapped machine"));
- if (bmm.toolchain.id != tc_id)
- {
- trace << "re-bootstrapping " << tp << ": new toolchain";
- te = false;
- }
+ if (bmm.machine.id != mm.id)
+ {
+ l2 ([&]{trace << "re-bootstrapping " << tp << ": new machine";});
+ te = false;
+ }
- if (int i = compare_bbot (bmm.bootstrap))
- {
- if (i < 0)
+ if (bmm.toolchain.id != tc_id)
{
- trace << "re-bootstrapping " << tp << ": new bbot";
+ l2 ([&]{trace << "re-bootstrapping " << tp << ": new toolchain";});
te = false;
}
- else
+
+ if (int i = compare_bbot (bmm.bootstrap))
{
- trace << "ignoring " << tp << ": newer bbot";
- // btrfs subvolume snapshot $xp
- break;
+ if (i < 0)
+ {
+ l2 ([&]{trace << "re-bootstrapping " << tp << ": new bbot";});
+ te = false;
+ }
+ else
+ {
+ l2 ([&]{trace << "ignoring " << tp << ": old bbot";});
+ btrfs (trace, "subvolume", "delete", xp);
+ break;
+ }
}
+
+ if (!te)
+ delete_t ();
}
+ else
+ l2 ([&]{trace << "bootstrapping " << tp;});
if (!te)
- delete_t ();
- }
-
- if (!te)
- {
- // Use the <name>-<toolchain>-<xxx> snapshot that we have made to
- // bootstrap the new machine. Then atomically rename it to
- // <name>-<toolchain>.
- //
- bootstrapped_machine_manifest bmm (bootstrap_machine (xp));
-
- try
- {
- mvdir (xp, tp);
- }
- catch (const system_error& e)
{
- fail << "unable to rename " << xp << " to " << tp;
- }
+ // Use the <name>-<toolchain>-<xxx> snapshot that we have made to
+ // bootstrap the new machine. Then atomically rename it to
+ // <name>-<toolchain>.
+ //
+ bootstrapped_machine_manifest bmm (bootstrap_machine (xp, mm));
- te = true;
+ try
+ {
+ mvdir (xp, tp);
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to rename " << xp << " to " << tp;
+ }
- // Check the boostrapped bbot version as above and ignore this
- // machine if it's newer than us.
- //
- if (int i = compare_bbot (bmm.bootstrap))
- {
- assert (i > 0);
- trace << "ignoring " << tp << ": newer bbot";
- break;
+ te = true;
+
+ // Check the bootstrapped bbot version as above and ignore this
+ // machine if it's newer than us.
+ //
+ if (int i = compare_bbot (bmm.bootstrap))
+ {
+ assert (i > 0);
+ l2 ([&]{trace << "ignoring " << tp << ": old bbot";});
+ break;
+ }
}
- }
- else
- ;// btrfs subvolume snapshot $xp
+ else
+ btrfs (trace, "subvolume", "delete", xp);
- // Add the machine to the list.
- //
- // In order not to forget to clear new fields, we are instead going to
- // create a new instance with just the required fields.
- //
- r.push_back (machine_manifest (mm.id, mm.name, mm.summary));
+ // Add the machine to the list.
+ //
+ // In order not to forget to clear new fields, we are instead going
+ // to create a new instance with just the required fields.
+ //
+ r.push_back (machine_manifest (mm.id, mm.name, mm.summary));
- break;
+ break;
+ }
}
}
+ catch (const system_error& e)
+ {
+ fail << "unable to iterate over " << vd << ": " << e << endf;
+ }
}
return r;
}
-
-*/
+catch (const system_error& e)
+{
+ fail << "unable to iterate over " << rd << ": " << e << endf;
+}
extern "C" void
handle_signal (int sig)
@@ -339,7 +360,9 @@ main (int argc, char* argv[])
try
{
cli::argv_scanner scan (argc, argv, true);
- agent_options ops (scan);
+ ops.parse (scan);
+
+ verb = ops.verbose ();
if (ops.systemd_daemon ())
{
@@ -359,6 +382,11 @@ try
warn.type_ = "<4>";
info.type_ = "<6>";
trace_type = "<7>";
+
+ info << "bbot agent for " << tc_name << '/' << tc_num <<
+ info << "toolchain id " << tc_id <<
+ info << "CPU(s) " << ops.cpu () <<
+ info << "RAM(kB) " << ops.ram ();
}
tracer trace ("main");
@@ -412,16 +440,28 @@ try
fail << "unable to set signal handler: "
<< system_error (errno, generic_category ()); // Sanitize.
- info << "bbot agent for " << tc_name << '/' << tc_num <<
- info << "toolchain id " << tc_id <<
- info << "CPU(s) " << ops.cpu () <<
- info << "RAM(kB) " << ops.ram ();
-
- for (;;)
+ // The work loop. The steps we go through are:
+ //
+ // 1. Enumerate the available machines, (re-)bootstrapping any if necessary.
+ //
+ // 2. Poll controller(s) for build tasks.
+ //
+ // 3. If no build tasks are available, go to #1 after sleeping a bit.
+ //
+ // 4. If a build task is returned, do it, upload the result, and go to #1
+ // immediately.
+ //
+ for (unsigned int s; (s = 60); sleep (s))
{
- error << "sleeping" <<
- warn << "lightly";
- sleep (10);
+ machine_manifests mms (enumerate_machines (ops.machines ()));
+
+ if (ops.dump_machines ())
+ {
+ for (const machine_manifest& mm: mms)
+ serialize_manifest (mm, cout, "stdout", "machine manifest");
+
+ return 0;
+ }
}
}
catch (const failed&)