diff options
Diffstat (limited to 'bbot/agent.cxx')
-rw-r--r-- | bbot/agent.cxx | 486 |
1 files changed, 263 insertions, 223 deletions
diff --git a/bbot/agent.cxx b/bbot/agent.cxx index 3e4f8dc..76c3a86 100644 --- a/bbot/agent.cxx +++ b/bbot/agent.cxx @@ -9,12 +9,8 @@ #include <iostream> #include <butl/pager> -#include <butl/fdstream> #include <butl/filesystem> // dir_iterator -#include <butl/manifest-parser> -#include <butl/manifest-serializer> - #include <bbot/manifest> #include <bbot/types> @@ -29,50 +25,64 @@ using namespace std; using namespace butl; using namespace bbot; +// The btrfs tool likes to print informational messages, like "Created +// snapshot such and such". Luckily, it writes them to stdout while proper +// diagnostics to stderr. +// +template <typename... A> +inline void +btrfs (tracer& t, A&&... a) +{ + if (verb >= 3) + run (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...); + else + run (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...); +} + +template <typename... A> +inline butl::process_exit::code_type +btrfs_exit (tracer& t, A&&... a) +{ + return verb >= 3 + ? run_exit (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...) + : run_exit (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...); +} + +agent_options ops; + const string bs_prot ("1"); // Bootstrap protocol version. string tc_name; // Toolchain name. string tc_num; // Toolchain number. string tc_id; // Toolchain id. -template <typename T> -static T -parse_manifest (const path& f, const char* what, bool ignore_unknown = true) +static bootstrapped_machine_manifest +bootstrap_machine (const dir_path& md, const machine_manifest& mm) { - try - { - if (!file_exists (f)) - fail << what << " manifest file " << f << " does not exist"; + bootstrapped_machine_manifest r { + mm, + toolchain_manifest {tc_id}, + bootstrap_manifest { + bootstrap_manifest::versions_type { + {"bbot", BBOT_VERSION}, + {"libbbot", LIBBBOT_VERSION}, + {"libbpkg", LIBBPKG_VERSION}, + {"libbutl", LIBBUTL_VERSION} + } + } + }; - ifdstream ifs (f); - manifest_parser mp (ifs, f.string ()); - return T (mp, ignore_unknown); - } - catch (const manifest_parsing& e) - { - fail << "invalid " << what << " manifest: " - << f << ':' << e.line << ':' << e.column << ": " << e.description - << endf; - } - catch (const io_error& e) - { - fail << "unable to read " << what << " manifest " << f << ": " << e - << endf; - } - catch (const system_error& e) // EACCES, etc. + if (!ops.fake_bootstrap ()) { - fail << "unable to access " << what << " manifest " << f << ": " << e - << endf; } -} - -/* -static bootstrapped_machine_manifest -bootstrap_machine (const dir_path&); + serialize_manifest (r, md / "manifest", "bootstrapped machine"); + return r; +} static machine_manifests enumerate_machines (const dir_path& rd) +try { tracer trace ("enumerate_machines"); @@ -93,231 +103,242 @@ enumerate_machines (const dir_path& rd) // Inside we have machines. // - for (const dir_entry& me: dir_iterator (vd)) + try { - const string mn (me.path ().string ()); - - if (me.type () != entry_type::directory || mn[0] == '.') - continue; - - const dir_path md (dir_path (vd) /= mn); - - // Our endgoal here is to obtain a bootstrapped snapshot of this machine - // while watching out for potential race conditions (machines being - // added/upgraded/removed; see the manual for details). - // - // So here is our overall plan: - // - // 1. Resolve current subvolume link for our bootstrap protocol. - // - // 2. If there is no link, cleanup and ignore this machine. - // - // 3. Try to create a snapshot of current subvolume (this operation is - // atomic). If failed (e.g., someone changed the link and removed the - // subvolume in the meantime), retry from #1. - // - // 4. Compare the snapshot to the already bootstrapped version (if any) - // and see if we need to re-bootstrap. If so, use the snapshot as a - // starting point. Rename to bootstrapped at the end (atomic). - // - const dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P> - const dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<toolc...> - bool te (dir_exists (tp)); - - auto delete_t = [&tp] () + for (const dir_entry& me: dir_iterator (vd)) { - // btrfs property set -ts $tp ro false - // btrfs subvolume delete $tp - }; + const string mn (me.path ().string ()); - for (size_t retry (0);; ++retry) - { - if (retry != 0) - sleep (1); + if (me.type () != entry_type::directory || mn[0] == '.') + continue; + + const dir_path md (dir_path (vd) /= mn); - // Resolve the link to subvolume path. + // Our endgoal here is to obtain a bootstrapped snapshot of this + // machine while watching out for potential race conditions (machines + // being added/upgraded/removed; see the manual for details). // - dir_path sp; // <name>-<P>.<R> - try + // So here is our overall plan: + // + // 1. Resolve current subvolume link for our bootstrap protocol. + // + // 2. If there is no link, cleanup and ignore this machine. + // + // 3. Try to create a snapshot of current subvolume (this operation is + // atomic). If failed (e.g., someone changed the link and removed + // the subvolume in the meantime), retry from #1. + // + // 4. Compare the snapshot to the already bootstrapped version (if + // any) and see if we need to re-bootstrap. If so, use the snapshot + // as a starting point. Rename to bootstrapped at the end (atomic). + // + const dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P> + const dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<too...> + bool te (dir_exists (tp)); + + auto delete_t = [&tp, &trace] () + { + btrfs (trace, "property", "set", "-ts", tp, "ro", "false"); + btrfs (trace, "subvolume", "delete", tp); + }; + + for (size_t retry (0);; ++retry) { - char b [PATH_MAX + 1]; - ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b))); + if (retry != 0) + sleep (1); - if (r == -1) + // Resolve the link to subvolume path. + // + dir_path sp; // <name>-<P>.<R> + try { - if (errno != ENOENT) - throw_generic_error (errno); + char b [PATH_MAX + 1]; + ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b))); + + if (r == -1) + { + if (errno != ENOENT) + throw_generic_error (errno); + } + else if (static_cast<size_t> (r) >= sizeof (b)) + throw_generic_error (EINVAL); + else + { + b[r] = '\0'; + sp = dir_path (b); + if (sp.relative ()) + sp = md / sp; + } } - else if (static_cast<size_t> (r) >= sizeof (b)) - throw_generic_error (EINVAL); - else + catch (const system_error& e) { - b[r] = '\0'; - sp = dir_path (b); - if (sp.relative ()) - sp = md / sp; + fail << "unable to read subvolume link " << lp << ": " << e; } - } - catch (const system_error& e) - { - fail << "unable to read subvolume link " << lp << ": " << e; - } - // If the resolution fails, then this means there is no current - // machine subvolume (for this bootstrap protocol). In this case we - // clean up our toolchain subvolume (<name>-<toolchain>) and ignore - // this machine. - // - if (sp.empty ()) - { - if (te) - delete_t (); + // If the resolution fails, then this means there is no current + // machine subvolume (for this bootstrap protocol). In this case we + // clean up our toolchain subvolume (<name>-<toolchain>) and ignore + // this machine. + // + if (sp.empty ()) + { + if (te) + delete_t (); - break; - } + l2 ([&]{trace << "skipping " << md << ": no subvolume link";}); + break; + } - // <name>-<toolchain>-<xxx> - // - const dir_path xp (dir_path (md) /= - path::traits::temp_name (mn + '-' + tc_name)); + // <name>-<toolchain>-<xxx> + // + const dir_path xp (dir_path (md) /= + path::traits::temp_name (mn + '-' + tc_name)); - // btrfs subvolume snapshot $sp $xp - if (false) - { - if (retry >= 10) - fail << "unable to snapshot subvolume " << sp; + if (btrfs_exit (trace, "subvolume", "snapshot", sp, xp) != 0) + { + if (retry >= 10) + fail << "unable to snapshot subvolume " << sp; - continue; - } + continue; + } - // Load the (original) machine manifest. - // - auto mm ( - parse_manifest<machine_manifest> (sp / "manifest", "machine")); + // Load the (original) machine manifest. + // + auto mm ( + parse_manifest<machine_manifest> (sp / "manifest", "machine")); - // If we already have <name>-<toolchain>, see if it needs to be re- - // bootstrapped. Things that render it obsolete: - // - // 1. New machine revision (compare machine ids). - // 2. New toolchain (compare toolchain ids). - // 3. New bbot/libbbot (compare versions). - // - // The last case has a complication: what should we do if we have - // bootstrapped a newer version of bbot? This would mean that we are - // about to be stopped and upgraded (and the upgraded version will - // probably be able to use the result). So we simply ignore this - // machine for this run. + // If we already have <name>-<toolchain>, see if it needs to be re- + // bootstrapped. Things that render it obsolete: + // + // 1. New machine revision (compare machine ids). + // 2. New toolchain (compare toolchain ids). + // 3. New bbot/libbbot (compare versions). + // + // The last case has a complication: what should we do if we have + // bootstrapped a newer version of bbot? This would mean that we are + // about to be stopped and upgraded (and the upgraded version will + // probably be able to use the result). So we simply ignore this + // machine for this run. - // Return -1 if older, 0 if the same, and +1 if newer. - // - auto compare_bbot = [] (const bootstrap_manifest& m) -> int - { - auto cmp = [&m] (const string& n, uint64_t v) -> int + // Return -1 if older, 0 if the same, and +1 if newer. + // + auto compare_bbot = [] (const bootstrap_manifest& m) -> int { - auto i = m.versions.find (n); + auto cmp = [&m] (const string& n, uint64_t v) -> int + { + auto i = m.versions.find (n); + return + i == m.versions.end () || i->second < v + ? -1 + : i->second > v ? 1 : 0; + }; + + // Start from the top assuming a new dependency cannot be added + // without changing the dependent's version. + // + int r; return - i == m.versions.end () || i->second < v - ? -1 - : i->second > v ? 1 : 0; + (r = cmp ("bbot", BBOT_VERSION)) != 0 ? r : + (r = cmp ("libbbot", LIBBBOT_VERSION)) != 0 ? r : + (r = cmp ("libbpkg", LIBBPKG_VERSION)) != 0 ? r : + (r = cmp ("libbutl", LIBBUTL_VERSION)) != 0 ? r : 0; }; - // Start from the top assuming a new dependency cannot be added - // without changing the dependent's version. - // - int r; - return - (r = cmp ("bbot", BBOT_VERSION)) != 0 ? r : - (r = cmp ("libbbot", LIBBBOT_VERSION)) != 0 ? r : - (r = cmp ("libbpkg", LIBBPKG_VERSION)) != 0 ? r : - (r = cmp ("libbutl", LIBBUTL_VERSION)) != 0 ? r : 0; - }; - - if (te) - { - auto bmm ( - parse_manifest<bootstrapped_machine_manifest> ( - tp / "manifest", - "bootstrapped machine")); - - if (bmm.machine.id != mm.id) + if (te) { - trace << "re-bootstrapping " << tp << ": new machine"; - te = false; - } + auto bmm ( + parse_manifest<bootstrapped_machine_manifest> ( + tp / "manifest", + "bootstrapped machine")); - if (bmm.toolchain.id != tc_id) - { - trace << "re-bootstrapping " << tp << ": new toolchain"; - te = false; - } + if (bmm.machine.id != mm.id) + { + l2 ([&]{trace << "re-bootstrapping " << tp << ": new machine";}); + te = false; + } - if (int i = compare_bbot (bmm.bootstrap)) - { - if (i < 0) + if (bmm.toolchain.id != tc_id) { - trace << "re-bootstrapping " << tp << ": new bbot"; + l2 ([&]{trace << "re-bootstrapping " << tp << ": new toolchain";}); te = false; } - else + + if (int i = compare_bbot (bmm.bootstrap)) { - trace << "ignoring " << tp << ": newer bbot"; - // btrfs subvolume snapshot $xp - break; + if (i < 0) + { + l2 ([&]{trace << "re-bootstrapping " << tp << ": new bbot";}); + te = false; + } + else + { + l2 ([&]{trace << "ignoring " << tp << ": old bbot";}); + btrfs (trace, "subvolume", "delete", xp); + break; + } } + + if (!te) + delete_t (); } + else + l2 ([&]{trace << "bootstrapping " << tp;}); if (!te) - delete_t (); - } - - if (!te) - { - // Use the <name>-<toolchain>-<xxx> snapshot that we have made to - // bootstrap the new machine. Then atomically rename it to - // <name>-<toolchain>. - // - bootstrapped_machine_manifest bmm (bootstrap_machine (xp)); - - try - { - mvdir (xp, tp); - } - catch (const system_error& e) { - fail << "unable to rename " << xp << " to " << tp; - } + // Use the <name>-<toolchain>-<xxx> snapshot that we have made to + // bootstrap the new machine. Then atomically rename it to + // <name>-<toolchain>. + // + bootstrapped_machine_manifest bmm (bootstrap_machine (xp, mm)); - te = true; + try + { + mvdir (xp, tp); + } + catch (const system_error& e) + { + fail << "unable to rename " << xp << " to " << tp; + } - // Check the boostrapped bbot version as above and ignore this - // machine if it's newer than us. - // - if (int i = compare_bbot (bmm.bootstrap)) - { - assert (i > 0); - trace << "ignoring " << tp << ": newer bbot"; - break; + te = true; + + // Check the boostrapped bbot version as above and ignore this + // machine if it's newer than us. + // + if (int i = compare_bbot (bmm.bootstrap)) + { + assert (i > 0); + l2 ([&]{trace << "ignoring " << tp << ": old bbot";}); + break; + } } - } - else - ;// btrfs subvolume snapshot $xp + else + btrfs (trace, "subvolume", "delete", xp); - // Add the machine to the list. - // - // In order not to forget to clear new fields, we are instead going to - // create a new instance with just the required fields. - // - r.push_back (machine_manifest (mm.id, mm.name, mm.summary)); + // Add the machine to the list. + // + // In order not to forget to clear new fields, we are instead going + // to create a new instance with just the required fields. + // + r.push_back (machine_manifest (mm.id, mm.name, mm.summary)); - break; + break; + } } } + catch (const system_error& e) + { + fail << "unable to iterate over " << vd << ": " << e << endf; + } } return r; } - -*/ +catch (const system_error& e) +{ + fail << "unable to iterate over " << rd << ": " << e << endf; +} extern "C" void handle_signal (int sig) @@ -339,7 +360,9 @@ main (int argc, char* argv[]) try { cli::argv_scanner scan (argc, argv, true); - agent_options ops (scan); + ops.parse (scan); + + verb = ops.verbose (); if (ops.systemd_daemon ()) { @@ -359,6 +382,11 @@ try warn.type_ = "<4>"; info.type_ = "<6>"; trace_type = "<7>"; + + info << "bbot agent for " << tc_name << '/' << tc_num << + info << "toolchain id " << tc_id << + info << "CPU(s) " << ops.cpu () << + info << "RAM(kB) " << ops.ram (); } tracer trace ("main"); @@ -412,16 +440,28 @@ try fail << "unable to set signal handler: " << system_error (errno, generic_category ()); // Sanitize. - info << "bbot agent for " << tc_name << '/' << tc_num << - info << "toolchain id " << tc_id << - info << "CPU(s) " << ops.cpu () << - info << "RAM(kB) " << ops.ram (); - - for (;;) + // The work loop. The steps we go through are: + // + // 1. Enumerate the available machines, (re-)bootstrapping any of necessary. + // + // 2. Poll controller(s) for build tasks. + // + // 3. If no build tasks are available, go to #1 after sleeping a bit. + // + // 4. If a build task is returned, do it, upload the result, and go to #1 + // immediately. + // + for (unsigned int s; (s = 60); sleep (s)) { - error << "sleeping" << - warn << "lightly"; - sleep (10); + machine_manifests mms (enumerate_machines (ops.machines ())); + + if (ops.dump_machines ()) + { + for (const machine_manifest& mm: mms) + serialize_manifest (mm, cout, "stdout", "machine manifest"); + + return 0; + } } } catch (const failed&) |