aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2017-04-06 15:11:11 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2017-04-06 15:11:11 +0200
commite3af4c881790a95be2676a7f4fb8df4136f8a3f3 (patch)
treec646734af1f3d539385fec20facf4c636987ef27
parent578413a1f8781f03c2ddccb4d3c5d441c9579a3b (diff)
Add machine enumeration outline
-rw-r--r--bbot/agent.cli16
-rw-r--r--bbot/agent.cxx374
-rw-r--r--bbot/bbot-agent@.service12
-rw-r--r--bbot/bootstrap-manifest57
-rw-r--r--doc/manual.cli5
5 files changed, 426 insertions, 38 deletions
diff --git a/bbot/agent.cli b/bbot/agent.cli
index 761580c..c4ee356 100644
--- a/bbot/agent.cli
+++ b/bbot/agent.cli
@@ -11,18 +11,25 @@ include <bbot/common.cli>;
namespace bbot
{
{
- "<options> <file>",
+ "<options> <name> <num> <id> ",
"
\h|SYNOPSIS|
\cb{bbot-agent --help}\n
\cb{bbot-agent --version}\n
- \c{\b{bbot-agent} [<options>] <toolchain>}
+ \c{\b{bbot-agent} [<options>] <name> <num> <id>}
\h|DESCRIPTION|
\cb{bbot-agent} @@ TODO.
+
+ The <name> argument is the toolchain name, <id> \- the toolchain id,
+ and <num> \- the toolchain number in this deployment.
+
+ Note that on termination \cb{bbot-agent} may leave a working machine
+ snapshot behind. It is expected that the caller (normally Build OS
+ monitor) cleans it up before restarting the agent.
"
}
@@ -30,6 +37,11 @@ namespace bbot
{
"\h|OPTIONS|"
+ bool --systemd-daemon
+ {
+ "Start as a simple systemd daemon."
+ }
+
size_t --cpu = 1
{
"<num>",
diff --git a/bbot/agent.cxx b/bbot/agent.cxx
index 8c281fe..3e4f8dc 100644
--- a/bbot/agent.cxx
+++ b/bbot/agent.cxx
@@ -2,12 +2,20 @@
// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
// license : MIT; see accompanying LICENSE file
+#include <limits.h> // PATH_MAX
#include <signal.h> // signal()
-#include <unistd.h> // sleep()
+#include <unistd.h> // sleep(), readlink()
#include <iostream>
#include <butl/pager>
+#include <butl/fdstream>
+#include <butl/filesystem> // dir_iterator
+
+#include <butl/manifest-parser>
+#include <butl/manifest-serializer>
+
+#include <bbot/manifest>
#include <bbot/types>
#include <bbot/utility>
@@ -15,10 +23,302 @@
#include <bbot/diagnostics>
#include <bbot/agent-options>
+#include <bbot/bootstrap-manifest>
+
using namespace std;
using namespace butl;
using namespace bbot;
+const string bs_prot ("1"); // Bootstrap protocol version.
+
+string tc_name; // Toolchain name.
+string tc_num; // Toolchain number.
+string tc_id; // Toolchain id.
+
+template <typename T>
+static T
+parse_manifest (const path& f, const char* what, bool ignore_unknown = true)
+{
+ try
+ {
+ if (!file_exists (f))
+ fail << what << " manifest file " << f << " does not exist";
+
+ ifdstream ifs (f);
+ manifest_parser mp (ifs, f.string ());
+ return T (mp, ignore_unknown);
+ }
+ catch (const manifest_parsing& e)
+ {
+ fail << "invalid " << what << " manifest: "
+ << f << ':' << e.line << ':' << e.column << ": " << e.description
+ << endf;
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to read " << what << " manifest " << f << ": " << e
+ << endf;
+ }
+ catch (const system_error& e) // EACCES, etc.
+ {
+ fail << "unable to access " << what << " manifest " << f << ": " << e
+ << endf;
+ }
+}
+
+/*
+
+static bootstrapped_machine_manifest
+bootstrap_machine (const dir_path&);
+
+static machine_manifests
+enumerate_machines (const dir_path& rd)
+{
+ tracer trace ("enumerate_machines");
+
+ machine_manifests r;
+
+ // The first level are machine volumes.
+ //
+ for (const dir_entry& ve: dir_iterator (rd))
+ {
+ const string vn (ve.path ().string ());
+
+ // Ignore hidden directories.
+ //
+ if (ve.type () != entry_type::directory || vn[0] == '.')
+ continue;
+
+ const dir_path vd (dir_path (rd) /= vn);
+
+ // Inside we have machines.
+ //
+ for (const dir_entry& me: dir_iterator (vd))
+ {
+ const string mn (me.path ().string ());
+
+ if (me.type () != entry_type::directory || mn[0] == '.')
+ continue;
+
+ const dir_path md (dir_path (vd) /= mn);
+
+ // Our end goal here is to obtain a bootstrapped snapshot of this machine
+ // while watching out for potential race conditions (machines being
+ // added/upgraded/removed; see the manual for details).
+ //
+ // So here is our overall plan:
+ //
+ // 1. Resolve current subvolume link for our bootstrap protocol.
+ //
+ // 2. If there is no link, cleanup and ignore this machine.
+ //
+ // 3. Try to create a snapshot of current subvolume (this operation is
+ // atomic). If failed (e.g., someone changed the link and removed the
+ // subvolume in the meantime), retry from #1.
+ //
+ // 4. Compare the snapshot to the already bootstrapped version (if any)
+ // and see if we need to re-bootstrap. If so, use the snapshot as a
+ // starting point. Rename to bootstrapped at the end (atomic).
+ //
+ const dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P>
+ const dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<toolc...>
+ bool te (dir_exists (tp));
+
+ auto delete_t = [&tp] ()
+ {
+ // btrfs property set -ts $tp ro false
+ // btrfs subvolume delete $tp
+ };
+
+ for (size_t retry (0);; ++retry)
+ {
+ if (retry != 0)
+ sleep (1);
+
+ // Resolve the link to subvolume path.
+ //
+ dir_path sp; // <name>-<P>.<R>
+ try
+ {
+ char b [PATH_MAX + 1];
+ ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b)));
+
+ if (r == -1)
+ {
+ if (errno != ENOENT)
+ throw_generic_error (errno);
+ }
+ else if (static_cast<size_t> (r) >= sizeof (b))
+ throw_generic_error (EINVAL);
+ else
+ {
+ b[r] = '\0';
+ sp = dir_path (b);
+ if (sp.relative ())
+ sp = md / sp;
+ }
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to read subvolume link " << lp << ": " << e;
+ }
+
+ // If the resolution fails, then this means there is no current
+ // machine subvolume (for this bootstrap protocol). In this case we
+ // clean up our toolchain subvolume (<name>-<toolchain>) and ignore
+ // this machine.
+ //
+ if (sp.empty ())
+ {
+ if (te)
+ delete_t ();
+
+ break;
+ }
+
+ // <name>-<toolchain>-<xxx>
+ //
+ const dir_path xp (dir_path (md) /=
+ path::traits::temp_name (mn + '-' + tc_name));
+
+ // btrfs subvolume snapshot $sp $xp
+ if (false)
+ {
+ if (retry >= 10)
+ fail << "unable to snapshot subvolume " << sp;
+
+ continue;
+ }
+
+ // Load the (original) machine manifest.
+ //
+ auto mm (
+ parse_manifest<machine_manifest> (sp / "manifest", "machine"));
+
+ // If we already have <name>-<toolchain>, see if it needs to be re-
+ // bootstrapped. Things that render it obsolete:
+ //
+ // 1. New machine revision (compare machine ids).
+ // 2. New toolchain (compare toolchain ids).
+ // 3. New bbot/libbbot (compare versions).
+ //
+ // The last case has a complication: what should we do if we have
+ // bootstrapped a newer version of bbot? This would mean that we are
+ // about to be stopped and upgraded (and the upgraded version will
+ // probably be able to use the result). So we simply ignore this
+ // machine for this run.
+
+ // Return -1 if older, 0 if the same, and +1 if newer.
+ //
+ auto compare_bbot = [] (const bootstrap_manifest& m) -> int
+ {
+ auto cmp = [&m] (const string& n, uint64_t v) -> int
+ {
+ auto i = m.versions.find (n);
+ return
+ i == m.versions.end () || i->second < v
+ ? -1
+ : i->second > v ? 1 : 0;
+ };
+
+ // Start from the top assuming a new dependency cannot be added
+ // without changing the dependent's version.
+ //
+ int r;
+ return
+ (r = cmp ("bbot", BBOT_VERSION)) != 0 ? r :
+ (r = cmp ("libbbot", LIBBBOT_VERSION)) != 0 ? r :
+ (r = cmp ("libbpkg", LIBBPKG_VERSION)) != 0 ? r :
+ (r = cmp ("libbutl", LIBBUTL_VERSION)) != 0 ? r : 0;
+ };
+
+ if (te)
+ {
+ auto bmm (
+ parse_manifest<bootstrapped_machine_manifest> (
+ tp / "manifest",
+ "bootstrapped machine"));
+
+ if (bmm.machine.id != mm.id)
+ {
+ trace << "re-bootstrapping " << tp << ": new machine";
+ te = false;
+ }
+
+ if (bmm.toolchain.id != tc_id)
+ {
+ trace << "re-bootstrapping " << tp << ": new toolchain";
+ te = false;
+ }
+
+ if (int i = compare_bbot (bmm.bootstrap))
+ {
+ if (i < 0)
+ {
+ trace << "re-bootstrapping " << tp << ": new bbot";
+ te = false;
+ }
+ else
+ {
+ trace << "ignoring " << tp << ": newer bbot";
+ // btrfs subvolume delete $xp
+ break;
+ }
+ }
+
+ if (!te)
+ delete_t ();
+ }
+
+ if (!te)
+ {
+ // Use the <name>-<toolchain>-<xxx> snapshot that we have made to
+ // bootstrap the new machine. Then atomically rename it to
+ // <name>-<toolchain>.
+ //
+ bootstrapped_machine_manifest bmm (bootstrap_machine (xp));
+
+ try
+ {
+ mvdir (xp, tp);
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to rename " << xp << " to " << tp;
+ }
+
+ te = true;
+
+ // Check the bootstrapped bbot version as above and ignore this
+ // machine if it's newer than us.
+ //
+ if (int i = compare_bbot (bmm.bootstrap))
+ {
+ assert (i > 0);
+ trace << "ignoring " << tp << ": newer bbot";
+ break;
+ }
+ }
+ else
+ ;// btrfs subvolume delete $xp
+
+ // Add the machine to the list.
+ //
+ // In order not to forget to clear new fields, we are instead going to
+ // create a new instance with just the required fields.
+ //
+ r.push_back (machine_manifest (mm.id, mm.name, mm.summary));
+
+ break;
+ }
+ }
+ }
+
+ return r;
+}
+
+*/
+
extern "C" void
handle_signal (int sig)
{
@@ -30,28 +330,36 @@ handle_signal (int sig)
}
}
+// Downwards arrow with tip leftwards (U+21B2) followed by newline.
+//
+const char systemd_indent[] = "\xE2\x86\xB2\n";
+
int
main (int argc, char* argv[])
try
{
- // Map to systemd severity prefixes (see sd-daemon(3) for details). Note
- // that here we assume we will never have location (like file name which
- // would end up being before the prefix).
- //
- const char indent[] = "\xE2\x86\xB2\n"; // Right arrow followed by newline.
+ cli::argv_scanner scan (argc, argv, true);
+ agent_options ops (scan);
- trace_indent =
- fail.indent_ =
- error.indent_ =
- warn.indent_ =
- info.indent_ =
- text.indent_ = indent;
+ if (ops.systemd_daemon ())
+ {
+ // Map to systemd severity prefixes (see sd-daemon(3) for details). Note
+ // that here we assume we will never have location (like file name which
+ // would end up being before the prefix).
+ //
+ trace_indent =
+ fail.indent_ =
+ error.indent_ =
+ warn.indent_ =
+ info.indent_ =
+ text.indent_ = systemd_indent;
- fail.type_ = "<3>";
- error.type_ = "<3>";
- warn.type_ = "<4>";
- info.type_ = "<6>";
- trace_type = "<7>";
+ fail.type_ = "<3>";
+ error.type_ = "<3>";
+ warn.type_ = "<4>";
+ info.type_ = "<6>";
+ trace_type = "<7>";
+ }
tracer trace ("main");
@@ -64,16 +372,6 @@ try
fail << "unable to ignore broken pipe (SIGPIPE) signal: "
<< system_error (errno, generic_category ()); // Sanitize.
- // Handle SIGHUP and SIGTERM.
- //
- if (signal (SIGHUP, &handle_signal) == SIG_ERR ||
- signal (SIGTERM, &handle_signal) == SIG_ERR)
- fail << "unable to set signal handler: "
- << system_error (errno, generic_category ()); // Sanitize.
-
- cli::argv_scanner scan (argc, argv, true);
- agent_options ops (scan);
-
// Version.
//
if (ops.version ())
@@ -99,15 +397,25 @@ try
return p.wait () ? 0 : 1;
}
- if (argc != 2)
- fail << "toolchain name excected" <<
+ if (argc != 4)
+ fail << "toolchain name/id/num excected" <<
info << "run " << argv[0] << " --help for details";
- string tc (argv[1]);
+ tc_name = argv[1];
+ tc_num = argv[2];
+ tc_id = argv[3];
+
+ // Handle SIGHUP and SIGTERM.
+ //
+ if (signal (SIGHUP, &handle_signal) == SIG_ERR ||
+ signal (SIGTERM, &handle_signal) == SIG_ERR)
+ fail << "unable to set signal handler: "
+ << system_error (errno, generic_category ()); // Sanitize.
- info << "starting bbot agent for toolchain " << tc <<
- info << ops.cpu () << " CPU(s)" <<
- info << ops.ram () << " RAM(kB)";
+ info << "bbot agent for " << tc_name << '/' << tc_num <<
+ info << "toolchain id " << tc_id <<
+ info << "CPU(s) " << ops.cpu () <<
+ info << "RAM(kB) " << ops.ram ();
for (;;)
{
diff --git a/bbot/bbot-agent@.service b/bbot/bbot-agent@.service
index 64e07d2..496692f 100644
--- a/bbot/bbot-agent@.service
+++ b/bbot/bbot-agent@.service
@@ -4,9 +4,19 @@ After=default.target
[Service]
Type=simple
+
Environment=CPU=1
Environment=RAM=1048576
-ExecStart=/build/bbot/%i/bin/bbot-agent --cpu ${CPU} --ram ${RAM} %i
+Environment=TOOLCHAIN_ID=123abc
+Environment=TOOLCHAIN_NUM=1
+
+ExecStart=/build/bbot/%i/bin/bbot-agent --systemd-daemon \
+ --cpu ${CPU} \
+ --ram ${RAM} \
+ %i \
+ ${TOOLCHAIN_NUM} \
+ ${TOOLCHAIN_ID}
+
User=build
Group=build
WorkingDirectory=~
diff --git a/bbot/bootstrap-manifest b/bbot/bootstrap-manifest
new file mode 100644
index 0000000..502127b
--- /dev/null
+++ b/bbot/bootstrap-manifest
@@ -0,0 +1,57 @@
+// file : bbot/bootstrap-manifest -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BBOT_BOOTSTRAP_MANIFEST
+#define BBOT_BOOTSTRAP_MANIFEST
+
+#include <map>
+
+#include <bbot/types>
+#include <bbot/utility>
+
+#include <bbot/manifest> // machine_manifest
+
+namespace bbot
+{
+ // Toolchain manifest.
+ //
+ class toolchain_manifest
+ {
+ public:
+
+ // Toolchain id (SHAXXX).
+ //
+ string id;
+ };
+
+ // Bootstrap result manifest. Uploaded by the worker to the agent's TFTP
+ // server.
+ //
+ class bootstrap_manifest
+ {
+ public:
+
+ // Map of packages to their (numeric) versions that were used inside the
+ // bootstrapped machine. Used to make sure bbot agent/worker use the same
+ // versions. For example:
+ //
+ // libbbot-version: 1010100 # 1.1.1
+ // bbot-version: 1010200 # 1.1.2
+ //
+ std::map<string, uint64_t> versions;
+ };
+
+ // The manifest stored in <name>-<toolchain>/ consists of the machine
+ // manifest (original), toolchain manifest, and bootstrap result manifest.
+ //
+ class bootstrapped_machine_manifest
+ {
+ public:
+ machine_manifest machine;
+ toolchain_manifest toolchain;
+ bootstrap_manifest bootstrap;
+ };
+}
+
+#endif // BBOT_BOOTSTRAP_MANIFEST
diff --git a/doc/manual.cli b/doc/manual.cli
index 336e2dd..d5ab992 100644
--- a/doc/manual.cli
+++ b/doc/manual.cli
@@ -185,8 +185,9 @@ manifests is also sent by \c{bbot} agents to controllers.
\li|\n\c{type: <machine-type>}\n
- The machine type. Valid values are \c{vm} and \c{container}. Note that this
- value is not sent by agents to controllers.|
+ The machine type. Valid values are \c{kvm} (QEMU/KVM virtual machine) and
+ \c{nspawn} (\c{systemd-nspawn} container). Note that this value is not sent
+ by agents to controllers.|
\li|\n\c{summary: <string>}\n