aboutsummaryrefslogtreecommitdiff
path: root/bbot/agent
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-07-13 22:50:15 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-07-14 19:10:22 +0300
commitc8ace1ee0a6cab5fd4ea2f084ea436cfa513637d (patch)
treea8db884a665fbf14797393a3b2ff95438c338bb9 /bbot/agent
parent8e8d599b129d35f638f2c1957c869b054a38b021 (diff)
Make use of wildcards in buildfiles
Diffstat (limited to 'bbot/agent')
-rw-r--r--bbot/agent/agent.cli232
-rw-r--r--bbot/agent/agent.cxx1248
-rw-r--r--bbot/agent/agent.hxx45
-rw-r--r--bbot/agent/machine-manifest.cxx355
-rw-r--r--bbot/agent/machine-manifest.hxx118
-rw-r--r--bbot/agent/machine.cxx474
-rw-r--r--bbot/agent/machine.hxx84
-rw-r--r--bbot/agent/tftp.cxx137
-rw-r--r--bbot/agent/tftp.hxx47
9 files changed, 2740 insertions, 0 deletions
diff --git a/bbot/agent/agent.cli b/bbot/agent/agent.cli
new file mode 100644
index 0000000..a5dbe01
--- /dev/null
+++ b/bbot/agent/agent.cli
@@ -0,0 +1,232 @@
+// file : bbot/agent.cli
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+include <bbot/common.cli>;
+
+"\section=1"
+"\name=bbot-agent"
+"\summary=build bot agent"
+
+namespace bbot
+{
+ {
+ "<options> <url>",
+
+ "
+ \h|SYNOPSIS|
+
+ \cb{bbot-agent --help}\n
+ \cb{bbot-agent --version}\n
+ \c{\b{bbot-agent} [<options>] <url>...}
+
+ \h|DESCRIPTION|
+
+ \cb{bbot-agent} @@ TODO.
+
+ Note that on termination \cb{bbot-agent} may leave a working machine
+ snapshot behind. It is expected that the caller (normally Build OS
+ monitor) cleans them up before restarting the agent.
+ "
+ }
+
+ class agent_options
+ {
+ "\h|OPTIONS|"
+
+ bool --help {"Print usage information and exit."}
+ bool --version {"Print version and exit."}
+
+ bool --systemd-daemon
+ {
+ "Run as a simple systemd daemon."
+ }
+
+ path --auth-key
+ {
+ "<file>",
+ "Private key for the public key-based agent authentication. If not
+ specified, then the agent will not be able to request tasks from
+ controllers that require authentication.
+
+ The file is expected to contain a single PEM-encoded private key
+ without a password. A suitable key can be generated using the
+ following command:
+
+ \
+ $ openssl genrsa 4096 >key.pem
+ \
+ "
+ }
+
+ path --openssl = "openssl"
+ {
+ "<path>",
+ "The openssl program to be used for crypto operations. You can also
+ specify additional options that should be passed to the openssl program
+ with \cb{--openssl-option}. If the openssl program is not explicitly
+ specified, then \cb{bbot-agent} will use \cb{openssl} by default."
+ }
+
+ strings --openssl-option
+ {
+ "<opt>",
+ "Additional option to be passed to the openssl program (see
+ \cb{--openssl} for details). Repeat this option to specify multiple
+ openssl options."
+ }
+
+ size_t --cpu = 1
+ {
+ "<num>",
+ "Number of CPUs (threads) to use, 1 by default."
+ }
+
+ size_t --ram (1024 * 1024) // 1G
+ {
+ "<num>",
+ "Amount of RAM (in kB) to use, 1G by default."
+ }
+
+ string --toolchain-name = "default"
+ {
+ "<str>",
+ "Toolchain name, \cb{default} by default."
+ }
+
+ uint16_t --toolchain-num = 1
+ {
+ "<num>",
+ "Toolchain number, 1 by default."
+ }
+
+ standard_version --toolchain-ver
+ {
+ "<stdver>",
+ "Toolchain version. If unspecified, then the agent's version will be
+ used (which will be imprecise for snapshot versions)."
+ }
+
+ string --toolchain-id
+ {
+ "<str>",
+ "Toolchain id. If unspecified or empty, then no re-bootstrapping on
+ toolchain changes will be performed (which is primarily useful for
+ testing)."
+ }
+
+ strings --trust
+ {
+ "<fingerprint>",
+ "Trust repository certificate with a SHA256 <fingerprint>."
+ }
+
+ dir_path --machines = "/build/machines/"
+ {
+ "<dir>",
+ "The location of the build machines, \cb{/build/machines/} by default."
+ }
+
+ dir_path --tftp = "/build/tftp/"
+ {
+ "<dir>",
+ "The location of the TFTP server root, \cb{/build/tftp/} by default."
+ }
+
+ uint16_t --tftp-port = 23400
+ {
+ "<num>",
+ "TFTP server port base, 23400 by default. The actual port is calculated
+ by adding the toolchain number \c{--toolchain-num} to this value."
+ }
+
+ size_t --bootstrap-timeout = 900
+ {
+ "<sec>",
+ "Maximum number of seconds to wait for machine bootstrap completion,
+ 900 (15 minutes) by default."
+ }
+
+ size_t --bootstrap-retries = 2
+ {
+ "<num>",
+ "Number of time to retry a mis-booted bootstrap, 2 by default."
+ }
+
+ size_t --build-timeout = 1800
+ {
+ "<sec>",
+ "Maximum number of seconds to wait for build completion, 1800 (30
+ minutes) by default."
+ }
+
+ size_t --build-retries = 2
+ {
+ "<num>",
+ "Number of time to retry a mis-booted build, 2 by default."
+ }
+
+ size_t --request-timeout = 300
+ {
+ "<sec>",
+ "Maximum number of seconds to wait for controller request completion,
+ 300 (5 minutes) by default."
+ }
+
+ uint16_t --verbose = 1
+ {
+ "<level>",
+ "Set the diagnostics verbosity to <level> between 0 and 6 with level 1
+ being the default."
+ }
+
+ // Testing options.
+ //
+ bool --dump-machines
+ {
+ "Dump the available machines to \cb{stdout}, (re)-bootstrapping any if
+ necessary, and exit."
+ }
+
+ bool --dump-task
+ {
+ "Dump the received build task to \cb{stdout} and exit."
+ }
+
+ bool --dump-result
+ {
+ "Dump the obtained build result to \cb{stdout} and exit."
+ }
+
+ bool --fake-bootstrap
+ {
+ "Fake the machine bootstrap process by creating the expected bootstrapped
+ machine manifest."
+ }
+
+ bool --fake-build
+ {
+ "Fake the package building process by creating the aborted build result."
+ }
+
+ path --fake-machine
+ {
+ "<file>",
+ "Fake the machine enumeration process by reading the machine header
+ manifest from <file> (or \cb{stdin} if <file> is '\cb{-}')."
+ }
+
+ path --fake-request
+ {
+ "<file>",
+ "Fake the task request process by reading the task manifest from <file>
+ (or \cb{stdin} if <file> is '\cb{-}')."
+ }
+ };
+
+ "
+ \h|EXIT STATUS|
+
+ Non-zero exit status is returned in case of an error.
+ "
+}
diff --git a/bbot/agent/agent.cxx b/bbot/agent/agent.cxx
new file mode 100644
index 0000000..ff697db
--- /dev/null
+++ b/bbot/agent/agent.cxx
@@ -0,0 +1,1248 @@
+// file : bbot/agent/agent.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+#include <bbot/agent/agent.hxx>
+
+#include <pwd.h> // getpwuid()
+#include <limits.h> // PATH_MAX
+#include <signal.h> // signal()
+#include <stdlib.h> // rand_r()
+#include <unistd.h> // sleep(), realink(), getuid(), fsync()
+
+#include <net/if.h> // ifreq
+#include <netinet/in.h> // sockaddr_in
+#include <arpa/inet.h> // inet_ntop()
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include <chrono>
+#include <iostream>
+
+#include <libbutl/pager.hxx>
+#include <libbutl/sha256.hxx>
+#include <libbutl/openssl.hxx>
+#include <libbutl/filesystem.hxx> // dir_iterator
+
+#include <libbbot/manifest.hxx>
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+#include <bbot/diagnostics.hxx>
+
+#include <bbot/bootstrap-manifest.hxx>
+
+#include <bbot/agent/tftp.hxx>
+#include <bbot/agent/machine.hxx>
+#include <bbot/agent/machine-manifest.hxx>
+
+using namespace std;
+using namespace butl;
+using namespace bbot;
+
+namespace bbot
+{
+ agent_options ops;
+
+ const string bs_prot ("1");
+
+ string tc_name;
+ uint16_t tc_num;
+ standard_version tc_ver;
+ string tc_id;
+
+ string hname;
+ uid_t uid;
+ string uname;
+}
+
+static void
+file_sync (const path& f)
+{
+ auto_fd fd (fdopen (f, fdopen_mode::in));
+ if (fsync (fd.get ()) != 0)
+ throw_system_error (errno);
+}
+
+// The btrfs tool likes to print informational messages, like "Created
+// snapshot such and such". Luckily, it writes them to stdout while proper
+// diagnostics to stderr.
+//
+template <typename... A>
+inline void
+run_btrfs (tracer& t, A&&... a)
+{
+ if (verb >= 4)
+ run_io (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...);
+ else
+ run_io (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...);
+}
+
+template <typename... A>
+inline butl::process_exit::code_type
+btrfs_exit (tracer& t, A&&... a)
+{
+ return verb >= 4
+ ? run_io_exit (t, fdnull (), 2, 2, "btrfs", forward<A> (a)...)
+ : run_io_exit (t, fdnull (), fdnull (), 2, "btrfs", forward<A> (a)...);
+}
+
+// Bootstrap the machine. Return the bootstrapped machine manifest if
+// successful and nullopt otherwise (in which case the machine directory
+// should be cleaned and the machine ignored for now).
+//
+static optional<bootstrapped_machine_manifest>
+bootstrap_machine (const dir_path& md,
+ const machine_manifest& mm,
+ optional<bootstrapped_machine_manifest> obmm)
+{
+ tracer trace ("bootstrap_machine", md.string ().c_str ());
+
+ bootstrapped_machine_manifest r {
+ mm,
+ toolchain_manifest {tc_id.empty () ? "bogus" : tc_id},
+ bootstrap_manifest {
+ bootstrap_manifest::versions_type {
+ {"bbot", standard_version (BBOT_VERSION_STR)},
+ {"libbbot", standard_version (LIBBBOT_VERSION_STR)},
+ {"libbpkg", standard_version (LIBBPKG_VERSION_STR)},
+ {"libbutl", standard_version (LIBBUTL_VERSION_STR)}
+ }
+ }
+ };
+
+ if (ops.fake_bootstrap ())
+ {
+ r.machine.mac = "de:ad:be:ef:de:ad";
+ }
+ else
+ try
+ {
+ string br ("br1"); // Using private bridge for now.
+
+ // Start the TFTP server (server chroot is --tftp). Map:
+ //
+ // GET requests to .../toolchains/<name>/*
+ // PUT requests to .../bootstrap/<name>/*
+ //
+ auto_rmdir arm ((dir_path (ops.tftp ()) /= "bootstrap") /= tc_name);
+ try_mkdir_p (arm.path ());
+
+ // Bootstrap result manifest.
+ //
+ path mf (arm.path () / "manifest");
+ try_rmfile (mf);
+
+ // Note that unlike build, here we use the same VM snapshot for retries,
+ // which is not ideal.
+ //
+ for (size_t retry (0);; ++retry)
+ {
+ tftp_server tftpd ("Gr ^/?(.+)$ /toolchains/" + tc_name + "/\\1\n" +
+ "Pr ^/?(.+)$ /bootstrap/" + tc_name + "/\\1\n",
+ ops.tftp_port () + tc_num);
+
+ l3 ([&]{trace << "tftp server on port " << tftpd.port ();});
+
+ // Start the machine.
+ //
+ unique_ptr<machine> m (
+ start_machine (md,
+ mm,
+ obmm ? obmm->machine.mac : nullopt,
+ br,
+ tftpd.port ()));
+
+ {
+ // If we are terminating with an exception then force the machine down.
+ // Failed that, the machine's destructor will block waiting for its
+ // completion.
+ //
+ auto mg (
+ make_exception_guard (
+ [&m, &md] ()
+ {
+ info << "trying to force machine " << md << " down";
+ try {m->forcedown ();} catch (const failed&) {}
+ }));
+
+ // What happens if the bootstrap process hangs? The simple thing would
+ // be to force the machine down after some timeout and then fail. But
+ // that won't be very helpful for investigating the cause. So instead
+ // the plan is to suspend it after some timeout, issue diagnostics
+ // (without failing and which Build OS monitor will relay to the
+ // admin), and wait for the external intervention.
+ //
+ auto soft_fail = [&md, &m] (const char* msg)
+ {
+ {
+ diag_record dr (error);
+ dr << msg << " for machine " << md << ", suspending";
+ m->print_info (dr);
+ }
+ m->suspend ();
+ m->wait ();
+ info << "resuming after machine suspension";
+ return nullopt;
+ };
+
+ // The first request should be the toolchain download. Wait for up to
+ // 5 minutes for that to arrive. In a sense we use it as an indication
+ // that the machine has booted and the bootstrap process has started.
+ // Why wait so long you may wonder? Well, we may be using a new MAC
+ // address and operating systems like Windows may need to digest that.
+ //
+ size_t to;
+ const size_t startup_to (5 * 60);
+ const size_t bootstrap_to (ops.bootstrap_timeout ());
+ const size_t shutdown_to (5 * 60);
+
+ // This can mean two things: machine mis-configuration or what we
+ // euphemistically call a "mis-boot": the VM failed to boot for some
+ // unknown/random reason. Mac OS is particularly know for suffering
+ // from this. So the strategy is to retry it a couple of times and
+ // then suspend for investigation.
+ //
+ if (!tftpd.serve ((to = startup_to)))
+ {
+ if (retry > ops.bootstrap_retries ())
+ return soft_fail ("bootstrap startup timeout");
+
+ warn << "machine " << mm.name << " appears to have "
+ << "mis-booted, retrying";
+
+ try {m->forcedown (false);} catch (const failed&) {}
+ continue;
+ }
+
+ l3 ([&]{trace << "completed startup in " << startup_to - to << "s";});
+
+ // Next the bootstrap process may download additional toolchain
+ // archives, build things, and then upload the result manifest. So on
+ // our side we serve TFTP requests while periodically checking for the
+ // manifest file. To workaround some obscure filesystem races (the
+ // file's mtime/size is updated several seconds later; maybe tmpfs
+ // issue?), we periodically re-check.
+ //
+ for (to = bootstrap_to; to != 0; tftpd.serve (to, 2))
+ {
+ if (file_exists (mf))
+ {
+ file_sync (mf);
+ if (!file_empty (mf))
+ break;
+ }
+ }
+
+ if (to == 0)
+ return soft_fail ("bootstrap timeout");
+
+ l3 ([&]{trace << "completed bootstrap in " << bootstrap_to - to << "s";});
+
+ // Shut the machine down cleanly.
+ //
+ if (!m->shutdown ((to = shutdown_to)))
+ return soft_fail ("bootstrap shutdown timeout");
+
+ l3 ([&]{trace << "completed shutdown in " << shutdown_to - to << "s";});
+ }
+
+ // Parse the result manifest.
+ //
+ r.bootstrap = parse_manifest<bootstrap_manifest> (mf, "bootstrap");
+
+ r.machine.mac = m->mac; // Save the MAC address.
+
+ break;
+ }
+ }
+ catch (const system_error& e)
+ {
+ fail << "bootstrap error: " << e;
+ }
+
+ serialize_manifest (r, md / "manifest", "bootstrapped machine");
+ return r;
+}
+
+// Return available machines and their directories as a parallel array.
+//
+static pair<bootstrapped_machine_manifests, dir_paths>
+enumerate_machines (const dir_path& machines)
+try
+{
+ tracer trace ("enumerate_machines", machines.string ().c_str ());
+
+ bootstrapped_machine_manifests rm;
+ dir_paths rd;
+
+ if (ops.fake_machine_specified ())
+ {
+ auto mh (
+ parse_manifest<machine_header_manifest> (
+ ops.fake_machine (), "machine header"));
+
+ rm.push_back (
+ bootstrapped_machine_manifest {
+ machine_manifest {
+ mh.id,
+ mh.name,
+ mh.summary,
+ machine_type::kvm,
+ string ("de:ad:be:ef:de:ad"),
+ nullopt},
+ toolchain_manifest {tc_id},
+ bootstrap_manifest {}
+ });
+
+ rd.push_back (dir_path (ops.machines ()) /= mh.name); // For diagnostics.
+
+ return make_pair (move (rm), move (rd));
+ }
+
+ // The first level are machine volumes.
+ //
+ for (const dir_entry& ve: dir_iterator (machines))
+ {
+ const string vn (ve.path ().string ());
+
+ // Ignore hidden directories.
+ //
+ if (ve.type () != entry_type::directory || vn[0] == '.')
+ continue;
+
+ const dir_path vd (dir_path (machines) /= vn);
+
+ // Inside we have machines.
+ //
+ try
+ {
+ for (const dir_entry& me: dir_iterator (vd))
+ {
+ const string mn (me.path ().string ());
+
+ if (me.type () != entry_type::directory || mn[0] == '.')
+ continue;
+
+ const dir_path md (dir_path (vd) /= mn);
+
+ // Our endgoal here is to obtain a bootstrapped snapshot of this
+ // machine while watching out for potential race conditions (machines
+ // being added/upgraded/removed; see the manual for details).
+ //
+ // So here is our overall plan:
+ //
+ // 1. Resolve current subvolume link for our bootstrap protocol.
+ //
+ // 2. If there is no link, cleanup and ignore this machine.
+ //
+ // 3. Try to create a snapshot of current subvolume (this operation is
+ // atomic). If failed (e.g., someone changed the link and removed
+ // the subvolume in the meantime), retry from #1.
+ //
+ // 4. Compare the snapshot to the already bootstrapped version (if
+ // any) and see if we need to re-bootstrap. If so, use the snapshot
+ // as a starting point. Rename to bootstrapped at the end (atomic).
+ //
+ dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -<P>
+ dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // -<toolchain>
+ bool te (dir_exists (tp));
+
+ auto delete_t = [&tp, &trace] ()
+ {
+ run_btrfs (trace, "property", "set", "-ts", tp, "ro", "false");
+ run_btrfs (trace, "subvolume", "delete", tp);
+ };
+
+ for (size_t retry (0);; ++retry)
+ {
+ if (retry != 0)
+ sleep (1);
+
+ // Resolve the link to subvolume path.
+ //
+ dir_path sp; // <name>-<P>.<R>
+ try
+ {
+ char b [PATH_MAX + 1];
+ ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b)));
+
+ if (r == -1)
+ {
+ if (errno != ENOENT)
+ throw_generic_error (errno);
+ }
+ else if (static_cast<size_t> (r) >= sizeof (b))
+ throw_generic_error (EINVAL);
+ else
+ {
+ b[r] = '\0';
+ sp = dir_path (b);
+ if (sp.relative ())
+ sp = md / sp;
+ }
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to read subvolume link " << lp << ": " << e;
+ }
+
+ // If the resolution fails, then this means there is no current
+ // machine subvolume (for this bootstrap protocol). In this case we
+ // clean up our toolchain subvolume (<name>-<toolchain>) and ignore
+ // this machine.
+ //
+ if (sp.empty ())
+ {
+ if (te)
+ delete_t ();
+
+ l3 ([&]{trace << "skipping " << md << ": no subvolume link";});
+ break;
+ }
+
+ // <name>-<toolchain>-<xxx>
+ //
+ const dir_path xp (
+ dir_path (md) /= path::traits::temp_name (mn + '-' + tc_name));
+
+ if (btrfs_exit (trace, "subvolume", "snapshot", sp, xp) != 0)
+ {
+ if (retry >= 10)
+ fail << "unable to snapshot subvolume " << sp;
+
+ continue;
+ }
+
+ // Load the (original) machine manifest.
+ //
+ auto mm (
+ parse_manifest<machine_manifest> (sp / "manifest", "machine"));
+
+ // If we already have <name>-<toolchain>, see if it needs to be re-
+ // bootstrapped. Things that render it obsolete:
+ //
+ // 1. New machine revision (compare machine ids).
+ // 2. New toolchain (compare toolchain ids).
+ // 3. New bbot/libbbot (compare versions).
+ //
+ // The last case has a complication: what should we do if we have
+ // bootstrapped a newer version of bbot? This would mean that we are
+ // about to be stopped and upgraded (and the upgraded version will
+ // probably be able to use the result). So we simply ignore this
+ // machine for this run.
+
+ // Return -1 if older, 0 if the same, and +1 if newer.
+ //
+ auto compare_bbot = [] (const bootstrap_manifest& m) -> int
+ {
+ auto cmp = [&m] (const string& n, const char* v) -> int
+ {
+ standard_version sv (v);
+ auto i = m.versions.find (n);
+
+ return (i == m.versions.end () || i->second < sv
+ ? -1
+ : i->second > sv ? 1 : 0);
+ };
+
+ // Start from the top assuming a new dependency cannot be added
+ // without changing the dependent's version.
+ //
+ int r;
+ return
+ (r = cmp ("bbot", BBOT_VERSION_STR)) != 0 ? r :
+ (r = cmp ("libbbot", LIBBBOT_VERSION_STR)) != 0 ? r :
+ (r = cmp ("libbpkg", LIBBPKG_VERSION_STR)) != 0 ? r :
+ (r = cmp ("libbutl", LIBBUTL_VERSION_STR)) != 0 ? r : 0;
+ };
+
+ optional<bootstrapped_machine_manifest> bmm;
+ if (te)
+ {
+ bmm = parse_manifest<bootstrapped_machine_manifest> (
+ tp / "manifest", "bootstrapped machine");
+
+ if (bmm->machine.id != mm.id)
+ {
+ l3 ([&]{trace << "re-bootstrapping " << tp << ": new machine";});
+ te = false;
+ }
+
+ if (!tc_id.empty () && bmm->toolchain.id != tc_id)
+ {
+ l3 ([&]{trace << "re-bootstrapping " << tp << ": new toolchain";});
+ te = false;
+ }
+
+ if (int i = compare_bbot (bmm->bootstrap))
+ {
+ if (i < 0)
+ {
+ l3 ([&]{trace << "re-bootstrapping " << tp << ": new bbot";});
+ te = false;
+ }
+ else
+ {
+ l3 ([&]{trace << "ignoring " << tp << ": old bbot";});
+ run_btrfs (trace, "subvolume", "delete", xp);
+ break;
+ }
+ }
+
+ if (!te)
+ delete_t ();
+ }
+ else
+ l3 ([&]{trace << "bootstrapping " << tp;});
+
+ if (!te)
+ {
+ // Use the <name>-<toolchain>-<xxx> snapshot that we have made to
+ // bootstrap the new machine. Then atomically rename it to
+ // <name>-<toolchain>.
+ //
+ bmm = bootstrap_machine (xp, mm, move (bmm));
+
+ if (!bmm)
+ {
+ l3 ([&]{trace << "ignoring " << tp << ": failed to bootstrap";});
+ run_btrfs (trace, "subvolume", "delete", xp);
+ break;
+ }
+
+ try
+ {
+ mvdir (xp, tp);
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to rename " << xp << " to " << tp;
+ }
+
+ l2 ([&]{trace << "bootstrapped " << bmm->machine.name;});
+
+ // Check the bootstrapped bbot version as above and ignore this
+ // machine if it's newer than us.
+ //
+ if (int i = compare_bbot (bmm->bootstrap))
+ {
+ if (i > 0)
+ {
+ l3 ([&]{trace << "ignoring " << tp << ": old bbot";});
+ break;
+ }
+ else
+ warn << "bootstrapped " << tp << " bbot worker is older "
+ << "than agent; assuming test setup";
+ }
+ }
+ else
+ run_btrfs (trace, "subvolume", "delete", xp);
+
+ // Add the machine to the lists.
+ //
+ rm.push_back (move (*bmm));
+ rd.push_back (move (tp));
+
+ break;
+ }
+ }
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to iterate over " << vd << ": " << e << endf;
+ }
+ }
+
+ return make_pair (move (rm), move (rd));
+}
+catch (const system_error& e)
+{
+ fail << "unable to iterate over " << machines << ": " << e << endf;
+}
+
+static result_manifest
+perform_task (const dir_path& md,
+ const bootstrapped_machine_manifest& mm,
+ const task_manifest& tm)
+try
+{
+ tracer trace ("perform_task", md.string ().c_str ());
+
+ result_manifest r {
+ tm.name,
+ tm.version,
+ result_status::abort,
+ operation_results {}};
+
+ if (ops.fake_build ())
+ return r;
+
+ // The overall plan is as follows:
+ //
+ // 1. Snapshot the (bootstrapped) machine.
+ //
+ // 2. Save the task manifest to the TFTP directory (to be accessed by the
+ // worker).
+ //
+ // 3. Start the TFTP server and the machine.
+ //
+ // 4. Serve TFTP requests while watching out for the result manifest.
+ //
+ // 5. Clean up (force the machine down and delete the snapshot).
+ //
+
+ // TFTP server mapping (server chroot is --tftp):
+ //
+ // GET requests to .../build/<name>/get/*
+ // PUT requests to .../build/<name>/put/*
+ //
+ auto_rmdir arm ((dir_path (ops.tftp ()) /= "build") /= tc_name);
+
+ dir_path gd (dir_path (arm.path ()) /= "get");
+ dir_path pd (dir_path (arm.path ()) /= "put");
+
+ try_mkdir_p (gd);
+ try_mkdir_p (pd);
+
+ path tf (gd / "manifest"); // Task manifest file.
+ path rf (pd / "manifest"); // Result manifest file.
+
+ serialize_manifest (tm, tf, "task");
+
+ if (ops.fake_machine_specified ())
+ {
+ // Simply wait for the file to appear.
+ //
+ for (size_t i (0); !file_exists (rf); sleep (1))
+ if (i++ % 10 == 0)
+ l3 ([&]{trace << "waiting for result manifest";});
+
+ r = parse_manifest<result_manifest> (rf, "result");
+ }
+ else
+ {
+ try_rmfile (rf);
+
+ // <name>-<toolchain>-<xxx>
+ //
+ const dir_path xp (
+ md.directory () /= path::traits::temp_name (md.leaf ().string ()));
+
+ string br ("br1"); // Using private bridge for now.
+
+ for (size_t retry (0);; ++retry)
+ {
+ if (retry != 0)
+ run_btrfs (trace, "subvolume", "delete", xp);
+
+ run_btrfs (trace, "subvolume", "snapshot", md, xp);
+
+ // Start the TFTP server.
+ //
+ tftp_server tftpd ("Gr ^/?(.+)$ /build/" + tc_name + "/get/\\1\n" +
+ "Pr ^/?(.+)$ /build/" + tc_name + "/put/\\1\n",
+ ops.tftp_port () + tc_num);
+
+ l3 ([&]{trace << "tftp server on port " << tftpd.port ();});
+
+ // Start the machine.
+ //
+ unique_ptr<machine> m (
+ start_machine (xp,
+ mm.machine,
+ mm.machine.mac,
+ br,
+ tftpd.port ()));
+
+ // Note: the machine handling logic is similar to bootstrap.
+ //
+ {
+ auto mg (
+ make_exception_guard (
+ [&m, &xp] ()
+ {
+ info << "trying to force machine " << xp << " down";
+ try {m->forcedown ();} catch (const failed&) {}
+ }));
+
+ auto soft_fail = [&xp, &m, &r] (const char* msg)
+ {
+ {
+ diag_record dr (error);
+ dr << msg << " for machine " << xp << ", suspending";
+ m->print_info (dr);
+ }
+ m->suspend ();
+ m->wait ();
+ info << "resuming after machine suspension";
+ return r;
+ };
+
+ // The first request should be the task manifest download. Wait for up
+ // to 60 seconds for that to arrive. In a sense we use it as an
+ // indication that the machine has booted and the worker process has
+ // started.
+ //
+ size_t to;
+ const size_t startup_to (60);
+ const size_t build_to (ops.build_timeout ());
+
+ if (!tftpd.serve ((to = startup_to)))
+ {
+ if (retry > ops.build_retries ())
+ return soft_fail ("build startup timeout");
+
+ warn << "machine " << mm.machine.name << " appears to have "
+ << "mis-booted, retrying";
+
+ try {m->forcedown (false);} catch (const failed&) {}
+ continue;
+ }
+
+ l3 ([&]{trace << "completed startup in " << startup_to - to << "s";});
+
+ // Next the worker builds things and then uploads the result manifest.
+ // So on our side we serve TFTP requests while checking for the
+ // manifest file. To workaround some obscure filesystem races (the
+ // file's mtime/size is updated several seconds later; maybe tmpfs
+ // issue?), we periodically re-check.
+ //
+ for (to = build_to; to != 0; tftpd.serve (to, 2))
+ {
+ if (file_exists (rf))
+ {
+ file_sync (rf);
+ if (!file_empty (rf))
+ break;
+ }
+ }
+
+ if (to == 0)
+ return soft_fail ("build timeout");
+
+ l3 ([&]{trace << "completed build in " << build_to - to << "s";});
+
+ // Parse the result manifest.
+ //
+ try
+ {
+ r = parse_manifest<result_manifest> (rf, "result", false);
+ }
+ catch (const failed&)
+ {
+ r.status = result_status::abnormal; // Soft-fail below.
+ }
+
+ if (r.status == result_status::abnormal)
+ {
+ // If the build terminated abnormally, suspend the machine for
+ // investigation.
+ //
+ return soft_fail ("build terminated abnormally");
+ }
+ else
+ {
+ // Force the machine down (there is no need wasting time on clean
+ // shutdown since the next step is to drop the snapshot). Also fail
+ // softly if things go badly.
+ //
+ try {m->forcedown (false);} catch (const failed&) {}
+ }
+ }
+
+ run_btrfs (trace, "subvolume", "delete", xp);
+ break;
+ }
+ }
+
+ // Update package name/version if the returned value as "unknown".
+ //
+ if (r.version == bpkg::version ("0"))
+ {
+ assert (r.status == result_status::abnormal);
+
+ r.name = tm.name;
+ r.version = tm.version;
+ }
+
+ return r;
+}
+catch (const system_error& e)
+{
+ fail << "build error: " << e << endf;
+}
+
+extern "C" void
+handle_signal (int sig)
+{
+ switch (sig)
+ {
+ case SIGHUP: exit (3); // Unimplemented feature.
+ case SIGTERM: exit (0);
+ default: assert (false);
+ }
+}
+
+int
+main (int argc, char* argv[])
+try
+{
+ cli::argv_scanner scan (argc, argv, true);
+ ops.parse (scan);
+
+ verb = ops.verbose ();
+
+ if (ops.systemd_daemon ())
+ systemd_diagnostics (true); // With critical errors.
+
+ tracer trace ("main");
+
+ uid = getuid ();
+ uname = getpwuid (uid)->pw_name;
+
+ {
+ char buf[HOST_NAME_MAX + 1];
+
+ if (gethostname (buf, sizeof (buf)) == -1)
+ fail << "unable to obtain hostname: "
+ << system_error (errno, generic_category ()); // Sanitize.
+
+ hname = buf;
+ }
+
+ // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if
+ // the pipe reading end is closed. Note that by default this signal
+ // terminates a process. Also note that there is no way to disable this
+ // behavior on a file descriptor basis or for the write() function call.
+ //
+ if (signal (SIGPIPE, SIG_IGN) == SIG_ERR)
+ fail << "unable to ignore broken pipe (SIGPIPE) signal: "
+ << system_error (errno, generic_category ()); // Sanitize.
+
+ // Version.
+ //
+ if (ops.version ())
+ {
+ cout << "bbot-agent " << BBOT_VERSION_ID << endl
+ << "libbbot " << LIBBBOT_VERSION_ID << endl
+ << "libbpkg " << LIBBBOT_VERSION_ID << endl
+ << "libbutl " << LIBBUTL_VERSION_ID << endl
+ << "Copyright (c) 2014-2017 Code Synthesis Ltd" << endl
+ << "TBC; All rights reserved" << endl;
+
+ return 0;
+ }
+
+ // Help.
+ //
+ if (ops.help ())
+ {
+ pager p ("bbot-agent help", false);
+ print_bbot_agent_usage (p.stream ());
+
+ // If the pager failed, assume it has issued some diagnostics.
+ //
+ return p.wait () ? 0 : 1;
+ }
+
+ tc_name = ops.toolchain_name ();
+ tc_num = ops.toolchain_num ();
+ tc_ver = (ops.toolchain_ver_specified ()
+ ? ops.toolchain_ver ()
+ : standard_version (BBOT_VERSION_STR));
+ tc_id = ops.toolchain_id ();
+
+
+ // Controller URLs.
+ //
+ if (argc < 2 &&
+ !ops.dump_machines () &&
+ !ops.fake_request_specified ())
+ {
+ fail << "controller url expected" <<
+ info << "run " << argv[0] << " --help for details";
+ }
+
+ strings controllers;
+
+ for (int i (1); i != argc; ++i)
+ controllers.push_back (argv[i]);
+
+ // Handle SIGHUP and SIGTERM.
+ //
+ if (signal (SIGHUP, &handle_signal) == SIG_ERR ||
+ signal (SIGTERM, &handle_signal) == SIG_ERR)
+ fail << "unable to set signal handler: "
+ << system_error (errno, generic_category ()); // Sanitize.
+
+ optional<string> fingerprint;
+
+ if (ops.auth_key_specified ())
+ try
+ {
+ // Note that the process always prints to STDERR, so we redirect it to the
+ // null device. We also check for the key file existence to print more
+ // meaningful error message if that's not the case.
+ //
+ if (!file_exists (ops.auth_key ()))
+ throw_generic_error (ENOENT);
+
+ openssl os (trace,
+ ops.auth_key (), path ("-"), fdnull (),
+ ops.openssl (), "rsa",
+ ops.openssl_option (), "-pubout", "-outform", "DER");
+
+ vector<char> k (os.in.read_binary ());
+ os.in.close ();
+
+ if (!os.wait ())
+ throw_generic_error (EIO);
+
+ fingerprint = sha256 (k.data (), k.size ()).string ();
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to obtain authentication public key: " << e;
+ }
+
+ if (ops.systemd_daemon ())
+ {
+ diag_record dr;
+
+ dr << info << "bbot agent " << BBOT_VERSION_ID;
+
+ if (fingerprint)
+ dr << info << "auth key fp " << *fingerprint;
+
+ dr <<
+ info << "toolchain name " << tc_name <<
+ info << "toolchain num " << tc_num <<
+ info << "toolchain ver " << tc_ver.string () <<
+ info << "toolchain id " << tc_id <<
+ info << "CPU(s) " << ops.cpu () <<
+ info << "RAM(kB) " << ops.ram ();
+
+ for (const string& u: controllers)
+ dr << info << "controller url " << u;
+ }
+
+ // The work loop. The steps we go through are:
+ //
+ // 1. Enumerate the available machines, (re-)bootstrapping any if necessary.
+ //
+ // 2. Poll controller(s) for build tasks.
+ //
+ // 3. If no build tasks are available, go to #1 (after sleeping a bit).
+ //
+ // 4. If a build task is returned, do it, upload the result, and go to #1
+ // (immediately).
+ //
+ for (bool sleep (false);; ::sleep (sleep ? 60 : 0), sleep = false)
+ {
+ // Enumerate the machines.
+ //
+ auto mp (enumerate_machines (ops.machines ()));
+ bootstrapped_machine_manifests& ms (mp.first);
+ dir_paths& ds (mp.second);
+
+ // Prepare task request.
+ //
+ task_request_manifest tq {
+ hname,
+ tc_name,
+ tc_ver,
+ fingerprint,
+ machine_header_manifests {}
+ };
+
+ for (const bootstrapped_machine_manifest& m: ms)
+ tq.machines.emplace_back (m.machine.id,
+ m.machine.name,
+ m.machine.summary);
+
+ if (ops.dump_machines ())
+ {
+ for (const machine_header_manifest& m: tq.machines)
+ serialize_manifest (m, cout, "stdout", "machine");
+
+ return 0;
+ }
+
+ if (tq.machines.empty ())
+ {
+ warn << "no build machines for toolchain " << tc_name;
+ sleep = true;
+ continue;
+ }
+
+ // Send task requests.
+ //
+ //
+ string url;
+ task_response_manifest tr;
+
+ if (ops.fake_request_specified ())
+ {
+ auto t (parse_manifest<task_manifest> (ops.fake_request (), "task"));
+
+ tr = task_response_manifest {
+ "fake-session", // Dummy session.
+ nullopt, // No challenge.
+ url, // Empty result URL.
+ move (t)};
+
+ url = "http://example.org";
+ }
+ else
+ {
+ for (const string& u: controllers)
+ {
+ try
+ {
+ http_curl c (trace,
+ path ("-"),
+ path ("-"),
+ curl::post,
+ u,
+ "--header", "Content-Type: text/manifest",
+ "--max-time", ops.request_timeout ());
+
+ // This is tricky/hairy: we may fail hard parsing the output before
+ // seeing that curl exited with an error and failing softly.
+ //
+ bool f (false);
+
+ try
+ {
+ serialize_manifest (tq, c.out, u, "task request", false);
+ }
+ catch (const failed&) {f = true;}
+
+ c.out.close ();
+
+ if (!f)
+ try
+ {
+ tr = parse_manifest<task_response_manifest> (
+ c.in, u, "task response", false);
+ }
+ catch (const failed&) {f = true;}
+
+ c.in.close ();
+
+ if (!c.wait () || f)
+ throw_generic_error (EIO);
+ }
+ catch (const system_error& e)
+ {
+ error << "unable to request task from " << u << ": " << e;
+ continue;
+ }
+
+ if (tr.challenge && !fingerprint) // Controller misbehaves.
+ {
+ error << "unexpected challenge from " << u << ": " << *tr.challenge;
+ continue;
+ }
+
+ if (!tr.session.empty ()) // Got a task.
+ {
+ url = u;
+
+ task_manifest& t (*tr.task);
+ l2 ([&]{trace << "task for " << t.name << '/' << t.version << " "
+ << "on " << t.machine << " "
+ << "from " << url;});
+ break;
+ }
+ }
+ }
+
+ if (tr.session.empty ()) // No task from any of the controllers.
+ {
+ l2 ([&]{trace << "no tasks from any controllers, sleeping";});
+ sleep = true;
+ continue;
+ }
+
+ // We have a build task.
+ //
+ // First find the index of the machine we were asked to use (and also
+ // verify it is one of those we sent).
+ //
+ size_t i (0);
+ for (const machine_header_manifest& m: tq.machines)
+ {
+ if (m.name == tr.task->machine)
+ break;
+
+ ++i;
+ }
+
+ if (i == ms.size ())
+ {
+ error << "task from " << url << " for unknown machine "
+ << tr.task->machine;
+
+ if (ops.dump_task ())
+ return 0;
+
+ continue;
+ }
+
+ task_manifest& t (*tr.task);
+
+ if (ops.dump_task ())
+ {
+ serialize_manifest (t, cout, "stdout", "task");
+ return 0;
+ }
+
+ // If we have our own repository certificate fingerprints, then use them
+ // to replace what we have received from the controller.
+ //
+ if (!ops.trust ().empty ())
+ t.trust = ops.trust ();
+
+ const dir_path& d (ds[i]); // The -<toolchain> directory.
+ const bootstrapped_machine_manifest& m (ms[i]);
+
+ result_manifest r (perform_task (d, m, t));
+
+ if (ops.dump_result ())
+ {
+ serialize_manifest (r, cout, "stdout", "result");
+ return 0;
+ }
+
+ // Prepare answer to the private key challenge.
+ //
+ optional<vector<char>> challenge;
+
+ if (tr.challenge)
+ try
+ {
+ assert (ops.auth_key_specified ());
+
+ openssl os (trace,
+ fdstream_mode::text, path ("-"), 2,
+ ops.openssl (), "rsautl",
+ ops.openssl_option (), "-sign", "-inkey", ops.auth_key ());
+
+ os.out << *tr.challenge;
+ os.out.close ();
+
+ challenge = os.in.read_binary ();
+ os.in.close ();
+
+ if (!os.wait ())
+ throw_generic_error (EIO);
+ }
+ catch (const system_error& e)
+ {
+ // The task response challenge is valid (verified by manifest parser),
+ // so there is something wrong with setup, and so the failure is fatal.
+ //
+ fail << "unable to sign task response challenge: " << e;
+ }
+
+ // Upload the result.
+ //
+ result_request_manifest rq {tr.session, move (challenge), move (r)};
+ {
+ const string& u (*tr.result_url);
+
+ try
+ {
+ http_curl c (trace,
+ path ("-"),
+ nullfd, // Not expecting any data in response.
+ curl::post,
+ u,
+ "--header", "Content-Type: text/manifest",
+ "--max-time", ops.request_timeout ());
+
+ // This is tricky/hairy: we may fail hard writing the input before
+ // seeing that curl exited with an error and failing softly.
+ //
+ bool f (false);
+
+ try
+ {
+ serialize_manifest (rq, c.out, u, "task request");
+ }
+ catch (const failed&) {f = true;}
+
+ c.out.close ();
+
+ if (!c.wait () || f)
+ throw_generic_error (EIO);
+ }
+ catch (const system_error& e)
+ {
+ error << "unable to upload result to " << u << ": " << e;
+ continue;
+ }
+ }
+
+ l2 ([&]{trace << "built " << t.name << '/' << t.version << " "
+ << "on " << t.machine << " "
+ << "for " << url;});
+ }
+}
+catch (const failed&)
+{
+ return 1; // Diagnostics has already been issued.
+}
+catch (const cli::exception& e)
+{
+ error << e;
+ return 1;
+}
+
+namespace bbot
+{
+ static unsigned int rand_seed; // Seed for rand_r();
+
+ size_t
+ genrand ()
+ {
+ if (rand_seed == 0)
+ rand_seed = static_cast<unsigned int> (
+ chrono::system_clock::now ().time_since_epoch ().count ());
+
+ return static_cast<size_t> (rand_r (&rand_seed));
+ }
+
+ // Note: Linux-specific implementation.
+ //
+ string
+ iface_addr (const string& i)
+ {
+ if (i.size () >= IFNAMSIZ)
+ throw invalid_argument ("interface nama too long");
+
+ auto_fd fd (socket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0));
+
+ if (fd.get () == -1)
+ throw_system_error (errno);
+
+ ifreq ifr;
+ ifr.ifr_addr.sa_family = AF_INET;
+ strcpy (ifr.ifr_name, i.c_str ());
+
+ if (ioctl (fd.get (), SIOCGIFADDR, &ifr) == -1)
+ throw_system_error (errno);
+
+ char buf[3 * 4 + 3 + 1]; // IPv4 address.
+ if (inet_ntop (AF_INET,
+ &reinterpret_cast<sockaddr_in*> (&ifr.ifr_addr)->sin_addr,
+ buf,
+ sizeof (buf)) == nullptr)
+ throw_system_error (errno);
+
+ return buf;
+ }
+}
diff --git a/bbot/agent/agent.hxx b/bbot/agent/agent.hxx
new file mode 100644
index 0000000..96876bc
--- /dev/null
+++ b/bbot/agent/agent.hxx
@@ -0,0 +1,45 @@
+// file : bbot/agent/agent.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+#ifndef BBOT_AGENT_AGENT_HXX
+#define BBOT_AGENT_AGENT_HXX
+
+#include <sys/types.h> // uid_t
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+
+#include <bbot/agent/agent-options.hxx>
+
+namespace bbot
+{
+ extern agent_options ops;
+
+ extern const string bs_prot; // Bootstrap protocol version.
+
+ extern string tc_name; // Toolchain name.
+ extern uint16_t tc_num; // Toolchain number.
+ extern standard_version tc_ver; // Toolchain version.
+ extern string tc_id; // Toolchain id.
+
+ extern string hname; // Our host name.
+ extern uid_t uid; // Our effective user id.
+ extern string uname; // Our effective user name.
+
+ // Random number generator (currently not MT-safe and limited to RAND_MAX).
+ //
+ size_t
+ genrand ();
+
+ template <typename T>
+ inline T
+ genrand () {return static_cast<T> (genrand ());}
+
+ // Return the IPv4 address of an interface.
+ //
+ string
+ iface_addr (const string&);
+}
+
+#endif // BBOT_AGENT_AGENT_HXX
diff --git a/bbot/agent/machine-manifest.cxx b/bbot/agent/machine-manifest.cxx
new file mode 100644
index 0000000..3312d1b
--- /dev/null
+++ b/bbot/agent/machine-manifest.cxx
@@ -0,0 +1,355 @@
+// file : bbot/agent/machine-manifest.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+#include <bbot/agent/machine-manifest.hxx>
+
+#include <sstream>
+
+#include <libbutl/tab-parser.hxx>
+#include <libbutl/string-parser.hxx>
+#include <libbutl/manifest-parser.hxx>
+#include <libbutl/manifest-serializer.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+ using parser = manifest_parser;
+ using parsing = manifest_parsing;
+ using serializer = manifest_serializer;
+ using serialization = manifest_serialization;
+ using name_value = manifest_name_value;
+
+ // machine_type
+ //
+ string
+ to_string (machine_type t)
+ {
+ switch (t)
+ {
+ case machine_type::kvm: return "kvm";
+ case machine_type::nspawn: return "nspawn";
+ }
+
+ assert (false);
+ return string ();
+ }
+
+ machine_type
+ to_machine_type (const string& t)
+ {
+ if (t == "kvm") return machine_type::kvm;
+ else if (t == "nspawn") return machine_type::nspawn;
+ else throw invalid_argument ("invalid machine type '" + t + "'");
+ }
+
+ // machine_manifest
+ //
+ machine_manifest::
+ machine_manifest (parser& p, bool iu)
+ : machine_manifest (p, p.next (), iu)
+ {
+ // Make sure this is the end.
+ //
+ name_value nv (p.next ());
+ if (!nv.empty ())
+ throw parsing (p.name (), nv.name_line, nv.name_column,
+ "single machine manifest expected");
+ }
+
+ machine_manifest::
+ machine_manifest (parser& p, name_value nv, bool iu)
+ : machine_header_manifest (p, move (nv), unknown_name_mode::stop, &nv)
+ {
+ auto bad_name = [&p, &nv] (const string& d)
+ {
+ throw parsing (p.name (), nv.name_line, nv.name_column, d);
+ };
+
+ // Offsets are used to tie an error to the specific position inside a
+ // manifest value (possibly a multiline one).
+ //
+ auto bad_value = [&p, &nv] (
+ const string& d, uint64_t column_offset = 0, uint64_t line_offset = 0)
+ {
+ throw parsing (p.name (),
+ nv.value_line + line_offset,
+ (line_offset == 0 ? nv.value_column : 1) + column_offset,
+ d);
+ };
+
+ optional<machine_type> type;
+
+ for (; !nv.empty (); nv = p.next ())
+ {
+ string& n (nv.name);
+ string& v (nv.value);
+
+ if (n == "type")
+ {
+ if (type)
+ bad_name ("machine type redefinition");
+
+ try
+ {
+ type = to_machine_type (v);
+ }
+ catch (const invalid_argument&)
+ {
+ bad_value ("invalid machine type");
+ }
+ }
+ else if (n == "mac")
+ {
+ if (mac)
+ bad_name ("machine mac redefinition");
+
+ // @@ Should we check that the value is a valid mac?
+ //
+ mac = move (v);
+ }
+ else if (n == "options")
+ {
+ if (options)
+ bad_name ("machine options redefinition");
+
+ strings op;
+
+ // Note that when reporting errors we combine the manifest value
+ // position with the respective error position.
+ //
+ try
+ {
+ istringstream is (v);
+ tab_parser parser (is, "");
+
+ tab_fields tl;
+ while (!(tl = parser.next ()).empty ())
+ {
+ for (auto& tf: tl)
+ op.emplace_back (move (tf.value));
+ }
+ }
+ catch (const tab_parsing& e)
+ {
+ bad_value ("invalid machine options: " + e.description,
+ e.column - 1,
+ e.line - 1);
+ }
+
+ if (op.empty ())
+ bad_value ("empty machine options");
+
+ options = move (op);
+ }
+ else if (!iu)
+ bad_name ("unknown name '" + n + "' in machine manifest");
+ }
+
+ // Verify all non-optional values were specified.
+ //
+ if (!type)
+ bad_value ("no machine type specified");
+
+ this->type = *type;
+ }
+
+ void machine_manifest::
+ serialize (serializer& s) const
+ {
+ // @@ Should we check that all non-optional values are specified and all
+ // values are valid?
+ //
+
+ machine_header_manifest::serialize (s, false);
+
+ s.next ("type", to_string (type));
+
+ if (mac)
+ s.next ("mac", *mac);
+
+ // Recompose options string as a space-separated option list,
+ //
+ if (options)
+ {
+ string v;
+ for (auto b (options->cbegin ()), i (b), e (options->cend ()); i != e;
+ ++i)
+ {
+ if (i != b)
+ v += ' ';
+
+ v += *i;
+ }
+
+ s.next ("options", v);
+ }
+
+ s.next ("", ""); // End of manifest.
+ }
+
+ strings machine_manifest::
+ unquoted_options () const
+ {
+ return options
+ ? string_parser::unquote (*options)
+ : strings ();
+ }
+
+ // toolchain_manifest
+ //
+ toolchain_manifest::
+ toolchain_manifest (parser& p, bool iu)
+ : toolchain_manifest (p, p.next (), iu)
+ {
+ // Make sure this is the end.
+ //
+ name_value nv (p.next ());
+ if (!nv.empty ())
+ throw parsing (p.name (), nv.name_line, nv.name_column,
+ "single toolchain manifest expected");
+ }
+
+ toolchain_manifest::
+ toolchain_manifest (parser& p, name_value nv, bool iu)
+ {
+ auto bad_name = [&p, &nv] (const string& d)
+ {
+ throw parsing (p.name (), nv.name_line, nv.name_column, d);
+ };
+
+ auto bad_value = [&p, &nv] (const string& d)
+ {
+ throw parsing (p.name (), nv.value_line, nv.value_column, d);
+ };
+
+ // Make sure this is the start and we support the version.
+ //
+ if (!nv.name.empty ())
+ bad_name ("start of toolchain manifest expected");
+
+ if (nv.value != "1")
+ bad_value ("unsupported format version");
+
+ // Parse the toolchain manifest.
+ //
+ for (nv = p.next (); !nv.empty (); nv = p.next ())
+ {
+ string& n (nv.name);
+ string& v (nv.value);
+
+ if (n == "id")
+ {
+ if (!id.empty ())
+ bad_name ("toolchain id redefinition");
+
+ if (v.empty ())
+ bad_value ("empty toolchain id");
+
+ id = move (v);
+ }
+ else if (!iu)
+ bad_name ("unknown name '" + n + "' in toolchain manifest");
+ }
+
+ // Verify all non-optional values were specified.
+ //
+ if (id.empty ())
+ bad_value ("no toolchain id specified");
+ }
+
+ void toolchain_manifest::
+ serialize (serializer& s) const
+ {
+ // @@ Should we check that all non-optional values are specified?
+ //
+ s.next ("", "1"); // Start of manifest.
+ s.next ("id", id);
+ s.next ("", ""); // End of manifest.
+ }
+
+ // bootstrapped_machine_manifest
+ //
+ bootstrapped_machine_manifest::
+ bootstrapped_machine_manifest (parser& p, bool iu)
+ {
+ name_value nv (p.next ());
+
+ auto bad_name = [&p, &nv] (const string& d)
+ {
+ throw parsing (p.name (), nv.name_line, nv.name_column, d);
+ };
+
+ auto bad_value = [&p, &nv] (const string& d)
+ {
+ throw parsing (p.name (), nv.value_line, nv.value_column, d);
+ };
+
+ // Make sure this is the start and we support the version.
+ //
+ if (!nv.name.empty ())
+ bad_name ("start of bootstrapped machine manifest expected");
+
+ if (nv.value != "1")
+ bad_value ("unsupported format version");
+
+ // Parse the bootstrapped machine manifest. Currently there is no values
+ // expected.
+ //
+ for (nv = p.next (); !nv.empty (); nv = p.next ())
+ {
+ if (!iu)
+ bad_name ("unknown name '" + nv.name +
+ "' in bootstrapped machine manifest");
+ }
+
+ nv = p.next ();
+ if (nv.empty ())
+ bad_value ("machine manifest expected");
+
+ machine = machine_manifest (p, nv, iu);
+
+ if (!machine.mac)
+ bad_name ("mac address must be present in machine manifest");
+
+ nv = p.next ();
+ if (nv.empty ())
+ bad_value ("toolchain manifest expected");
+
+ toolchain = toolchain_manifest (p, nv, iu);
+
+ nv = p.next ();
+ if (nv.empty ())
+ bad_value ("bootstrap manifest expected");
+
+ bootstrap = bootstrap_manifest (p, nv, iu);
+
+ // Make sure this is the end.
+ //
+ nv = p.next ();
+ if (!nv.empty ())
+ throw parsing (p.name (), nv.name_line, nv.name_column,
+ "single bootstrapped machine manifest expected");
+ }
+
+ void bootstrapped_machine_manifest::
+ serialize (serializer& s) const
+ {
+ // @@ Should we check that all non-optional values are specified?
+ //
+ s.next ("", "1"); // Start of manifest.
+ s.next ("", ""); // End of manifest.
+
+ if (!machine.mac)
+ throw serialization (s.name (),
+ "mac address must be present in machine manifest");
+
+ machine.serialize (s);
+ toolchain.serialize (s);
+ bootstrap.serialize (s);
+
+ s.next ("", ""); // End of stream.
+ }
+}
diff --git a/bbot/agent/machine-manifest.hxx b/bbot/agent/machine-manifest.hxx
new file mode 100644
index 0000000..37919ba
--- /dev/null
+++ b/bbot/agent/machine-manifest.hxx
@@ -0,0 +1,118 @@
+// file : bbot/agent/machine-manifest.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+#ifndef BBOT_AGENT_MACHINE_MANIFEST_HXX
+#define BBOT_AGENT_MACHINE_MANIFEST_HXX
+
+#include <map>
+
+#include <libbutl/manifest-forward.hxx>
+
+#include <libbbot/manifest.hxx> // machine_header
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+
+#include <bbot/bootstrap-manifest.hxx>
+
+namespace bbot
+{
+ // Machine type.
+ //
+ enum class machine_type {kvm, nspawn};
+
+ string
+ to_string (machine_type);
+
+ machine_type
+ to_machine_type (const string&); // Throws invalid_argument.
+
+ // Machine.
+ //
+ class machine_manifest: public machine_header_manifest
+ {
+ public:
+ machine_type type;
+ optional<string> mac; // Required in bootstrapped machine manifest.
+ optional<strings> options; // Note: could be quoted.
+
+ strings
+ unquoted_options () const; // Return empty if absent.
+
+ machine_manifest (std::string i,
+ std::string n,
+ std::string s,
+ machine_type t,
+ optional<string> m,
+ optional<strings> o)
+ : machine_header_manifest (std::move (i),
+ std::move (n),
+ std::move (s)),
+ type (t),
+ mac (std::move (m)),
+ options (std::move (o)) {}
+
+ public:
+ machine_manifest () = default; // VC export.
+ machine_manifest (butl::manifest_parser&, bool ignore_unknown = false);
+ machine_manifest (butl::manifest_parser&,
+ butl::manifest_name_value start,
+ bool ignore_unknown = false);
+
+ void
+ serialize (butl::manifest_serializer&) const;
+ };
+
+ // Toolchain.
+ //
+ class toolchain_manifest
+ {
+ public:
+
+ // Toolchain id (SHAXXX).
+ //
+ string id;
+
+ explicit
+ toolchain_manifest (string i): id (i) {}
+
+ public:
+ toolchain_manifest () = default; // VC export.
+ toolchain_manifest (butl::manifest_parser&, bool ignore_unknown = false);
+ toolchain_manifest (butl::manifest_parser&,
+ butl::manifest_name_value start,
+ bool ignore_unknown = false);
+
+ void
+ serialize (butl::manifest_serializer&) const;
+ };
+
+ // The manifest stored in <name>-<toolchain>/ consists of the machine
+ // manifest (original), toolchain manifest, and bootstrap manifest.
+ //
+ class bootstrapped_machine_manifest
+ {
+ public:
+ machine_manifest machine;
+ toolchain_manifest toolchain;
+ bootstrap_manifest bootstrap;
+
+ bootstrapped_machine_manifest (machine_manifest m,
+ toolchain_manifest t,
+ bootstrap_manifest b)
+ : machine (move (m)), toolchain (move (t)), bootstrap (move (b)) {}
+
+ public:
+ bootstrapped_machine_manifest () = default; // VC export.
+ bootstrapped_machine_manifest (butl::manifest_parser&,
+ bool ignore_unknown = false);
+
+ void
+ serialize (butl::manifest_serializer&) const;
+ };
+
+ using bootstrapped_machine_manifests = vector<bootstrapped_machine_manifest>;
+}
+
+#endif // BBOT_AGENT_MACHINE_MANIFEST_HXX
diff --git a/bbot/agent/machine.cxx b/bbot/agent/machine.cxx
new file mode 100644
index 0000000..422c623
--- /dev/null
+++ b/bbot/agent/machine.cxx
@@ -0,0 +1,474 @@
+// file : bbot/agent/machine.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+#include <bbot/agent/machine.hxx>
+
+#include <unistd.h> // sleep()
+
+#include <sys/un.h> // sockaddr_un
+#include <sys/socket.h>
+
+#include <cstdio> // snprintf()
+#include <cstring> // strcpy()
+
+#include <bbot/agent/agent.hxx>
+#include <bbot/agent/machine-manifest.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+ // Forward TFTP requests (UDP/69) coming from the machine to the specified
+ // port.
+ //
+ // This allows the machine to connect to any "unknown" IP (e.g., link-local
+ // 196.254.111.222) port 69 and end up being redirected to out TFTP server.
+ //
+ static void
+ iptables (tracer& t,
+ const char* a,
+ const string& tap,
+ const string& br,
+ uint16_t port,
+ bool ignore_errors = false)
+ {
+ string addr (iface_addr (br));
+
+ auto_fd fdn (ignore_errors ? fdnull () : nullfd);
+ int ofd (ignore_errors ? fdn.get () : 2);
+
+ process_exit::code_type e;
+
+ e = run_io_exit (t, 0, ofd, ofd,
+ "sudo", "iptables",
+ "-t", "nat",
+ a, "PREROUTING",
+ "-m", "udp",
+ "-p", "udp",
+ "-m", "physdev",
+ "-i", br,
+ "--physdev-in", tap,
+ "--dport", 69,
+ "-j", "DNAT",
+ "--to-destination", addr + ':' + to_string (port));
+
+ if (e != 0 && !ignore_errors)
+ fail << "process iptables terminated with non-zero exit code";
+
+ // Nobody really knows whether this is really needed (really)...
+ //
+ e = run_io_exit (t, 0, ofd, ofd,
+ "sudo", "iptables",
+ a, "FORWARD",
+ "-m", "udp",
+ "-p", "udp",
+ "-m", "physdev",
+ "-o", br,
+ "--physdev-out", tap,
+ "-d", addr,
+ "--dport", port,
+ "-m", "state",
+ "--state", "NEW,ESTABLISHED,RELATED",
+ "-j", "ACCEPT");
+
+ if (e != 0 && !ignore_errors)
+ fail << "process iptables terminated with non-zero exit code";
+ }
+
+ static string
+ create_tap (const string& br, uint16_t port)
+ {
+ string t ("tap" + to_string (tc_num));
+
+ tracer trace ("create_tap", t.c_str ());
+
+ // First try to delete it in case there is one from a previous run.
+ //
+ iptables (trace, "-D", t, br, port, true); // Ignore errors.
+ run_exit (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap");
+
+ run (trace, "sudo", "ip", "tuntap", "add", t, "mode", "tap", "user", uid);
+ run (trace, "sudo", "ip", "link", "set", t, "up");
+ run (trace, "sudo", "ip", "link", "set", t, "master", br);
+
+ iptables (trace, "-A", t, br, port); // Add.
+
+ return t;
+ }
+
+ static void
+ destroy_tap (const string& t, const string& br, uint16_t port)
+ {
+ tracer trace ("destroy_tap", t.c_str ());
+ iptables (trace, "-D", t, br, port); // Delete.
+ run (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap");
+ }
+
+ class tap
+ {
+ public:
+ string iface;
+
+ string bridge; // Bridge interface to which this tap belongs
+ uint16_t port; // UDP port to forward TFTP traffic to.
+
+ tap (string b, uint16_t p)
+ : iface (create_tap (b, p)), bridge (move (b)), port (p) {}
+
+ ~tap ()
+ {
+ if (!iface.empty ())
+ {
+ try {destroy ();} catch (...) {}
+ }
+ }
+
+ void
+ destroy ()
+ {
+ destroy_tap (iface, bridge, port);
+ iface.clear ();
+ }
+ };
+
+ static string
+ generate_mac ()
+ {
+ // The last two bits of the first byte are special: bit 1 indicates a
+ // multicast address (which we don't want) while bit 1 -- local assignment
+ // (which we do want).
+ //
+ char r[6 * 2 + 5 + 1];
+ snprintf (r, sizeof (r),
+ "%02x:%02x:%02x:%02x:%02x:%02x",
+ (genrand<uint8_t> () & 0xFE) | 0x02,
+ genrand<uint8_t> (),
+ genrand<uint8_t> (),
+ genrand<uint8_t> (),
+ genrand<uint8_t> (),
+ genrand<uint8_t> ());
+ return r;
+ }
+
+ class kvm_machine: public machine
+ {
+ public:
+ kvm_machine (const dir_path&,
+ const machine_manifest&,
+ const optional<string>& mac,
+ const string& br_iface,
+ uint16_t tftp_port);
+
+ virtual bool
+ shutdown (size_t& seconds) override;
+
+ virtual void
+ forcedown (bool fail_hard) override;
+
+ virtual void
+ suspend () override;
+
+ bool
+ wait (size_t& seconds, bool fail_hard) override;
+
+ using machine::wait;
+
+ virtual void
+ print_info (diag_record&) override;
+
+ private:
+ void
+ monitor_command (const string&, bool fail_hard = true);
+
+ private:
+ path kvm; // Hypervisor binary.
+ tap net; // Tap network interface.
+ string vnc; // QEMU VNC TCP addr:port.
+ path monitor; // QEMU monitor UNIX socket.
+ process proc;
+ };
+
+ kvm_machine::
+ kvm_machine (const dir_path& md,
+ const machine_manifest& mm,
+ const optional<string>& omac,
+ const string& br,
+ uint16_t port)
+ : machine (mm.mac ? *mm.mac : // Fixed mac from machine manifest.
+ omac ? *omac : // Generated mac from previous bootstrap.
+ generate_mac ()),
+ kvm ("kvm"),
+ net (br, port),
+ vnc ("127.0.0.1:" + to_string (5900 + tc_num)),
+ monitor ("/tmp/" + tc_name + "-monitor")
+ {
+ tracer trace ("kvm_machine", md.string ().c_str ());
+
+ if (sizeof (sockaddr_un::sun_path) <= monitor.size ())
+ throw invalid_argument ("monitor unix socket path too long");
+
+ // Map logical CPUs to sockets/cores/threads. Failed that, QEMU just makes
+ // it a machine with that number of sockets and some operating systems
+ // (like Windows) only can do two.
+ //
+ size_t cpu (ops.cpu ());
+
+ size_t sockets (cpu <= 8 ? 1 : cpu <= 64 ? 2 : 4);
+ size_t cores (cpu / sockets);
+ size_t threads (cores <= 4 ? 1 : 2);
+ cores /= threads;
+
+
+ // We probably don't want to commit all the available RAM to the VM since
+ // some of it could be used on the host side for caching, etc. So the
+ // heuristics that we will use is 4G or 1G per CPU, whichever is greater
+ // and the rest divide equally between the host and the VM.
+ //
+ size_t ram ((cpu < 4 ? 4 : cpu) * 1024 * 1024); // Kb.
+
+ if (ram > ops.ram ())
+ ram = ops.ram ();
+ else
+ ram += (ops.ram () - ram) / 2;
+
+ // If we have options, use that instead of the default network and
+ // disk configuration.
+ //
+ strings os;
+
+ if (mm.options)
+ {
+ os = mm.unquoted_options ();
+
+ // Pre-process ifname=? and mac=?.
+ //
+ auto sub = [] (string& o, const char* s, const string& r)
+ {
+ size_t p (o.find (s));
+
+ if (p != string::npos)
+ {
+ p = o.find ('?', p + 1);
+ assert (p != string::npos);
+ o.replace (p, 1, r);
+ }
+ };
+
+ for (string& o: os)
+ {
+ sub (o, "ifname=?", net.iface);
+ sub (o, "mac=?", mac);
+ }
+ }
+ else
+ {
+ auto add = [&os] (string o, string v)
+ {
+ os.push_back (move (o));
+ os.push_back (move (v));
+ };
+
+ // Network.
+ //
+ add ("-netdev", "tap,id=net0,script=no,ifname=" + net.iface);
+ add ("-device", "virtio-net-pci,netdev=net0,mac=" + mac);
+
+ // Disk.
+ //
+ add ("-drive", "if=none,id=disk0,file=disk.img,format=raw");
+ add ("-device", "virtio-blk-pci,scsi=off,drive=disk0");
+
+ //"-drive", "if=none,id=disk0,format=raw,file=disk.img"
+ //"-device", "virtio-scsi-pci,id=scsi"
+ //"-device", "scsi-hd,drive=disk0"
+ }
+
+ // Start the VM.
+ //
+ // Notes:
+ //
+ // 1. echo system_powerdown | socat - UNIX-CONNECT:.../monitor
+ //
+ proc = run_io_start (
+ trace,
+ fdnull (),
+ 2,
+ 2,
+ md, // Run from the machine's directory.
+ kvm,
+ "-boot", "c", // Boot from disk.
+ "-no-reboot", // Exit on VM reboot.
+ "-m", to_string (ram / 1024) + "M",
+ "-cpu", "host",
+ "-smp", (to_string (cpu) +
+ ",sockets=" + to_string (sockets) +
+ ",cores=" + to_string (cores) +
+ ",threads=" + to_string (threads)),
+ os,
+ "-vnc", "127.0.0.1:" + to_string (tc_num), // 5900 + tc_num
+ "-monitor", "unix:" + monitor.string () + ",server,nowait");
+ }
+
+ // Connect to the QEMU monitor via the UNIX socket and send system_reset.
+ // You may be wondering why not system_powerdown? The reason is that while
+ // not all OS know how to power-down the machine, pretty much all of them
+ // can reboot. So combined with the -no-reboot option above, we get the
+ // same result in a more robust way.
+ //
+ // Note that this setup has one side effect: if the VM decided to reboot,
+ // say, during bootstrap, then we will interpret it as a shutdown. Current
+ // thinking saying this is good since we don't want our VMs to reboot
+ // uncontrollably for security and predictability reasons (e.g., we don't
+ // want Windows to decide to install updates -- this stuff should all be
+ // disabled during the VM preparation).
+ //
+ // Actually, this turned out not to be entirely accurate: reset appears to
+ // be a "hard reset" while powerdown causes a clean shutdown. So we use
+ // powerdown to implement shutdown() and reset/-no-reboot for implement
+ // forcedown().
+ //
+ bool kvm_machine::
+ shutdown (size_t& seconds)
+ {
+ monitor_command ("system_powerdown");
+
+ // Wait for up to the specified number if seconds for the machine to
+ // shutdown.
+ //
+ return wait (seconds);
+ }
+
+ void kvm_machine::
+ forcedown (bool fh)
+ {
+ monitor_command ("system_reset", fh);
+ wait (fh);
+ }
+
+ void kvm_machine::
+ suspend ()
+ {
+ monitor_command ("stop");
+ }
+
+ void kvm_machine::
+ print_info (diag_record& dr)
+ {
+ dr << info << "qemu pid: " << proc.id ()
+ << info << "qemu vnc: " << vnc
+ << info << "qemu monitor: unix:" << monitor;
+ }
+
+ bool kvm_machine::
+ wait (size_t& sec, bool fh)
+ {
+ try
+ {
+ tracer trace ("kvm_machine::wait");
+
+ bool t;
+ for (; !(t = proc.try_wait ()) && sec != 0; --sec)
+ sleep (1);
+
+ if (t)
+ {
+ run_io_finish (trace, proc, kvm, fh);
+ net.destroy (); //@@ Always fails hard.
+ try_rmfile (monitor, true); // QEMU doesn't seem to remove it.
+ }
+
+ return t;
+ }
+ catch (const process_error& e)
+ {
+ fail (fh) << "unable to execute " << kvm << ": " << e << endf;
+ }
+ }
+
+ void kvm_machine::
+ monitor_command (const string& c, bool fh)
+ {
+ try
+ {
+ sockaddr_un addr;
+ addr.sun_family = AF_LOCAL;
+ strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor
+
+ auto_fd sock (socket (AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0));
+
+ if (sock.get () == -1)
+ throw_system_error (errno);
+
+ if (connect (sock.get (),
+ reinterpret_cast<sockaddr*> (&addr),
+ sizeof (addr)) == -1)
+ throw_system_error (errno);
+
+ // Read until we get something.
+ //
+ auto readsome = [&sock] ()
+ {
+ ifdstream ifs (move (sock),
+ fdstream_mode::non_blocking,
+ ostream::badbit);
+
+ char buf[256];
+ for (streamsize n (0), m (0);
+ n == 0 || m != 0;
+ m = ifs.readsome (buf, sizeof (buf) - 1))
+ {
+ if (m != 0)
+ {
+ n += m;
+
+ //buf[m] = '\0';
+ //text << buf;
+ }
+ }
+
+ sock = ifs.release ();
+ };
+
+ // Read QEMU welcome.
+ //
+ readsome ();
+
+ // Write our command.
+ //
+ {
+ ofdstream ofs (move (sock), fdstream_mode::blocking);
+ ofs << c << endl;
+ sock = ofs.release ();
+ }
+
+ // Read QEMU reply (may hit eof).
+ //
+ readsome ();
+ return;
+ }
+ catch (const system_error& e)
+ {
+ fail (fh) << "unable to communicate with qemu monitor: " << e;
+ }
+ }
+
+ unique_ptr<machine>
+ start_machine (const dir_path& md,
+ const machine_manifest& mm,
+ const optional<string>& mac,
+ const string& br_iface,
+ uint16_t tftp_port)
+ {
+ switch (mm.type)
+ {
+ case machine_type::kvm:
+ return make_unique<kvm_machine> (md, mm, mac, br_iface, tftp_port);
+ case machine_type::nspawn:
+ assert (false); //@@ TODO
+ }
+
+ return nullptr;
+ }
+}
diff --git a/bbot/agent/machine.hxx b/bbot/agent/machine.hxx
new file mode 100644
index 0000000..e352e42
--- /dev/null
+++ b/bbot/agent/machine.hxx
@@ -0,0 +1,84 @@
+// file : bbot/agent/machine.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+#ifndef BBOT_AGENT_MACHINE_HXX
+#define BBOT_AGENT_MACHINE_HXX
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+
+namespace bbot
+{
+ // A running build machine (container, vm, etc).
+ //
+ // Note that if the machine is destroyed while it is still running, the
+ // destructor will block until the machine process terminates.
+ //
+ // Some functions can fail softly if the fail_hard argument is false.
+ //
+ class machine
+ {
+ public:
+ // Shut the machine down cleanly waiting up to the specified number of
+ // seconds for completion. Update the timeout and return false if the
+ // machine is still running, true if the machine exited successfully, and
+ // throw failed otherwise.
+ //
+ virtual bool
+ shutdown (size_t& seconds) = 0;
+
+ // Force the machine down.
+ //
+ virtual void
+ forcedown (bool fail_hard = true) = 0;
+
+ // Suspend the machine.
+ //
+ virtual void
+ suspend () = 0;
+
+ // Wait for the machine to terminate up to the specified number of
+ // seconds. Update the timeout and return false if the machine is still
+ // running, true if the machine exited successfully, and throw failed
+ // otherwise.
+ //
+ virtual bool
+ wait (size_t& seconds, bool fail_hard = true) = 0;
+
+ bool
+ wait (bool fail_hard = true)
+ {
+ size_t sec (~0); // Wait indefinitely.
+ return wait (sec, fail_hard);
+ }
+
+ // Print information about the machine (as info diagnostics) that can be
+ // useful for debugging (e.g., how to connect/login, etc).
+ //
+ virtual void
+ print_info (diag_record&) = 0;
+
+ public:
+ const string mac; // MAC address (inside the machine).
+
+ public:
+ virtual
+ ~machine () = default;
+
+ protected:
+ machine (string m)
+ : mac (move (m)) {}
+ };
+
+ class machine_manifest;
+
+ unique_ptr<machine>
+ start_machine (const dir_path&,
+ const machine_manifest&,
+ const optional<string>& mac,
+ const string& br_iface,
+ uint16_t tftp_port);
+}
+
+#endif // BBOT_AGENT_MACHINE_HXX
diff --git a/bbot/agent/tftp.cxx b/bbot/agent/tftp.cxx
new file mode 100644
index 0000000..27c1577
--- /dev/null
+++ b/bbot/agent/tftp.cxx
@@ -0,0 +1,137 @@
+// file : bbot/agent/tftp.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+#include <bbot/agent/tftp.hxx>
+
+#include <arpa/inet.h> // htonl()
+#include <netinet/in.h> // sockaddr_in
+#include <sys/socket.h>
+#include <sys/select.h>
+
+#include <cstring> // memset()
+
+#include <bbot/agent/agent.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+ tftp_server::
+ tftp_server (const string& map, uint16_t port)
+ {
+ int fd (socket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0));
+
+ if (fd == -1)
+ throw_system_error (errno);
+
+ fd_.reset (fd);
+
+ // Bind to ephemeral port.
+ //
+ sockaddr_in addr;
+ memset (&addr, 0, sizeof (addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl (INADDR_ANY);
+ addr.sin_port = htons (port);
+
+ // Not to confuse with std::bind().
+ //
+ if (::bind (fd,
+ reinterpret_cast<sockaddr*> (&addr),
+ sizeof (sockaddr_in)) == -1)
+ throw_system_error (errno);
+
+ // Create the map file.
+ //
+ map_ = auto_rmfile (path::temp_path ("bbot-agent-tftp-map"));
+ ofdstream ofs (map_.path ());
+ ofs << map << endl;
+ ofs.close ();
+ }
+
+ uint16_t tftp_server::
+ port () const
+ {
+ sockaddr_in addr;
+ socklen_t size (sizeof (addr));
+
+ if (getsockname (fd_.get (),
+ reinterpret_cast<sockaddr*> (&addr),
+ &size) == -1)
+ throw_system_error (errno);
+
+ assert (size == sizeof (addr));
+ return ntohs (addr.sin_port);
+ }
+
+ bool tftp_server::
+ serve (size_t& sec, size_t inc)
+ {
+ tracer trace ("tftp_server::serve");
+
+ if (inc == 0 || inc > sec)
+ inc = sec;
+
+ int fd (fd_.get ());
+
+ // Note: Linux updates the timeout value which we rely upon.
+ //
+ timeval timeout {static_cast<long> (inc), 0};
+
+ fd_set rd;
+ FD_ZERO (&rd);
+
+ for (;;)
+ {
+ FD_SET (fd, &rd);
+
+ int r (select (fd + 1, &rd, nullptr, nullptr, &timeout));
+
+ if (r == -1)
+ {
+ if (errno == EINTR)
+ continue;
+
+ throw_system_error (errno);
+ }
+ else if (r == 0) // Timeout.
+ {
+ sec -= inc;
+ return false;
+ }
+
+ if (FD_ISSET (fd, &rd))
+ {
+ // The inetd "protocol" is to pass the socket as stdin/stdout file
+ // descriptors.
+ //
+ // Notes/issues:
+ //
+ // 1. Writes diagnostics to syslog.
+ //
+ run_io (trace,
+ fddup (fd),
+ fddup (fd),
+ 2,
+ "sudo", // Required for --secure (chroot).
+ "/usr/sbin/in.tftpd", // Standard installation location.
+ "--timeout", 1, // Wait for more requests.
+ "--permissive", // Use inherited umask.
+ "--create", // Allow creating new files (PUT).
+ "--map-file", map_.path (), // Path remapping rules.
+ "--user", uname, // Run as our effective user.
+ "--secure", // Chroot to data directory.
+ ops.tftp ());
+
+ // This is not really accurate since tftpd will, for example, serve
+ // an upload request until it is complete. But it's close anough for
+ // our needs.
+ //
+ sec -= (inc - static_cast<size_t> (timeout.tv_sec));
+ return true;
+ }
+ }
+ }
+}
diff --git a/bbot/agent/tftp.hxx b/bbot/agent/tftp.hxx
new file mode 100644
index 0000000..76b4d1c
--- /dev/null
+++ b/bbot/agent/tftp.hxx
@@ -0,0 +1,47 @@
+// file : bbot/agent/tftp.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : TBC; see accompanying LICENSE file
+
+#ifndef BBOT_AGENT_TFTP_HXX
+#define BBOT_AGENT_TFTP_HXX
+
+#include <bbot/types.hxx>
+#include <bbot/utility.hxx>
+
+namespace bbot
+{
+ // A TFTP server "wrapper" over tftpd-hpa.
+ //
+ // In a nutshell, we are pretending to be inetd and when a request arrives,
+ // spawn tftpd-hpa to handle it.
+ //
+ class tftp_server
+ {
+ public:
+ // The map argument specifies the path mapping rules, one per line (see
+ // the tftpd-hpa --map-file|-m option for details). If port is 0, then
+ // it is automatically assigned.
+ //
+ tftp_server (const string& map, uint16_t port);
+
+ // Return the assigned port.
+ //
+ uint16_t
+ port () const;
+
+ // Wait for a TFTP request for up to the specified number of seconds. If
+ // increment is not 0, then wait in the specified incremenets (i.e., wait
+ // for up to that number of seconds; useful when one needs to also
+ // periodically check for something else). Update the timeout value as
+ // well as return true if a request was served and false otherwise.
+ //
+ bool
+ serve (size_t& seconds, size_t increment = 0);
+
+ private:
+ auto_fd fd_;
+ auto_rmfile map_;
+ };
+}
+
+#endif // BBOT_AGENT_TFTP_HXX