From c8ace1ee0a6cab5fd4ea2f084ea436cfa513637d Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 13 Jul 2017 22:50:15 +0300 Subject: Make use of wildcards in buildfiles --- bbot/agent.cli | 232 ------ bbot/agent.cxx | 1247 ------------------------------ bbot/agent.hxx | 45 -- bbot/agent/agent.cli | 232 ++++++ bbot/agent/agent.cxx | 1248 +++++++++++++++++++++++++++++++ bbot/agent/agent.hxx | 45 ++ bbot/agent/machine-manifest.cxx | 355 +++++++++ bbot/agent/machine-manifest.hxx | 118 +++ bbot/agent/machine.cxx | 474 ++++++++++++ bbot/agent/machine.hxx | 84 +++ bbot/agent/tftp.cxx | 137 ++++ bbot/agent/tftp.hxx | 47 ++ bbot/buildfile | 50 +- bbot/machine-manifest.cxx | 355 --------- bbot/machine-manifest.hxx | 118 --- bbot/machine.cxx | 474 ------------ bbot/machine.hxx | 84 --- bbot/tftp.cxx | 137 ---- bbot/tftp.hxx | 47 -- bbot/worker.cli | 102 --- bbot/worker.cxx | 655 ---------------- bbot/worker/worker.cli | 102 +++ bbot/worker/worker.cxx | 656 ++++++++++++++++ buildfile | 4 +- doc/buildfile | 23 +- doc/cli.sh | 2 +- tests/agent/buildfile | 2 +- unit-tests/bootstrap-manifest/buildfile | 4 +- unit-tests/machine-manifest/buildfile | 4 +- unit-tests/machine-manifest/driver.cxx | 2 +- 30 files changed, 3534 insertions(+), 3551 deletions(-) delete mode 100644 bbot/agent.cli delete mode 100644 bbot/agent.cxx delete mode 100644 bbot/agent.hxx create mode 100644 bbot/agent/agent.cli create mode 100644 bbot/agent/agent.cxx create mode 100644 bbot/agent/agent.hxx create mode 100644 bbot/agent/machine-manifest.cxx create mode 100644 bbot/agent/machine-manifest.hxx create mode 100644 bbot/agent/machine.cxx create mode 100644 bbot/agent/machine.hxx create mode 100644 bbot/agent/tftp.cxx create mode 100644 bbot/agent/tftp.hxx delete mode 100644 bbot/machine-manifest.cxx delete mode 100644 bbot/machine-manifest.hxx delete mode 100644 bbot/machine.cxx delete mode 100644 bbot/machine.hxx delete mode 100644 bbot/tftp.cxx delete mode 100644 bbot/tftp.hxx delete mode 100644 
bbot/worker.cli delete mode 100644 bbot/worker.cxx create mode 100644 bbot/worker/worker.cli create mode 100644 bbot/worker/worker.cxx diff --git a/bbot/agent.cli b/bbot/agent.cli deleted file mode 100644 index a5dbe01..0000000 --- a/bbot/agent.cli +++ /dev/null @@ -1,232 +0,0 @@ -// file : bbot/agent.cli -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -include ; - -"\section=1" -"\name=bbot-agent" -"\summary=build bot agent" - -namespace bbot -{ - { - " ", - - " - \h|SYNOPSIS| - - \cb{bbot-agent --help}\n - \cb{bbot-agent --version}\n - \c{\b{bbot-agent} [] ...} - - \h|DESCRIPTION| - - \cb{bbot-agent} @@ TODO. - - Note that on termination \cb{bbot-agent} may leave a working machine - snapshot behind. It is expected that the caller (normally Build OS - monitor) cleans them up before restarting the agent. - " - } - - class agent_options - { - "\h|OPTIONS|" - - bool --help {"Print usage information and exit."} - bool --version {"Print version and exit."} - - bool --systemd-daemon - { - "Run as a simple systemd daemon." - } - - path --auth-key - { - "", - "Private key for the public key-based agent authentication. If not - specified, then the agent will not be able to request tasks from - controllers that require authentication. - - The file is expected to contain a single PEM-encoded private key - without a password. A suitable key can be generated using the - following command: - - \ - $ openssl genrsa 4096 >key.pem - \ - " - } - - path --openssl = "openssl" - { - "", - "The openssl program to be used for crypto operations. You can also - specify additional options that should be passed to the openssl program - with \cb{--openssl-option}. If the openssl program is not explicitly - specified, then \cb{bbot-agent} will use \cb{openssl} by default." - } - - strings --openssl-option - { - "", - "Additional option to be passed to the openssl program (see - \cb{--openssl} for details). 
Repeat this option to specify multiple - openssl options." - } - - size_t --cpu = 1 - { - "", - "Number of CPUs (threads) to use, 1 by default." - } - - size_t --ram (1024 * 1024) // 1G - { - "", - "Amount of RAM (in kB) to use, 1G by default." - } - - string --toolchain-name = "default" - { - "", - "Toolchain name, \cb{default} by default." - } - - uint16_t --toolchain-num = 1 - { - "", - "Toolchain number, 1 by default." - } - - standard_version --toolchain-ver - { - "", - "Toolchain version. If unspecified, then the agent's version will be - used (which will be imprecise for snapshot versions)." - } - - string --toolchain-id - { - "", - "Toolchain id. If unspecified or empty, then no re-bootstrapping on - toolchain changes will be performed (which is primarily useful for - testing)." - } - - strings --trust - { - "", - "Trust repository certificate with a SHA256 ." - } - - dir_path --machines = "/build/machines/" - { - "", - "The location of the build machines, \cb{/build/machines/} by default." - } - - dir_path --tftp = "/build/tftp/" - { - "", - "The location of the TFTP server root, \cb{/build/tftp/} by default." - } - - uint16_t --tftp-port = 23400 - { - "", - "TFTP server port base, 23400 by default. The actual port is calculated - by adding the toolchain number \c{--toolchain-num} to this value." - } - - size_t --bootstrap-timeout = 900 - { - "", - "Maximum number of seconds to wait for machine bootstrap completion, - 900 (15 minutes) by default." - } - - size_t --bootstrap-retries = 2 - { - "", - "Number of time to retry a mis-booted bootstrap, 2 by default." - } - - size_t --build-timeout = 1800 - { - "", - "Maximum number of seconds to wait for build completion, 1800 (30 - minutes) by default." - } - - size_t --build-retries = 2 - { - "", - "Number of time to retry a mis-booted build, 2 by default." - } - - size_t --request-timeout = 300 - { - "", - "Maximum number of seconds to wait for controller request completion, - 300 (5 minutes) by default." 
- } - - uint16_t --verbose = 1 - { - "", - "Set the diagnostics verbosity to between 0 and 6 with level 1 - being the default." - } - - // Testing options. - // - bool --dump-machines - { - "Dump the available machines to \cb{stdout}, (re)-bootstrapping any if - necessary, and exit." - } - - bool --dump-task - { - "Dump the received build task to \cb{stdout} and exit." - } - - bool --dump-result - { - "Dump the obtained build result to \cb{stdout} and exit." - } - - bool --fake-bootstrap - { - "Fake the machine bootstrap process by creating the expected bootstrapped - machine manifest." - } - - bool --fake-build - { - "Fake the package building process by creating the aborted build result." - } - - path --fake-machine - { - "", - "Fake the machine enumeration process by reading the machine header - manifest from (or \cb{stdin} if is '\cb{-}')." - } - - path --fake-request - { - "", - "Fake the task request process by reading the task manifest from - (or \cb{stdin} if is '\cb{-}')." - } - }; - - " - \h|EXIT STATUS| - - Non-zero exit status is returned in case of an error. 
- " -} diff --git a/bbot/agent.cxx b/bbot/agent.cxx deleted file mode 100644 index d71f7b4..0000000 --- a/bbot/agent.cxx +++ /dev/null @@ -1,1247 +0,0 @@ -// file : bbot/agent.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#include - -#include // getpwuid() -#include // PATH_MAX -#include // signal() -#include // rand_r() -#include // sleep(), realink(), getuid(), fsync() - -#include // ifreq -#include // sockaddr_in -#include // inet_ntop() -#include -#include - -#include -#include - -#include -#include -#include -#include // dir_iterator - -#include - -#include -#include -#include - -#include -#include -#include -#include - -using namespace std; -using namespace butl; -using namespace bbot; - -namespace bbot -{ - agent_options ops; - - const string bs_prot ("1"); - - string tc_name; - uint16_t tc_num; - standard_version tc_ver; - string tc_id; - - string hname; - uid_t uid; - string uname; -} - -static void -file_sync (const path& f) -{ - auto_fd fd (fdopen (f, fdopen_mode::in)); - if (fsync (fd.get ()) != 0) - throw_system_error (errno); -} - -// The btrfs tool likes to print informational messages, like "Created -// snapshot such and such". Luckily, it writes them to stdout while proper -// diagnostics to stderr. -// -template -inline void -run_btrfs (tracer& t, A&&... a) -{ - if (verb >= 4) - run_io (t, fdnull (), 2, 2, "btrfs", forward (a)...); - else - run_io (t, fdnull (), fdnull (), 2, "btrfs", forward (a)...); -} - -template -inline butl::process_exit::code_type -btrfs_exit (tracer& t, A&&... a) -{ - return verb >= 4 - ? run_io_exit (t, fdnull (), 2, 2, "btrfs", forward (a)...) - : run_io_exit (t, fdnull (), fdnull (), 2, "btrfs", forward (a)...); -} - -// Bootstrap the machine. Return the bootstrapped machine manifest if -// successful and nullopt otherwise (in which case the machine directory -// should be cleaned and the machine ignored for now). 
-// -static optional -bootstrap_machine (const dir_path& md, - const machine_manifest& mm, - optional obmm) -{ - tracer trace ("bootstrap_machine", md.string ().c_str ()); - - bootstrapped_machine_manifest r { - mm, - toolchain_manifest {tc_id.empty () ? "bogus" : tc_id}, - bootstrap_manifest { - bootstrap_manifest::versions_type { - {"bbot", standard_version (BBOT_VERSION_STR)}, - {"libbbot", standard_version (LIBBBOT_VERSION_STR)}, - {"libbpkg", standard_version (LIBBPKG_VERSION_STR)}, - {"libbutl", standard_version (LIBBUTL_VERSION_STR)} - } - } - }; - - if (ops.fake_bootstrap ()) - { - r.machine.mac = "de:ad:be:ef:de:ad"; - } - else - try - { - string br ("br1"); // Using private bridge for now. - - // Start the TFTP server (server chroot is --tftp). Map: - // - // GET requests to .../toolchains//* - // PUT requests to .../bootstrap//* - // - auto_rmdir arm ((dir_path (ops.tftp ()) /= "bootstrap") /= tc_name); - try_mkdir_p (arm.path ()); - - // Bootstrap result manifest. - // - path mf (arm.path () / "manifest"); - try_rmfile (mf); - - // Note that unlike build, here we use the same VM snapshot for retries, - // which is not ideal. - // - for (size_t retry (0);; ++retry) - { - tftp_server tftpd ("Gr ^/?(.+)$ /toolchains/" + tc_name + "/\\1\n" + - "Pr ^/?(.+)$ /bootstrap/" + tc_name + "/\\1\n", - ops.tftp_port () + tc_num); - - l3 ([&]{trace << "tftp server on port " << tftpd.port ();}); - - // Start the machine. - // - unique_ptr m ( - start_machine (md, - mm, - obmm ? obmm->machine.mac : nullopt, - br, - tftpd.port ())); - - { - // If we are terminating with an exception then force the machine down. - // Failed that, the machine's destructor will block waiting for its - // completion. - // - auto mg ( - make_exception_guard ( - [&m, &md] () - { - info << "trying to force machine " << md << " down"; - try {m->forcedown ();} catch (const failed&) {} - })); - - // What happens if the bootstrap process hangs? 
The simple thing would - // be to force the machine down after some timeout and then fail. But - // that won't be very helpful for investigating the cause. So instead - // the plan is to suspend it after some timeout, issue diagnostics - // (without failing and which Build OS monitor will relay to the - // admin), and wait for the external intervention. - // - auto soft_fail = [&md, &m] (const char* msg) - { - { - diag_record dr (error); - dr << msg << " for machine " << md << ", suspending"; - m->print_info (dr); - } - m->suspend (); - m->wait (); - info << "resuming after machine suspension"; - return nullopt; - }; - - // The first request should be the toolchain download. Wait for up to - // 5 minutes for that to arrive. In a sense we use it as an indication - // that the machine has booted and the bootstrap process has started. - // Why wait so long you may wonder? Well, we may be using a new MAC - // address and operating systems like Windows may need to digest that. - // - size_t to; - const size_t startup_to (5 * 60); - const size_t bootstrap_to (ops.bootstrap_timeout ()); - const size_t shutdown_to (5 * 60); - - // This can mean two things: machine mis-configuration or what we - // euphemistically call a "mis-boot": the VM failed to boot for some - // unknown/random reason. Mac OS is particularly know for suffering - // from this. So the strategy is to retry it a couple of times and - // then suspend for investigation. - // - if (!tftpd.serve ((to = startup_to))) - { - if (retry > ops.bootstrap_retries ()) - return soft_fail ("bootstrap startup timeout"); - - warn << "machine " << mm.name << " appears to have " - << "mis-booted, retrying"; - - try {m->forcedown (false);} catch (const failed&) {} - continue; - } - - l3 ([&]{trace << "completed startup in " << startup_to - to << "s";}); - - // Next the bootstrap process may download additional toolchain - // archives, build things, and then upload the result manifest. 
So on - // our side we serve TFTP requests while periodically checking for the - // manifest file. To workaround some obscure filesystem races (the - // file's mtime/size is updated several seconds later; maybe tmpfs - // issue?), we periodically re-check. - // - for (to = bootstrap_to; to != 0; tftpd.serve (to, 2)) - { - if (file_exists (mf)) - { - file_sync (mf); - if (!file_empty (mf)) - break; - } - } - - if (to == 0) - return soft_fail ("bootstrap timeout"); - - l3 ([&]{trace << "completed bootstrap in " << bootstrap_to - to << "s";}); - - // Shut the machine down cleanly. - // - if (!m->shutdown ((to = shutdown_to))) - return soft_fail ("bootstrap shutdown timeout"); - - l3 ([&]{trace << "completed shutdown in " << shutdown_to - to << "s";}); - } - - // Parse the result manifest. - // - r.bootstrap = parse_manifest (mf, "bootstrap"); - - r.machine.mac = m->mac; // Save the MAC address. - - break; - } - } - catch (const system_error& e) - { - fail << "bootstrap error: " << e; - } - - serialize_manifest (r, md / "manifest", "bootstrapped machine"); - return r; -} - -// Return available machines and their directories as a parallel array. -// -static pair -enumerate_machines (const dir_path& machines) -try -{ - tracer trace ("enumerate_machines", machines.string ().c_str ()); - - bootstrapped_machine_manifests rm; - dir_paths rd; - - if (ops.fake_machine_specified ()) - { - auto mh ( - parse_manifest ( - ops.fake_machine (), "machine header")); - - rm.push_back ( - bootstrapped_machine_manifest { - machine_manifest { - mh.id, - mh.name, - mh.summary, - machine_type::kvm, - string ("de:ad:be:ef:de:ad"), - nullopt}, - toolchain_manifest {tc_id}, - bootstrap_manifest {} - }); - - rd.push_back (dir_path (ops.machines ()) /= mh.name); // For diagnostics. - - return make_pair (move (rm), move (rd)); - } - - // The first level are machine volumes. 
- // - for (const dir_entry& ve: dir_iterator (machines)) - { - const string vn (ve.path ().string ()); - - // Ignore hidden directories. - // - if (ve.type () != entry_type::directory || vn[0] == '.') - continue; - - const dir_path vd (dir_path (machines) /= vn); - - // Inside we have machines. - // - try - { - for (const dir_entry& me: dir_iterator (vd)) - { - const string mn (me.path ().string ()); - - if (me.type () != entry_type::directory || mn[0] == '.') - continue; - - const dir_path md (dir_path (vd) /= mn); - - // Our endgoal here is to obtain a bootstrapped snapshot of this - // machine while watching out for potential race conditions (machines - // being added/upgraded/removed; see the manual for details). - // - // So here is our overall plan: - // - // 1. Resolve current subvolume link for our bootstrap protocol. - // - // 2. If there is no link, cleanup and ignore this machine. - // - // 3. Try to create a snapshot of current subvolume (this operation is - // atomic). If failed (e.g., someone changed the link and removed - // the subvolume in the meantime), retry from #1. - // - // 4. Compare the snapshot to the already bootstrapped version (if - // any) and see if we need to re-bootstrap. If so, use the snapshot - // as a starting point. Rename to bootstrapped at the end (atomic). - // - dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -

- dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // - - bool te (dir_exists (tp)); - - auto delete_t = [&tp, &trace] () - { - run_btrfs (trace, "property", "set", "-ts", tp, "ro", "false"); - run_btrfs (trace, "subvolume", "delete", tp); - }; - - for (size_t retry (0);; ++retry) - { - if (retry != 0) - sleep (1); - - // Resolve the link to subvolume path. - // - dir_path sp; // -

. - try - { - char b [PATH_MAX + 1]; - ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b))); - - if (r == -1) - { - if (errno != ENOENT) - throw_generic_error (errno); - } - else if (static_cast (r) >= sizeof (b)) - throw_generic_error (EINVAL); - else - { - b[r] = '\0'; - sp = dir_path (b); - if (sp.relative ()) - sp = md / sp; - } - } - catch (const system_error& e) - { - fail << "unable to read subvolume link " << lp << ": " << e; - } - - // If the resolution fails, then this means there is no current - // machine subvolume (for this bootstrap protocol). In this case we - // clean up our toolchain subvolume (-) and ignore - // this machine. - // - if (sp.empty ()) - { - if (te) - delete_t (); - - l3 ([&]{trace << "skipping " << md << ": no subvolume link";}); - break; - } - - // -- - // - const dir_path xp ( - dir_path (md) /= path::traits::temp_name (mn + '-' + tc_name)); - - if (btrfs_exit (trace, "subvolume", "snapshot", sp, xp) != 0) - { - if (retry >= 10) - fail << "unable to snapshot subvolume " << sp; - - continue; - } - - // Load the (original) machine manifest. - // - auto mm ( - parse_manifest (sp / "manifest", "machine")); - - // If we already have -, see if it needs to be re- - // bootstrapped. Things that render it obsolete: - // - // 1. New machine revision (compare machine ids). - // 2. New toolchain (compare toolchain ids). - // 3. New bbot/libbbot (compare versions). - // - // The last case has a complication: what should we do if we have - // bootstrapped a newer version of bbot? This would mean that we are - // about to be stopped and upgraded (and the upgraded version will - // probably be able to use the result). So we simply ignore this - // machine for this run. - - // Return -1 if older, 0 if the same, and +1 if newer. 
- // - auto compare_bbot = [] (const bootstrap_manifest& m) -> int - { - auto cmp = [&m] (const string& n, const char* v) -> int - { - standard_version sv (v); - auto i = m.versions.find (n); - - return (i == m.versions.end () || i->second < sv - ? -1 - : i->second > sv ? 1 : 0); - }; - - // Start from the top assuming a new dependency cannot be added - // without changing the dependent's version. - // - int r; - return - (r = cmp ("bbot", BBOT_VERSION_STR)) != 0 ? r : - (r = cmp ("libbbot", LIBBBOT_VERSION_STR)) != 0 ? r : - (r = cmp ("libbpkg", LIBBPKG_VERSION_STR)) != 0 ? r : - (r = cmp ("libbutl", LIBBUTL_VERSION_STR)) != 0 ? r : 0; - }; - - optional bmm; - if (te) - { - bmm = parse_manifest ( - tp / "manifest", "bootstrapped machine"); - - if (bmm->machine.id != mm.id) - { - l3 ([&]{trace << "re-bootstrapping " << tp << ": new machine";}); - te = false; - } - - if (!tc_id.empty () && bmm->toolchain.id != tc_id) - { - l3 ([&]{trace << "re-bootstrapping " << tp << ": new toolchain";}); - te = false; - } - - if (int i = compare_bbot (bmm->bootstrap)) - { - if (i < 0) - { - l3 ([&]{trace << "re-bootstrapping " << tp << ": new bbot";}); - te = false; - } - else - { - l3 ([&]{trace << "ignoring " << tp << ": old bbot";}); - run_btrfs (trace, "subvolume", "delete", xp); - break; - } - } - - if (!te) - delete_t (); - } - else - l3 ([&]{trace << "bootstrapping " << tp;}); - - if (!te) - { - // Use the -- snapshot that we have made to - // bootstrap the new machine. Then atomically rename it to - // -. 
- // - bmm = bootstrap_machine (xp, mm, move (bmm)); - - if (!bmm) - { - l3 ([&]{trace << "ignoring " << tp << ": failed to bootstrap";}); - run_btrfs (trace, "subvolume", "delete", xp); - break; - } - - try - { - mvdir (xp, tp); - } - catch (const system_error& e) - { - fail << "unable to rename " << xp << " to " << tp; - } - - l2 ([&]{trace << "bootstrapped " << bmm->machine.name;}); - - // Check the bootstrapped bbot version as above and ignore this - // machine if it's newer than us. - // - if (int i = compare_bbot (bmm->bootstrap)) - { - if (i > 0) - { - l3 ([&]{trace << "ignoring " << tp << ": old bbot";}); - break; - } - else - warn << "bootstrapped " << tp << " bbot worker is older " - << "than agent; assuming test setup"; - } - } - else - run_btrfs (trace, "subvolume", "delete", xp); - - // Add the machine to the lists. - // - rm.push_back (move (*bmm)); - rd.push_back (move (tp)); - - break; - } - } - } - catch (const system_error& e) - { - fail << "unable to iterate over " << vd << ": " << e << endf; - } - } - - return make_pair (move (rm), move (rd)); -} -catch (const system_error& e) -{ - fail << "unable to iterate over " << machines << ": " << e << endf; -} - -static result_manifest -perform_task (const dir_path& md, - const bootstrapped_machine_manifest& mm, - const task_manifest& tm) -try -{ - tracer trace ("perform_task", md.string ().c_str ()); - - result_manifest r { - tm.name, - tm.version, - result_status::abort, - operation_results {}}; - - if (ops.fake_build ()) - return r; - - // The overall plan is as follows: - // - // 1. Snapshot the (bootstrapped) machine. - // - // 2. Save the task manifest to the TFTP directory (to be accessed by the - // worker). - // - // 3. Start the TFTP server and the machine. - // - // 4. Serve TFTP requests while watching out for the result manifest. - // - // 5. Clean up (force the machine down and delete the snapshot). 
- // - - // TFTP server mapping (server chroot is --tftp): - // - // GET requests to .../build//get/* - // PUT requests to .../build//put/* - // - auto_rmdir arm ((dir_path (ops.tftp ()) /= "build") /= tc_name); - - dir_path gd (dir_path (arm.path ()) /= "get"); - dir_path pd (dir_path (arm.path ()) /= "put"); - - try_mkdir_p (gd); - try_mkdir_p (pd); - - path tf (gd / "manifest"); // Task manifest file. - path rf (pd / "manifest"); // Result manifest file. - - serialize_manifest (tm, tf, "task"); - - if (ops.fake_machine_specified ()) - { - // Simply wait for the file to appear. - // - for (size_t i (0); !file_exists (rf); sleep (1)) - if (i++ % 10 == 0) - l3 ([&]{trace << "waiting for result manifest";}); - - r = parse_manifest (rf, "result"); - } - else - { - try_rmfile (rf); - - // -- - // - const dir_path xp ( - md.directory () /= path::traits::temp_name (md.leaf ().string ())); - - string br ("br1"); // Using private bridge for now. - - for (size_t retry (0);; ++retry) - { - if (retry != 0) - run_btrfs (trace, "subvolume", "delete", xp); - - run_btrfs (trace, "subvolume", "snapshot", md, xp); - - // Start the TFTP server. - // - tftp_server tftpd ("Gr ^/?(.+)$ /build/" + tc_name + "/get/\\1\n" + - "Pr ^/?(.+)$ /build/" + tc_name + "/put/\\1\n", - ops.tftp_port () + tc_num); - - l3 ([&]{trace << "tftp server on port " << tftpd.port ();}); - - // Start the machine. - // - unique_ptr m ( - start_machine (xp, - mm.machine, - mm.machine.mac, - br, - tftpd.port ())); - - // Note: the machine handling logic is similar to bootstrap. 
- // - { - auto mg ( - make_exception_guard ( - [&m, &xp] () - { - info << "trying to force machine " << xp << " down"; - try {m->forcedown ();} catch (const failed&) {} - })); - - auto soft_fail = [&xp, &m, &r] (const char* msg) - { - { - diag_record dr (error); - dr << msg << " for machine " << xp << ", suspending"; - m->print_info (dr); - } - m->suspend (); - m->wait (); - info << "resuming after machine suspension"; - return r; - }; - - // The first request should be the task manifest download. Wait for up - // to 60 seconds for that to arrive. In a sense we use it as an - // indication that the machine has booted and the worker process has - // started. - // - size_t to; - const size_t startup_to (60); - const size_t build_to (ops.build_timeout ()); - - if (!tftpd.serve ((to = startup_to))) - { - if (retry > ops.build_retries ()) - return soft_fail ("build startup timeout"); - - warn << "machine " << mm.machine.name << " appears to have " - << "mis-booted, retrying"; - - try {m->forcedown (false);} catch (const failed&) {} - continue; - } - - l3 ([&]{trace << "completed startup in " << startup_to - to << "s";}); - - // Next the worker builds things and then uploads the result manifest. - // So on our side we serve TFTP requests while checking for the - // manifest file. To workaround some obscure filesystem races (the - // file's mtime/size is updated several seconds later; maybe tmpfs - // issue?), we periodically re-check. - // - for (to = build_to; to != 0; tftpd.serve (to, 2)) - { - if (file_exists (rf)) - { - file_sync (rf); - if (!file_empty (rf)) - break; - } - } - - if (to == 0) - return soft_fail ("build timeout"); - - l3 ([&]{trace << "completed build in " << build_to - to << "s";}); - - // Parse the result manifest. - // - try - { - r = parse_manifest (rf, "result", false); - } - catch (const failed&) - { - r.status = result_status::abnormal; // Soft-fail below. 
- } - - if (r.status == result_status::abnormal) - { - // If the build terminated abnormally, suspend the machine for - // investigation. - // - return soft_fail ("build terminated abnormally"); - } - else - { - // Force the machine down (there is no need wasting time on clean - // shutdown since the next step is to drop the snapshot). Also fail - // softly if things go badly. - // - try {m->forcedown (false);} catch (const failed&) {} - } - } - - run_btrfs (trace, "subvolume", "delete", xp); - break; - } - } - - // Update package name/version if the returned value as "unknown". - // - if (r.version == bpkg::version ("0")) - { - assert (r.status == result_status::abnormal); - - r.name = tm.name; - r.version = tm.version; - } - - return r; -} -catch (const system_error& e) -{ - fail << "build error: " << e << endf; -} - -extern "C" void -handle_signal (int sig) -{ - switch (sig) - { - case SIGHUP: exit (3); // Unimplemented feature. - case SIGTERM: exit (0); - default: assert (false); - } -} - -int -main (int argc, char* argv[]) -try -{ - cli::argv_scanner scan (argc, argv, true); - ops.parse (scan); - - verb = ops.verbose (); - - if (ops.systemd_daemon ()) - systemd_diagnostics (true); // With critical errors. - - tracer trace ("main"); - - uid = getuid (); - uname = getpwuid (uid)->pw_name; - - { - char buf[HOST_NAME_MAX + 1]; - - if (gethostname (buf, sizeof (buf)) == -1) - fail << "unable to obtain hostname: " - << system_error (errno, generic_category ()); // Sanitize. - - hname = buf; - } - - // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if - // the pipe reading end is closed. Note that by default this signal - // terminates a process. Also note that there is no way to disable this - // behavior on a file descriptor basis or for the write() function call. - // - if (signal (SIGPIPE, SIG_IGN) == SIG_ERR) - fail << "unable to ignore broken pipe (SIGPIPE) signal: " - << system_error (errno, generic_category ()); // Sanitize. 
- - // Version. - // - if (ops.version ()) - { - cout << "bbot-agent " << BBOT_VERSION_ID << endl - << "libbbot " << LIBBBOT_VERSION_ID << endl - << "libbpkg " << LIBBBOT_VERSION_ID << endl - << "libbutl " << LIBBUTL_VERSION_ID << endl - << "Copyright (c) 2014-2017 Code Synthesis Ltd" << endl - << "TBC; All rights reserved" << endl; - - return 0; - } - - // Help. - // - if (ops.help ()) - { - pager p ("bbot-agent help", false); - print_bbot_agent_usage (p.stream ()); - - // If the pager failed, assume it has issued some diagnostics. - // - return p.wait () ? 0 : 1; - } - - tc_name = ops.toolchain_name (); - tc_num = ops.toolchain_num (); - tc_ver = (ops.toolchain_ver_specified () - ? ops.toolchain_ver () - : standard_version (BBOT_VERSION_STR)); - tc_id = ops.toolchain_id (); - - - // Controller URLs. - // - if (argc < 2 && - !ops.dump_machines () && - !ops.fake_request_specified ()) - { - fail << "controller url expected" << - info << "run " << argv[0] << " --help for details"; - } - - strings controllers; - - for (int i (1); i != argc; ++i) - controllers.push_back (argv[i]); - - // Handle SIGHUP and SIGTERM. - // - if (signal (SIGHUP, &handle_signal) == SIG_ERR || - signal (SIGTERM, &handle_signal) == SIG_ERR) - fail << "unable to set signal handler: " - << system_error (errno, generic_category ()); // Sanitize. - - optional fingerprint; - - if (ops.auth_key_specified ()) - try - { - // Note that the process always prints to STDERR, so we redirect it to the - // null device. We also check for the key file existence to print more - // meaningful error message if that's not the case. 
- // - if (!file_exists (ops.auth_key ())) - throw_generic_error (ENOENT); - - openssl os (trace, - ops.auth_key (), path ("-"), fdnull (), - ops.openssl (), "rsa", - ops.openssl_option (), "-pubout", "-outform", "DER"); - - vector k (os.in.read_binary ()); - os.in.close (); - - if (!os.wait ()) - throw_generic_error (EIO); - - fingerprint = sha256 (k.data (), k.size ()).string (); - } - catch (const system_error& e) - { - fail << "unable to obtain authentication public key: " << e; - } - - if (ops.systemd_daemon ()) - { - diag_record dr; - - dr << info << "bbot agent " << BBOT_VERSION_ID; - - if (fingerprint) - dr << info << "auth key fp " << *fingerprint; - - dr << - info << "toolchain name " << tc_name << - info << "toolchain num " << tc_num << - info << "toolchain ver " << tc_ver.string () << - info << "toolchain id " << tc_id << - info << "CPU(s) " << ops.cpu () << - info << "RAM(kB) " << ops.ram (); - - for (const string& u: controllers) - dr << info << "controller url " << u; - } - - // The work loop. The steps we go through are: - // - // 1. Enumerate the available machines, (re-)bootstrapping any if necessary. - // - // 2. Poll controller(s) for build tasks. - // - // 3. If no build tasks are available, go to #1 (after sleeping a bit). - // - // 4. If a build task is returned, do it, upload the result, and go to #1 - // (immediately). - // - for (bool sleep (false);; ::sleep (sleep ? 60 : 0), sleep = false) - { - // Enumerate the machines. - // - auto mp (enumerate_machines (ops.machines ())); - bootstrapped_machine_manifests& ms (mp.first); - dir_paths& ds (mp.second); - - // Prepare task request. 
- // - task_request_manifest tq { - hname, - tc_name, - tc_ver, - fingerprint, - machine_header_manifests {} - }; - - for (const bootstrapped_machine_manifest& m: ms) - tq.machines.emplace_back (m.machine.id, - m.machine.name, - m.machine.summary); - - if (ops.dump_machines ()) - { - for (const machine_header_manifest& m: tq.machines) - serialize_manifest (m, cout, "stdout", "machine"); - - return 0; - } - - if (tq.machines.empty ()) - { - warn << "no build machines for toolchain " << tc_name; - sleep = true; - continue; - } - - // Send task requests. - // - // - string url; - task_response_manifest tr; - - if (ops.fake_request_specified ()) - { - auto t (parse_manifest (ops.fake_request (), "task")); - - tr = task_response_manifest { - "fake-session", // Dummy session. - nullopt, // No challenge. - url, // Empty result URL. - move (t)}; - - url = "http://example.org"; - } - else - { - for (const string& u: controllers) - { - try - { - http_curl c (trace, - path ("-"), - path ("-"), - curl::post, - u, - "--header", "Content-Type: text/manifest", - "--max-time", ops.request_timeout ()); - - // This is tricky/hairy: we may fail hard parsing the output before - // seeing that curl exited with an error and failing softly. - // - bool f (false); - - try - { - serialize_manifest (tq, c.out, u, "task request", false); - } - catch (const failed&) {f = true;} - - c.out.close (); - - if (!f) - try - { - tr = parse_manifest ( - c.in, u, "task response", false); - } - catch (const failed&) {f = true;} - - c.in.close (); - - if (!c.wait () || f) - throw_generic_error (EIO); - } - catch (const system_error& e) - { - error << "unable to request task from " << u << ": " << e; - continue; - } - - if (tr.challenge && !fingerprint) // Controller misbehaves. - { - error << "unexpected challenge from " << u << ": " << *tr.challenge; - continue; - } - - if (!tr.session.empty ()) // Got a task. 
- { - url = u; - - task_manifest& t (*tr.task); - l2 ([&]{trace << "task for " << t.name << '/' << t.version << " " - << "on " << t.machine << " " - << "from " << url;}); - break; - } - } - } - - if (tr.session.empty ()) // No task from any of the controllers. - { - l2 ([&]{trace << "no tasks from any controllers, sleeping";}); - sleep = true; - continue; - } - - // We have a build task. - // - // First find the index of the machine we were asked to use (and also - // verify it is one of those we sent). - // - size_t i (0); - for (const machine_header_manifest& m: tq.machines) - { - if (m.name == tr.task->machine) - break; - - ++i; - } - - if (i == ms.size ()) - { - error << "task from " << url << " for unknown machine " - << tr.task->machine; - - if (ops.dump_task ()) - return 0; - - continue; - } - - task_manifest& t (*tr.task); - - if (ops.dump_task ()) - { - serialize_manifest (t, cout, "stdout", "task"); - return 0; - } - - // If we have our own repository certificate fingerprints, then use them - // to replace what we have received from the controller. - // - if (!ops.trust ().empty ()) - t.trust = ops.trust (); - - const dir_path& d (ds[i]); // The - directory. - const bootstrapped_machine_manifest& m (ms[i]); - - result_manifest r (perform_task (d, m, t)); - - if (ops.dump_result ()) - { - serialize_manifest (r, cout, "stdout", "result"); - return 0; - } - - // Prepare answer to the private key challenge. 
- // - optional> challenge; - - if (tr.challenge) - try - { - assert (ops.auth_key_specified ()); - - openssl os (trace, - fdstream_mode::text, path ("-"), 2, - ops.openssl (), "rsautl", - ops.openssl_option (), "-sign", "-inkey", ops.auth_key ()); - - os.out << *tr.challenge; - os.out.close (); - - challenge = os.in.read_binary (); - os.in.close (); - - if (!os.wait ()) - throw_generic_error (EIO); - } - catch (const system_error& e) - { - // The task response challenge is valid (verified by manifest parser), - // so there is something wrong with setup, and so the failure is fatal. - // - fail << "unable to sign task response challenge: " << e; - } - - // Upload the result. - // - result_request_manifest rq {tr.session, move (challenge), move (r)}; - { - const string& u (*tr.result_url); - - try - { - http_curl c (trace, - path ("-"), - nullfd, // Not expecting any data in response. - curl::post, - u, - "--header", "Content-Type: text/manifest", - "--max-time", ops.request_timeout ()); - - // This is tricky/hairy: we may fail hard writing the input before - // seeing that curl exited with an error and failing softly. - // - bool f (false); - - try - { - serialize_manifest (rq, c.out, u, "task request"); - } - catch (const failed&) {f = true;} - - c.out.close (); - - if (!c.wait () || f) - throw_generic_error (EIO); - } - catch (const system_error& e) - { - error << "unable to upload result to " << u << ": " << e; - continue; - } - } - - l2 ([&]{trace << "built " << t.name << '/' << t.version << " " - << "on " << t.machine << " " - << "for " << url;}); - } -} -catch (const failed&) -{ - return 1; // Diagnostics has already been issued. 
-} -catch (const cli::exception& e) -{ - error << e; - return 1; -} - -namespace bbot -{ - static unsigned int rand_seed; // Seed for rand_r(); - - size_t - genrand () - { - if (rand_seed == 0) - rand_seed = static_cast ( - chrono::system_clock::now ().time_since_epoch ().count ()); - - return static_cast (rand_r (&rand_seed)); - } - - // Note: Linux-specific implementation. - // - string - iface_addr (const string& i) - { - if (i.size () >= IFNAMSIZ) - throw invalid_argument ("interface nama too long"); - - auto_fd fd (socket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0)); - - if (fd.get () == -1) - throw_system_error (errno); - - ifreq ifr; - ifr.ifr_addr.sa_family = AF_INET; - strcpy (ifr.ifr_name, i.c_str ()); - - if (ioctl (fd.get (), SIOCGIFADDR, &ifr) == -1) - throw_system_error (errno); - - char buf[3 * 4 + 3 + 1]; // IPv4 address. - if (inet_ntop (AF_INET, - &reinterpret_cast (&ifr.ifr_addr)->sin_addr, - buf, - sizeof (buf)) == nullptr) - throw_system_error (errno); - - return buf; - } -} diff --git a/bbot/agent.hxx b/bbot/agent.hxx deleted file mode 100644 index f009a64..0000000 --- a/bbot/agent.hxx +++ /dev/null @@ -1,45 +0,0 @@ -// file : bbot/agent.hxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#ifndef BBOT_AGENT_HXX -#define BBOT_AGENT_HXX - -#include // uid_t - -#include -#include - -#include - -namespace bbot -{ - extern agent_options ops; - - extern const string bs_prot; // Bootstrap protocol version. - - extern string tc_name; // Toolchain name. - extern uint16_t tc_num; // Toolchain number. - extern standard_version tc_ver; // Toolchain version. - extern string tc_id; // Toolchain id. - - extern string hname; // Our host name. - extern uid_t uid; // Our effective user id. - extern string uname; // Our effective user name. - - // Random number generator (currently not MT-safe and limited to RAND_MAX). 
- // - size_t - genrand (); - - template - inline T - genrand () {return static_cast (genrand ());} - - // Return the IPv4 address of an interface. - // - string - iface_addr (const string&); -} - -#endif // BBOT_AGENT_HXX diff --git a/bbot/agent/agent.cli b/bbot/agent/agent.cli new file mode 100644 index 0000000..a5dbe01 --- /dev/null +++ b/bbot/agent/agent.cli @@ -0,0 +1,232 @@ +// file : bbot/agent.cli +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +include ; + +"\section=1" +"\name=bbot-agent" +"\summary=build bot agent" + +namespace bbot +{ + { + " ", + + " + \h|SYNOPSIS| + + \cb{bbot-agent --help}\n + \cb{bbot-agent --version}\n + \c{\b{bbot-agent} [] ...} + + \h|DESCRIPTION| + + \cb{bbot-agent} @@ TODO. + + Note that on termination \cb{bbot-agent} may leave a working machine + snapshot behind. It is expected that the caller (normally Build OS + monitor) cleans them up before restarting the agent. + " + } + + class agent_options + { + "\h|OPTIONS|" + + bool --help {"Print usage information and exit."} + bool --version {"Print version and exit."} + + bool --systemd-daemon + { + "Run as a simple systemd daemon." + } + + path --auth-key + { + "", + "Private key for the public key-based agent authentication. If not + specified, then the agent will not be able to request tasks from + controllers that require authentication. + + The file is expected to contain a single PEM-encoded private key + without a password. A suitable key can be generated using the + following command: + + \ + $ openssl genrsa 4096 >key.pem + \ + " + } + + path --openssl = "openssl" + { + "", + "The openssl program to be used for crypto operations. You can also + specify additional options that should be passed to the openssl program + with \cb{--openssl-option}. If the openssl program is not explicitly + specified, then \cb{bbot-agent} will use \cb{openssl} by default." 
+ } + + strings --openssl-option + { + "", + "Additional option to be passed to the openssl program (see + \cb{--openssl} for details). Repeat this option to specify multiple + openssl options." + } + + size_t --cpu = 1 + { + "", + "Number of CPUs (threads) to use, 1 by default." + } + + size_t --ram (1024 * 1024) // 1G + { + "", + "Amount of RAM (in kB) to use, 1G by default." + } + + string --toolchain-name = "default" + { + "", + "Toolchain name, \cb{default} by default." + } + + uint16_t --toolchain-num = 1 + { + "", + "Toolchain number, 1 by default." + } + + standard_version --toolchain-ver + { + "", + "Toolchain version. If unspecified, then the agent's version will be + used (which will be imprecise for snapshot versions)." + } + + string --toolchain-id + { + "", + "Toolchain id. If unspecified or empty, then no re-bootstrapping on + toolchain changes will be performed (which is primarily useful for + testing)." + } + + strings --trust + { + "", + "Trust repository certificate with a SHA256 ." + } + + dir_path --machines = "/build/machines/" + { + "

", + "The location of the build machines, \cb{/build/machines/} by default." + } + + dir_path --tftp = "/build/tftp/" + { + "", + "The location of the TFTP server root, \cb{/build/tftp/} by default." + } + + uint16_t --tftp-port = 23400 + { + "", + "TFTP server port base, 23400 by default. The actual port is calculated + by adding the toolchain number \c{--toolchain-num} to this value." + } + + size_t --bootstrap-timeout = 900 + { + "", + "Maximum number of seconds to wait for machine bootstrap completion, + 900 (15 minutes) by default." + } + + size_t --bootstrap-retries = 2 + { + "", + "Number of time to retry a mis-booted bootstrap, 2 by default." + } + + size_t --build-timeout = 1800 + { + "", + "Maximum number of seconds to wait for build completion, 1800 (30 + minutes) by default." + } + + size_t --build-retries = 2 + { + "", + "Number of time to retry a mis-booted build, 2 by default." + } + + size_t --request-timeout = 300 + { + "", + "Maximum number of seconds to wait for controller request completion, + 300 (5 minutes) by default." + } + + uint16_t --verbose = 1 + { + "", + "Set the diagnostics verbosity to between 0 and 6 with level 1 + being the default." + } + + // Testing options. + // + bool --dump-machines + { + "Dump the available machines to \cb{stdout}, (re)-bootstrapping any if + necessary, and exit." + } + + bool --dump-task + { + "Dump the received build task to \cb{stdout} and exit." + } + + bool --dump-result + { + "Dump the obtained build result to \cb{stdout} and exit." + } + + bool --fake-bootstrap + { + "Fake the machine bootstrap process by creating the expected bootstrapped + machine manifest." + } + + bool --fake-build + { + "Fake the package building process by creating the aborted build result." + } + + path --fake-machine + { + "", + "Fake the machine enumeration process by reading the machine header + manifest from (or \cb{stdin} if is '\cb{-}')." 
+ } + + path --fake-request + { + "", + "Fake the task request process by reading the task manifest from + (or \cb{stdin} if is '\cb{-}')." + } + }; + + " + \h|EXIT STATUS| + + Non-zero exit status is returned in case of an error. + " +} diff --git a/bbot/agent/agent.cxx b/bbot/agent/agent.cxx new file mode 100644 index 0000000..ff697db --- /dev/null +++ b/bbot/agent/agent.cxx @@ -0,0 +1,1248 @@ +// file : bbot/agent/agent.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#include + +#include // getpwuid() +#include // PATH_MAX +#include // signal() +#include // rand_r() +#include // sleep(), realink(), getuid(), fsync() + +#include // ifreq +#include // sockaddr_in +#include // inet_ntop() +#include +#include + +#include +#include + +#include +#include +#include +#include // dir_iterator + +#include + +#include +#include +#include + +#include + +#include +#include +#include + +using namespace std; +using namespace butl; +using namespace bbot; + +namespace bbot +{ + agent_options ops; + + const string bs_prot ("1"); + + string tc_name; + uint16_t tc_num; + standard_version tc_ver; + string tc_id; + + string hname; + uid_t uid; + string uname; +} + +static void +file_sync (const path& f) +{ + auto_fd fd (fdopen (f, fdopen_mode::in)); + if (fsync (fd.get ()) != 0) + throw_system_error (errno); +} + +// The btrfs tool likes to print informational messages, like "Created +// snapshot such and such". Luckily, it writes them to stdout while proper +// diagnostics to stderr. +// +template +inline void +run_btrfs (tracer& t, A&&... a) +{ + if (verb >= 4) + run_io (t, fdnull (), 2, 2, "btrfs", forward (a)...); + else + run_io (t, fdnull (), fdnull (), 2, "btrfs", forward (a)...); +} + +template +inline butl::process_exit::code_type +btrfs_exit (tracer& t, A&&... a) +{ + return verb >= 4 + ? run_io_exit (t, fdnull (), 2, 2, "btrfs", forward (a)...) 
+ : run_io_exit (t, fdnull (), fdnull (), 2, "btrfs", forward (a)...); +} + +// Bootstrap the machine. Return the bootstrapped machine manifest if +// successful and nullopt otherwise (in which case the machine directory +// should be cleaned and the machine ignored for now). +// +static optional +bootstrap_machine (const dir_path& md, + const machine_manifest& mm, + optional obmm) +{ + tracer trace ("bootstrap_machine", md.string ().c_str ()); + + bootstrapped_machine_manifest r { + mm, + toolchain_manifest {tc_id.empty () ? "bogus" : tc_id}, + bootstrap_manifest { + bootstrap_manifest::versions_type { + {"bbot", standard_version (BBOT_VERSION_STR)}, + {"libbbot", standard_version (LIBBBOT_VERSION_STR)}, + {"libbpkg", standard_version (LIBBPKG_VERSION_STR)}, + {"libbutl", standard_version (LIBBUTL_VERSION_STR)} + } + } + }; + + if (ops.fake_bootstrap ()) + { + r.machine.mac = "de:ad:be:ef:de:ad"; + } + else + try + { + string br ("br1"); // Using private bridge for now. + + // Start the TFTP server (server chroot is --tftp). Map: + // + // GET requests to .../toolchains//* + // PUT requests to .../bootstrap//* + // + auto_rmdir arm ((dir_path (ops.tftp ()) /= "bootstrap") /= tc_name); + try_mkdir_p (arm.path ()); + + // Bootstrap result manifest. + // + path mf (arm.path () / "manifest"); + try_rmfile (mf); + + // Note that unlike build, here we use the same VM snapshot for retries, + // which is not ideal. + // + for (size_t retry (0);; ++retry) + { + tftp_server tftpd ("Gr ^/?(.+)$ /toolchains/" + tc_name + "/\\1\n" + + "Pr ^/?(.+)$ /bootstrap/" + tc_name + "/\\1\n", + ops.tftp_port () + tc_num); + + l3 ([&]{trace << "tftp server on port " << tftpd.port ();}); + + // Start the machine. + // + unique_ptr m ( + start_machine (md, + mm, + obmm ? obmm->machine.mac : nullopt, + br, + tftpd.port ())); + + { + // If we are terminating with an exception then force the machine down. + // Failed that, the machine's destructor will block waiting for its + // completion. 
+ // + auto mg ( + make_exception_guard ( + [&m, &md] () + { + info << "trying to force machine " << md << " down"; + try {m->forcedown ();} catch (const failed&) {} + })); + + // What happens if the bootstrap process hangs? The simple thing would + // be to force the machine down after some timeout and then fail. But + // that won't be very helpful for investigating the cause. So instead + // the plan is to suspend it after some timeout, issue diagnostics + // (without failing and which Build OS monitor will relay to the + // admin), and wait for the external intervention. + // + auto soft_fail = [&md, &m] (const char* msg) + { + { + diag_record dr (error); + dr << msg << " for machine " << md << ", suspending"; + m->print_info (dr); + } + m->suspend (); + m->wait (); + info << "resuming after machine suspension"; + return nullopt; + }; + + // The first request should be the toolchain download. Wait for up to + // 5 minutes for that to arrive. In a sense we use it as an indication + // that the machine has booted and the bootstrap process has started. + // Why wait so long you may wonder? Well, we may be using a new MAC + // address and operating systems like Windows may need to digest that. + // + size_t to; + const size_t startup_to (5 * 60); + const size_t bootstrap_to (ops.bootstrap_timeout ()); + const size_t shutdown_to (5 * 60); + + // This can mean two things: machine mis-configuration or what we + // euphemistically call a "mis-boot": the VM failed to boot for some + // unknown/random reason. Mac OS is particularly know for suffering + // from this. So the strategy is to retry it a couple of times and + // then suspend for investigation. 
+ // + if (!tftpd.serve ((to = startup_to))) + { + if (retry > ops.bootstrap_retries ()) + return soft_fail ("bootstrap startup timeout"); + + warn << "machine " << mm.name << " appears to have " + << "mis-booted, retrying"; + + try {m->forcedown (false);} catch (const failed&) {} + continue; + } + + l3 ([&]{trace << "completed startup in " << startup_to - to << "s";}); + + // Next the bootstrap process may download additional toolchain + // archives, build things, and then upload the result manifest. So on + // our side we serve TFTP requests while periodically checking for the + // manifest file. To workaround some obscure filesystem races (the + // file's mtime/size is updated several seconds later; maybe tmpfs + // issue?), we periodically re-check. + // + for (to = bootstrap_to; to != 0; tftpd.serve (to, 2)) + { + if (file_exists (mf)) + { + file_sync (mf); + if (!file_empty (mf)) + break; + } + } + + if (to == 0) + return soft_fail ("bootstrap timeout"); + + l3 ([&]{trace << "completed bootstrap in " << bootstrap_to - to << "s";}); + + // Shut the machine down cleanly. + // + if (!m->shutdown ((to = shutdown_to))) + return soft_fail ("bootstrap shutdown timeout"); + + l3 ([&]{trace << "completed shutdown in " << shutdown_to - to << "s";}); + } + + // Parse the result manifest. + // + r.bootstrap = parse_manifest (mf, "bootstrap"); + + r.machine.mac = m->mac; // Save the MAC address. + + break; + } + } + catch (const system_error& e) + { + fail << "bootstrap error: " << e; + } + + serialize_manifest (r, md / "manifest", "bootstrapped machine"); + return r; +} + +// Return available machines and their directories as a parallel array. 
+// +static pair +enumerate_machines (const dir_path& machines) +try +{ + tracer trace ("enumerate_machines", machines.string ().c_str ()); + + bootstrapped_machine_manifests rm; + dir_paths rd; + + if (ops.fake_machine_specified ()) + { + auto mh ( + parse_manifest ( + ops.fake_machine (), "machine header")); + + rm.push_back ( + bootstrapped_machine_manifest { + machine_manifest { + mh.id, + mh.name, + mh.summary, + machine_type::kvm, + string ("de:ad:be:ef:de:ad"), + nullopt}, + toolchain_manifest {tc_id}, + bootstrap_manifest {} + }); + + rd.push_back (dir_path (ops.machines ()) /= mh.name); // For diagnostics. + + return make_pair (move (rm), move (rd)); + } + + // The first level are machine volumes. + // + for (const dir_entry& ve: dir_iterator (machines)) + { + const string vn (ve.path ().string ()); + + // Ignore hidden directories. + // + if (ve.type () != entry_type::directory || vn[0] == '.') + continue; + + const dir_path vd (dir_path (machines) /= vn); + + // Inside we have machines. + // + try + { + for (const dir_entry& me: dir_iterator (vd)) + { + const string mn (me.path ().string ()); + + if (me.type () != entry_type::directory || mn[0] == '.') + continue; + + const dir_path md (dir_path (vd) /= mn); + + // Our endgoal here is to obtain a bootstrapped snapshot of this + // machine while watching out for potential race conditions (machines + // being added/upgraded/removed; see the manual for details). + // + // So here is our overall plan: + // + // 1. Resolve current subvolume link for our bootstrap protocol. + // + // 2. If there is no link, cleanup and ignore this machine. + // + // 3. Try to create a snapshot of current subvolume (this operation is + // atomic). If failed (e.g., someone changed the link and removed + // the subvolume in the meantime), retry from #1. + // + // 4. Compare the snapshot to the already bootstrapped version (if + // any) and see if we need to re-bootstrap. If so, use the snapshot + // as a starting point. 
Rename to bootstrapped at the end (atomic). + // + dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -

+ dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // - + bool te (dir_exists (tp)); + + auto delete_t = [&tp, &trace] () + { + run_btrfs (trace, "property", "set", "-ts", tp, "ro", "false"); + run_btrfs (trace, "subvolume", "delete", tp); + }; + + for (size_t retry (0);; ++retry) + { + if (retry != 0) + sleep (1); + + // Resolve the link to subvolume path. + // + dir_path sp; // -

. + try + { + char b [PATH_MAX + 1]; + ssize_t r (readlink (lp.string ().c_str (), b, sizeof (b))); + + if (r == -1) + { + if (errno != ENOENT) + throw_generic_error (errno); + } + else if (static_cast (r) >= sizeof (b)) + throw_generic_error (EINVAL); + else + { + b[r] = '\0'; + sp = dir_path (b); + if (sp.relative ()) + sp = md / sp; + } + } + catch (const system_error& e) + { + fail << "unable to read subvolume link " << lp << ": " << e; + } + + // If the resolution fails, then this means there is no current + // machine subvolume (for this bootstrap protocol). In this case we + // clean up our toolchain subvolume (-) and ignore + // this machine. + // + if (sp.empty ()) + { + if (te) + delete_t (); + + l3 ([&]{trace << "skipping " << md << ": no subvolume link";}); + break; + } + + // -- + // + const dir_path xp ( + dir_path (md) /= path::traits::temp_name (mn + '-' + tc_name)); + + if (btrfs_exit (trace, "subvolume", "snapshot", sp, xp) != 0) + { + if (retry >= 10) + fail << "unable to snapshot subvolume " << sp; + + continue; + } + + // Load the (original) machine manifest. + // + auto mm ( + parse_manifest (sp / "manifest", "machine")); + + // If we already have -, see if it needs to be re- + // bootstrapped. Things that render it obsolete: + // + // 1. New machine revision (compare machine ids). + // 2. New toolchain (compare toolchain ids). + // 3. New bbot/libbbot (compare versions). + // + // The last case has a complication: what should we do if we have + // bootstrapped a newer version of bbot? This would mean that we are + // about to be stopped and upgraded (and the upgraded version will + // probably be able to use the result). So we simply ignore this + // machine for this run. + + // Return -1 if older, 0 if the same, and +1 if newer. 
+ // + auto compare_bbot = [] (const bootstrap_manifest& m) -> int + { + auto cmp = [&m] (const string& n, const char* v) -> int + { + standard_version sv (v); + auto i = m.versions.find (n); + + return (i == m.versions.end () || i->second < sv + ? -1 + : i->second > sv ? 1 : 0); + }; + + // Start from the top assuming a new dependency cannot be added + // without changing the dependent's version. + // + int r; + return + (r = cmp ("bbot", BBOT_VERSION_STR)) != 0 ? r : + (r = cmp ("libbbot", LIBBBOT_VERSION_STR)) != 0 ? r : + (r = cmp ("libbpkg", LIBBPKG_VERSION_STR)) != 0 ? r : + (r = cmp ("libbutl", LIBBUTL_VERSION_STR)) != 0 ? r : 0; + }; + + optional bmm; + if (te) + { + bmm = parse_manifest ( + tp / "manifest", "bootstrapped machine"); + + if (bmm->machine.id != mm.id) + { + l3 ([&]{trace << "re-bootstrapping " << tp << ": new machine";}); + te = false; + } + + if (!tc_id.empty () && bmm->toolchain.id != tc_id) + { + l3 ([&]{trace << "re-bootstrapping " << tp << ": new toolchain";}); + te = false; + } + + if (int i = compare_bbot (bmm->bootstrap)) + { + if (i < 0) + { + l3 ([&]{trace << "re-bootstrapping " << tp << ": new bbot";}); + te = false; + } + else + { + l3 ([&]{trace << "ignoring " << tp << ": old bbot";}); + run_btrfs (trace, "subvolume", "delete", xp); + break; + } + } + + if (!te) + delete_t (); + } + else + l3 ([&]{trace << "bootstrapping " << tp;}); + + if (!te) + { + // Use the -- snapshot that we have made to + // bootstrap the new machine. Then atomically rename it to + // -. 
+ // + bmm = bootstrap_machine (xp, mm, move (bmm)); + + if (!bmm) + { + l3 ([&]{trace << "ignoring " << tp << ": failed to bootstrap";}); + run_btrfs (trace, "subvolume", "delete", xp); + break; + } + + try + { + mvdir (xp, tp); + } + catch (const system_error& e) + { + fail << "unable to rename " << xp << " to " << tp; + } + + l2 ([&]{trace << "bootstrapped " << bmm->machine.name;}); + + // Check the bootstrapped bbot version as above and ignore this + // machine if it's newer than us. + // + if (int i = compare_bbot (bmm->bootstrap)) + { + if (i > 0) + { + l3 ([&]{trace << "ignoring " << tp << ": old bbot";}); + break; + } + else + warn << "bootstrapped " << tp << " bbot worker is older " + << "than agent; assuming test setup"; + } + } + else + run_btrfs (trace, "subvolume", "delete", xp); + + // Add the machine to the lists. + // + rm.push_back (move (*bmm)); + rd.push_back (move (tp)); + + break; + } + } + } + catch (const system_error& e) + { + fail << "unable to iterate over " << vd << ": " << e << endf; + } + } + + return make_pair (move (rm), move (rd)); +} +catch (const system_error& e) +{ + fail << "unable to iterate over " << machines << ": " << e << endf; +} + +static result_manifest +perform_task (const dir_path& md, + const bootstrapped_machine_manifest& mm, + const task_manifest& tm) +try +{ + tracer trace ("perform_task", md.string ().c_str ()); + + result_manifest r { + tm.name, + tm.version, + result_status::abort, + operation_results {}}; + + if (ops.fake_build ()) + return r; + + // The overall plan is as follows: + // + // 1. Snapshot the (bootstrapped) machine. + // + // 2. Save the task manifest to the TFTP directory (to be accessed by the + // worker). + // + // 3. Start the TFTP server and the machine. + // + // 4. Serve TFTP requests while watching out for the result manifest. + // + // 5. Clean up (force the machine down and delete the snapshot). 
+ // + + // TFTP server mapping (server chroot is --tftp): + // + // GET requests to .../build//get/* + // PUT requests to .../build//put/* + // + auto_rmdir arm ((dir_path (ops.tftp ()) /= "build") /= tc_name); + + dir_path gd (dir_path (arm.path ()) /= "get"); + dir_path pd (dir_path (arm.path ()) /= "put"); + + try_mkdir_p (gd); + try_mkdir_p (pd); + + path tf (gd / "manifest"); // Task manifest file. + path rf (pd / "manifest"); // Result manifest file. + + serialize_manifest (tm, tf, "task"); + + if (ops.fake_machine_specified ()) + { + // Simply wait for the file to appear. + // + for (size_t i (0); !file_exists (rf); sleep (1)) + if (i++ % 10 == 0) + l3 ([&]{trace << "waiting for result manifest";}); + + r = parse_manifest (rf, "result"); + } + else + { + try_rmfile (rf); + + // -- + // + const dir_path xp ( + md.directory () /= path::traits::temp_name (md.leaf ().string ())); + + string br ("br1"); // Using private bridge for now. + + for (size_t retry (0);; ++retry) + { + if (retry != 0) + run_btrfs (trace, "subvolume", "delete", xp); + + run_btrfs (trace, "subvolume", "snapshot", md, xp); + + // Start the TFTP server. + // + tftp_server tftpd ("Gr ^/?(.+)$ /build/" + tc_name + "/get/\\1\n" + + "Pr ^/?(.+)$ /build/" + tc_name + "/put/\\1\n", + ops.tftp_port () + tc_num); + + l3 ([&]{trace << "tftp server on port " << tftpd.port ();}); + + // Start the machine. + // + unique_ptr m ( + start_machine (xp, + mm.machine, + mm.machine.mac, + br, + tftpd.port ())); + + // Note: the machine handling logic is similar to bootstrap. 
+ // + { + auto mg ( + make_exception_guard ( + [&m, &xp] () + { + info << "trying to force machine " << xp << " down"; + try {m->forcedown ();} catch (const failed&) {} + })); + + auto soft_fail = [&xp, &m, &r] (const char* msg) + { + { + diag_record dr (error); + dr << msg << " for machine " << xp << ", suspending"; + m->print_info (dr); + } + m->suspend (); + m->wait (); + info << "resuming after machine suspension"; + return r; + }; + + // The first request should be the task manifest download. Wait for up + // to 60 seconds for that to arrive. In a sense we use it as an + // indication that the machine has booted and the worker process has + // started. + // + size_t to; + const size_t startup_to (60); + const size_t build_to (ops.build_timeout ()); + + if (!tftpd.serve ((to = startup_to))) + { + if (retry > ops.build_retries ()) + return soft_fail ("build startup timeout"); + + warn << "machine " << mm.machine.name << " appears to have " + << "mis-booted, retrying"; + + try {m->forcedown (false);} catch (const failed&) {} + continue; + } + + l3 ([&]{trace << "completed startup in " << startup_to - to << "s";}); + + // Next the worker builds things and then uploads the result manifest. + // So on our side we serve TFTP requests while checking for the + // manifest file. To workaround some obscure filesystem races (the + // file's mtime/size is updated several seconds later; maybe tmpfs + // issue?), we periodically re-check. + // + for (to = build_to; to != 0; tftpd.serve (to, 2)) + { + if (file_exists (rf)) + { + file_sync (rf); + if (!file_empty (rf)) + break; + } + } + + if (to == 0) + return soft_fail ("build timeout"); + + l3 ([&]{trace << "completed build in " << build_to - to << "s";}); + + // Parse the result manifest. + // + try + { + r = parse_manifest (rf, "result", false); + } + catch (const failed&) + { + r.status = result_status::abnormal; // Soft-fail below. 
+ } + + if (r.status == result_status::abnormal) + { + // If the build terminated abnormally, suspend the machine for + // investigation. + // + return soft_fail ("build terminated abnormally"); + } + else + { + // Force the machine down (there is no need wasting time on clean + // shutdown since the next step is to drop the snapshot). Also fail + // softly if things go badly. + // + try {m->forcedown (false);} catch (const failed&) {} + } + } + + run_btrfs (trace, "subvolume", "delete", xp); + break; + } + } + + // Update package name/version if the returned value as "unknown". + // + if (r.version == bpkg::version ("0")) + { + assert (r.status == result_status::abnormal); + + r.name = tm.name; + r.version = tm.version; + } + + return r; +} +catch (const system_error& e) +{ + fail << "build error: " << e << endf; +} + +extern "C" void +handle_signal (int sig) +{ + switch (sig) + { + case SIGHUP: exit (3); // Unimplemented feature. + case SIGTERM: exit (0); + default: assert (false); + } +} + +int +main (int argc, char* argv[]) +try +{ + cli::argv_scanner scan (argc, argv, true); + ops.parse (scan); + + verb = ops.verbose (); + + if (ops.systemd_daemon ()) + systemd_diagnostics (true); // With critical errors. + + tracer trace ("main"); + + uid = getuid (); + uname = getpwuid (uid)->pw_name; + + { + char buf[HOST_NAME_MAX + 1]; + + if (gethostname (buf, sizeof (buf)) == -1) + fail << "unable to obtain hostname: " + << system_error (errno, generic_category ()); // Sanitize. + + hname = buf; + } + + // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if + // the pipe reading end is closed. Note that by default this signal + // terminates a process. Also note that there is no way to disable this + // behavior on a file descriptor basis or for the write() function call. + // + if (signal (SIGPIPE, SIG_IGN) == SIG_ERR) + fail << "unable to ignore broken pipe (SIGPIPE) signal: " + << system_error (errno, generic_category ()); // Sanitize. 
+ + // Version. + // + if (ops.version ()) + { + cout << "bbot-agent " << BBOT_VERSION_ID << endl + << "libbbot " << LIBBBOT_VERSION_ID << endl + << "libbpkg " << LIBBBOT_VERSION_ID << endl + << "libbutl " << LIBBUTL_VERSION_ID << endl + << "Copyright (c) 2014-2017 Code Synthesis Ltd" << endl + << "TBC; All rights reserved" << endl; + + return 0; + } + + // Help. + // + if (ops.help ()) + { + pager p ("bbot-agent help", false); + print_bbot_agent_usage (p.stream ()); + + // If the pager failed, assume it has issued some diagnostics. + // + return p.wait () ? 0 : 1; + } + + tc_name = ops.toolchain_name (); + tc_num = ops.toolchain_num (); + tc_ver = (ops.toolchain_ver_specified () + ? ops.toolchain_ver () + : standard_version (BBOT_VERSION_STR)); + tc_id = ops.toolchain_id (); + + + // Controller URLs. + // + if (argc < 2 && + !ops.dump_machines () && + !ops.fake_request_specified ()) + { + fail << "controller url expected" << + info << "run " << argv[0] << " --help for details"; + } + + strings controllers; + + for (int i (1); i != argc; ++i) + controllers.push_back (argv[i]); + + // Handle SIGHUP and SIGTERM. + // + if (signal (SIGHUP, &handle_signal) == SIG_ERR || + signal (SIGTERM, &handle_signal) == SIG_ERR) + fail << "unable to set signal handler: " + << system_error (errno, generic_category ()); // Sanitize. + + optional fingerprint; + + if (ops.auth_key_specified ()) + try + { + // Note that the process always prints to STDERR, so we redirect it to the + // null device. We also check for the key file existence to print more + // meaningful error message if that's not the case. 
+ // + if (!file_exists (ops.auth_key ())) + throw_generic_error (ENOENT); + + openssl os (trace, + ops.auth_key (), path ("-"), fdnull (), + ops.openssl (), "rsa", + ops.openssl_option (), "-pubout", "-outform", "DER"); + + vector k (os.in.read_binary ()); + os.in.close (); + + if (!os.wait ()) + throw_generic_error (EIO); + + fingerprint = sha256 (k.data (), k.size ()).string (); + } + catch (const system_error& e) + { + fail << "unable to obtain authentication public key: " << e; + } + + if (ops.systemd_daemon ()) + { + diag_record dr; + + dr << info << "bbot agent " << BBOT_VERSION_ID; + + if (fingerprint) + dr << info << "auth key fp " << *fingerprint; + + dr << + info << "toolchain name " << tc_name << + info << "toolchain num " << tc_num << + info << "toolchain ver " << tc_ver.string () << + info << "toolchain id " << tc_id << + info << "CPU(s) " << ops.cpu () << + info << "RAM(kB) " << ops.ram (); + + for (const string& u: controllers) + dr << info << "controller url " << u; + } + + // The work loop. The steps we go through are: + // + // 1. Enumerate the available machines, (re-)bootstrapping any if necessary. + // + // 2. Poll controller(s) for build tasks. + // + // 3. If no build tasks are available, go to #1 (after sleeping a bit). + // + // 4. If a build task is returned, do it, upload the result, and go to #1 + // (immediately). + // + for (bool sleep (false);; ::sleep (sleep ? 60 : 0), sleep = false) + { + // Enumerate the machines. + // + auto mp (enumerate_machines (ops.machines ())); + bootstrapped_machine_manifests& ms (mp.first); + dir_paths& ds (mp.second); + + // Prepare task request. 
+ // + task_request_manifest tq { + hname, + tc_name, + tc_ver, + fingerprint, + machine_header_manifests {} + }; + + for (const bootstrapped_machine_manifest& m: ms) + tq.machines.emplace_back (m.machine.id, + m.machine.name, + m.machine.summary); + + if (ops.dump_machines ()) + { + for (const machine_header_manifest& m: tq.machines) + serialize_manifest (m, cout, "stdout", "machine"); + + return 0; + } + + if (tq.machines.empty ()) + { + warn << "no build machines for toolchain " << tc_name; + sleep = true; + continue; + } + + // Send task requests. + // + // + string url; + task_response_manifest tr; + + if (ops.fake_request_specified ()) + { + auto t (parse_manifest (ops.fake_request (), "task")); + + tr = task_response_manifest { + "fake-session", // Dummy session. + nullopt, // No challenge. + url, // Empty result URL. + move (t)}; + + url = "http://example.org"; + } + else + { + for (const string& u: controllers) + { + try + { + http_curl c (trace, + path ("-"), + path ("-"), + curl::post, + u, + "--header", "Content-Type: text/manifest", + "--max-time", ops.request_timeout ()); + + // This is tricky/hairy: we may fail hard parsing the output before + // seeing that curl exited with an error and failing softly. + // + bool f (false); + + try + { + serialize_manifest (tq, c.out, u, "task request", false); + } + catch (const failed&) {f = true;} + + c.out.close (); + + if (!f) + try + { + tr = parse_manifest ( + c.in, u, "task response", false); + } + catch (const failed&) {f = true;} + + c.in.close (); + + if (!c.wait () || f) + throw_generic_error (EIO); + } + catch (const system_error& e) + { + error << "unable to request task from " << u << ": " << e; + continue; + } + + if (tr.challenge && !fingerprint) // Controller misbehaves. + { + error << "unexpected challenge from " << u << ": " << *tr.challenge; + continue; + } + + if (!tr.session.empty ()) // Got a task. 
+ { + url = u; + + task_manifest& t (*tr.task); + l2 ([&]{trace << "task for " << t.name << '/' << t.version << " " + << "on " << t.machine << " " + << "from " << url;}); + break; + } + } + } + + if (tr.session.empty ()) // No task from any of the controllers. + { + l2 ([&]{trace << "no tasks from any controllers, sleeping";}); + sleep = true; + continue; + } + + // We have a build task. + // + // First find the index of the machine we were asked to use (and also + // verify it is one of those we sent). + // + size_t i (0); + for (const machine_header_manifest& m: tq.machines) + { + if (m.name == tr.task->machine) + break; + + ++i; + } + + if (i == ms.size ()) + { + error << "task from " << url << " for unknown machine " + << tr.task->machine; + + if (ops.dump_task ()) + return 0; + + continue; + } + + task_manifest& t (*tr.task); + + if (ops.dump_task ()) + { + serialize_manifest (t, cout, "stdout", "task"); + return 0; + } + + // If we have our own repository certificate fingerprints, then use them + // to replace what we have received from the controller. + // + if (!ops.trust ().empty ()) + t.trust = ops.trust (); + + const dir_path& d (ds[i]); // The - directory. + const bootstrapped_machine_manifest& m (ms[i]); + + result_manifest r (perform_task (d, m, t)); + + if (ops.dump_result ()) + { + serialize_manifest (r, cout, "stdout", "result"); + return 0; + } + + // Prepare answer to the private key challenge. 
+ // + optional> challenge; + + if (tr.challenge) + try + { + assert (ops.auth_key_specified ()); + + openssl os (trace, + fdstream_mode::text, path ("-"), 2, + ops.openssl (), "rsautl", + ops.openssl_option (), "-sign", "-inkey", ops.auth_key ()); + + os.out << *tr.challenge; + os.out.close (); + + challenge = os.in.read_binary (); + os.in.close (); + + if (!os.wait ()) + throw_generic_error (EIO); + } + catch (const system_error& e) + { + // The task response challenge is valid (verified by manifest parser), + // so there is something wrong with setup, and so the failure is fatal. + // + fail << "unable to sign task response challenge: " << e; + } + + // Upload the result. + // + result_request_manifest rq {tr.session, move (challenge), move (r)}; + { + const string& u (*tr.result_url); + + try + { + http_curl c (trace, + path ("-"), + nullfd, // Not expecting any data in response. + curl::post, + u, + "--header", "Content-Type: text/manifest", + "--max-time", ops.request_timeout ()); + + // This is tricky/hairy: we may fail hard writing the input before + // seeing that curl exited with an error and failing softly. + // + bool f (false); + + try + { + serialize_manifest (rq, c.out, u, "task request"); + } + catch (const failed&) {f = true;} + + c.out.close (); + + if (!c.wait () || f) + throw_generic_error (EIO); + } + catch (const system_error& e) + { + error << "unable to upload result to " << u << ": " << e; + continue; + } + } + + l2 ([&]{trace << "built " << t.name << '/' << t.version << " " + << "on " << t.machine << " " + << "for " << url;}); + } +} +catch (const failed&) +{ + return 1; // Diagnostics has already been issued. 
+} +catch (const cli::exception& e) +{ + error << e; + return 1; +} + +namespace bbot +{ + static unsigned int rand_seed; // Seed for rand_r(); + + size_t + genrand () + { + if (rand_seed == 0) + rand_seed = static_cast ( + chrono::system_clock::now ().time_since_epoch ().count ()); + + return static_cast (rand_r (&rand_seed)); + } + + // Note: Linux-specific implementation. + // + string + iface_addr (const string& i) + { + if (i.size () >= IFNAMSIZ) + throw invalid_argument ("interface nama too long"); + + auto_fd fd (socket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0)); + + if (fd.get () == -1) + throw_system_error (errno); + + ifreq ifr; + ifr.ifr_addr.sa_family = AF_INET; + strcpy (ifr.ifr_name, i.c_str ()); + + if (ioctl (fd.get (), SIOCGIFADDR, &ifr) == -1) + throw_system_error (errno); + + char buf[3 * 4 + 3 + 1]; // IPv4 address. + if (inet_ntop (AF_INET, + &reinterpret_cast (&ifr.ifr_addr)->sin_addr, + buf, + sizeof (buf)) == nullptr) + throw_system_error (errno); + + return buf; + } +} diff --git a/bbot/agent/agent.hxx b/bbot/agent/agent.hxx new file mode 100644 index 0000000..96876bc --- /dev/null +++ b/bbot/agent/agent.hxx @@ -0,0 +1,45 @@ +// file : bbot/agent/agent.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#ifndef BBOT_AGENT_AGENT_HXX +#define BBOT_AGENT_AGENT_HXX + +#include // uid_t + +#include +#include + +#include + +namespace bbot +{ + extern agent_options ops; + + extern const string bs_prot; // Bootstrap protocol version. + + extern string tc_name; // Toolchain name. + extern uint16_t tc_num; // Toolchain number. + extern standard_version tc_ver; // Toolchain version. + extern string tc_id; // Toolchain id. + + extern string hname; // Our host name. + extern uid_t uid; // Our effective user id. + extern string uname; // Our effective user name. + + // Random number generator (currently not MT-safe and limited to RAND_MAX). 
+ // + size_t + genrand (); + + template + inline T + genrand () {return static_cast (genrand ());} + + // Return the IPv4 address of an interface. + // + string + iface_addr (const string&); +} + +#endif // BBOT_AGENT_AGENT_HXX diff --git a/bbot/agent/machine-manifest.cxx b/bbot/agent/machine-manifest.cxx new file mode 100644 index 0000000..3312d1b --- /dev/null +++ b/bbot/agent/machine-manifest.cxx @@ -0,0 +1,355 @@ +// file : bbot/agent/machine-manifest.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#include + +#include + +#include +#include +#include +#include + +using namespace std; +using namespace butl; + +namespace bbot +{ + using parser = manifest_parser; + using parsing = manifest_parsing; + using serializer = manifest_serializer; + using serialization = manifest_serialization; + using name_value = manifest_name_value; + + // machine_type + // + string + to_string (machine_type t) + { + switch (t) + { + case machine_type::kvm: return "kvm"; + case machine_type::nspawn: return "nspawn"; + } + + assert (false); + return string (); + } + + machine_type + to_machine_type (const string& t) + { + if (t == "kvm") return machine_type::kvm; + else if (t == "nspawn") return machine_type::nspawn; + else throw invalid_argument ("invalid machine type '" + t + "'"); + } + + // machine_manifest + // + machine_manifest:: + machine_manifest (parser& p, bool iu) + : machine_manifest (p, p.next (), iu) + { + // Make sure this is the end. 
+ // + name_value nv (p.next ()); + if (!nv.empty ()) + throw parsing (p.name (), nv.name_line, nv.name_column, + "single machine manifest expected"); + } + + machine_manifest:: + machine_manifest (parser& p, name_value nv, bool iu) + : machine_header_manifest (p, move (nv), unknown_name_mode::stop, &nv) + { + auto bad_name = [&p, &nv] (const string& d) + { + throw parsing (p.name (), nv.name_line, nv.name_column, d); + }; + + // Offsets are used to tie an error to the specific position inside a + // manifest value (possibly a multiline one). + // + auto bad_value = [&p, &nv] ( + const string& d, uint64_t column_offset = 0, uint64_t line_offset = 0) + { + throw parsing (p.name (), + nv.value_line + line_offset, + (line_offset == 0 ? nv.value_column : 1) + column_offset, + d); + }; + + optional type; + + for (; !nv.empty (); nv = p.next ()) + { + string& n (nv.name); + string& v (nv.value); + + if (n == "type") + { + if (type) + bad_name ("machine type redefinition"); + + try + { + type = to_machine_type (v); + } + catch (const invalid_argument&) + { + bad_value ("invalid machine type"); + } + } + else if (n == "mac") + { + if (mac) + bad_name ("machine mac redefinition"); + + // @@ Should we check that the value is a valid mac? + // + mac = move (v); + } + else if (n == "options") + { + if (options) + bad_name ("machine options redefinition"); + + strings op; + + // Note that when reporting errors we combine the manifest value + // position with the respective error position. 
+ // + try + { + istringstream is (v); + tab_parser parser (is, ""); + + tab_fields tl; + while (!(tl = parser.next ()).empty ()) + { + for (auto& tf: tl) + op.emplace_back (move (tf.value)); + } + } + catch (const tab_parsing& e) + { + bad_value ("invalid machine options: " + e.description, + e.column - 1, + e.line - 1); + } + + if (op.empty ()) + bad_value ("empty machine options"); + + options = move (op); + } + else if (!iu) + bad_name ("unknown name '" + n + "' in machine manifest"); + } + + // Verify all non-optional values were specified. + // + if (!type) + bad_value ("no machine type specified"); + + this->type = *type; + } + + void machine_manifest:: + serialize (serializer& s) const + { + // @@ Should we check that all non-optional values are specified and all + // values are valid? + // + + machine_header_manifest::serialize (s, false); + + s.next ("type", to_string (type)); + + if (mac) + s.next ("mac", *mac); + + // Recompose options string as a space-separated option list, + // + if (options) + { + string v; + for (auto b (options->cbegin ()), i (b), e (options->cend ()); i != e; + ++i) + { + if (i != b) + v += ' '; + + v += *i; + } + + s.next ("options", v); + } + + s.next ("", ""); // End of manifest. + } + + strings machine_manifest:: + unquoted_options () const + { + return options + ? string_parser::unquote (*options) + : strings (); + } + + // toolchain_manifest + // + toolchain_manifest:: + toolchain_manifest (parser& p, bool iu) + : toolchain_manifest (p, p.next (), iu) + { + // Make sure this is the end. 
+ // + name_value nv (p.next ()); + if (!nv.empty ()) + throw parsing (p.name (), nv.name_line, nv.name_column, + "single toolchain manifest expected"); + } + + toolchain_manifest:: + toolchain_manifest (parser& p, name_value nv, bool iu) + { + auto bad_name = [&p, &nv] (const string& d) + { + throw parsing (p.name (), nv.name_line, nv.name_column, d); + }; + + auto bad_value = [&p, &nv] (const string& d) + { + throw parsing (p.name (), nv.value_line, nv.value_column, d); + }; + + // Make sure this is the start and we support the version. + // + if (!nv.name.empty ()) + bad_name ("start of toolchain manifest expected"); + + if (nv.value != "1") + bad_value ("unsupported format version"); + + // Parse the toolchain manifest. + // + for (nv = p.next (); !nv.empty (); nv = p.next ()) + { + string& n (nv.name); + string& v (nv.value); + + if (n == "id") + { + if (!id.empty ()) + bad_name ("toolchain id redefinition"); + + if (v.empty ()) + bad_value ("empty toolchain id"); + + id = move (v); + } + else if (!iu) + bad_name ("unknown name '" + n + "' in toolchain manifest"); + } + + // Verify all non-optional values were specified. + // + if (id.empty ()) + bad_value ("no toolchain id specified"); + } + + void toolchain_manifest:: + serialize (serializer& s) const + { + // @@ Should we check that all non-optional values are specified? + // + s.next ("", "1"); // Start of manifest. + s.next ("id", id); + s.next ("", ""); // End of manifest. + } + + // bootstrapped_machine_manifest + // + bootstrapped_machine_manifest:: + bootstrapped_machine_manifest (parser& p, bool iu) + { + name_value nv (p.next ()); + + auto bad_name = [&p, &nv] (const string& d) + { + throw parsing (p.name (), nv.name_line, nv.name_column, d); + }; + + auto bad_value = [&p, &nv] (const string& d) + { + throw parsing (p.name (), nv.value_line, nv.value_column, d); + }; + + // Make sure this is the start and we support the version. 
+ // + if (!nv.name.empty ()) + bad_name ("start of bootstrapped machine manifest expected"); + + if (nv.value != "1") + bad_value ("unsupported format version"); + + // Parse the bootstrapped machine manifest. Currently there is no values + // expected. + // + for (nv = p.next (); !nv.empty (); nv = p.next ()) + { + if (!iu) + bad_name ("unknown name '" + nv.name + + "' in bootstrapped machine manifest"); + } + + nv = p.next (); + if (nv.empty ()) + bad_value ("machine manifest expected"); + + machine = machine_manifest (p, nv, iu); + + if (!machine.mac) + bad_name ("mac address must be present in machine manifest"); + + nv = p.next (); + if (nv.empty ()) + bad_value ("toolchain manifest expected"); + + toolchain = toolchain_manifest (p, nv, iu); + + nv = p.next (); + if (nv.empty ()) + bad_value ("bootstrap manifest expected"); + + bootstrap = bootstrap_manifest (p, nv, iu); + + // Make sure this is the end. + // + nv = p.next (); + if (!nv.empty ()) + throw parsing (p.name (), nv.name_line, nv.name_column, + "single bootstrapped machine manifest expected"); + } + + void bootstrapped_machine_manifest:: + serialize (serializer& s) const + { + // @@ Should we check that all non-optional values are specified? + // + s.next ("", "1"); // Start of manifest. + s.next ("", ""); // End of manifest. + + if (!machine.mac) + throw serialization (s.name (), + "mac address must be present in machine manifest"); + + machine.serialize (s); + toolchain.serialize (s); + bootstrap.serialize (s); + + s.next ("", ""); // End of stream. 
+ } +} diff --git a/bbot/agent/machine-manifest.hxx b/bbot/agent/machine-manifest.hxx new file mode 100644 index 0000000..37919ba --- /dev/null +++ b/bbot/agent/machine-manifest.hxx @@ -0,0 +1,118 @@ +// file : bbot/agent/machine-manifest.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#ifndef BBOT_AGENT_MACHINE_MANIFEST_HXX +#define BBOT_AGENT_MACHINE_MANIFEST_HXX + +#include + +#include + +#include // machine_header + +#include +#include + +#include + +namespace bbot +{ + // Machine type. + // + enum class machine_type {kvm, nspawn}; + + string + to_string (machine_type); + + machine_type + to_machine_type (const string&); // Throws invalid_argument. + + // Machine. + // + class machine_manifest: public machine_header_manifest + { + public: + machine_type type; + optional mac; // Required in bootstrapped machine manifest. + optional options; // Note: could be quoted. + + strings + unquoted_options () const; // Return empty if absent. + + machine_manifest (std::string i, + std::string n, + std::string s, + machine_type t, + optional m, + optional o) + : machine_header_manifest (std::move (i), + std::move (n), + std::move (s)), + type (t), + mac (std::move (m)), + options (std::move (o)) {} + + public: + machine_manifest () = default; // VC export. + machine_manifest (butl::manifest_parser&, bool ignore_unknown = false); + machine_manifest (butl::manifest_parser&, + butl::manifest_name_value start, + bool ignore_unknown = false); + + void + serialize (butl::manifest_serializer&) const; + }; + + // Toolchain. + // + class toolchain_manifest + { + public: + + // Toolchain id (SHAXXX). + // + string id; + + explicit + toolchain_manifest (string i): id (i) {} + + public: + toolchain_manifest () = default; // VC export. 
+ toolchain_manifest (butl::manifest_parser&, bool ignore_unknown = false); + toolchain_manifest (butl::manifest_parser&, + butl::manifest_name_value start, + bool ignore_unknown = false); + + void + serialize (butl::manifest_serializer&) const; + }; + + // The manifest stored in -/ consists of the machine + // manifest (original), toolchain manifest, and bootstrap manifest. + // + class bootstrapped_machine_manifest + { + public: + machine_manifest machine; + toolchain_manifest toolchain; + bootstrap_manifest bootstrap; + + bootstrapped_machine_manifest (machine_manifest m, + toolchain_manifest t, + bootstrap_manifest b) + : machine (move (m)), toolchain (move (t)), bootstrap (move (b)) {} + + public: + bootstrapped_machine_manifest () = default; // VC export. + bootstrapped_machine_manifest (butl::manifest_parser&, + bool ignore_unknown = false); + + void + serialize (butl::manifest_serializer&) const; + }; + + using bootstrapped_machine_manifests = vector; +} + +#endif // BBOT_AGENT_MACHINE_MANIFEST_HXX diff --git a/bbot/agent/machine.cxx b/bbot/agent/machine.cxx new file mode 100644 index 0000000..422c623 --- /dev/null +++ b/bbot/agent/machine.cxx @@ -0,0 +1,474 @@ +// file : bbot/agent/machine.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#include + +#include // sleep() + +#include // sockaddr_un +#include + +#include // snprintf() +#include // strcpy() + +#include +#include + +using namespace std; +using namespace butl; + +namespace bbot +{ + // Forward TFTP requests (UDP/69) coming from the machine to the specified + // port. + // + // This allows the machine to connect to any "unknown" IP (e.g., link-local + // 169.254.111.222) port 69 and end up being redirected to our TFTP server. 
+ // + static void + iptables (tracer& t, + const char* a, + const string& tap, + const string& br, + uint16_t port, + bool ignore_errors = false) + { + string addr (iface_addr (br)); + + auto_fd fdn (ignore_errors ? fdnull () : nullfd); + int ofd (ignore_errors ? fdn.get () : 2); + + process_exit::code_type e; + + e = run_io_exit (t, 0, ofd, ofd, + "sudo", "iptables", + "-t", "nat", + a, "PREROUTING", + "-m", "udp", + "-p", "udp", + "-m", "physdev", + "-i", br, + "--physdev-in", tap, + "--dport", 69, + "-j", "DNAT", + "--to-destination", addr + ':' + to_string (port)); + + if (e != 0 && !ignore_errors) + fail << "process iptables terminated with non-zero exit code"; + + // Nobody really knows whether this is really needed (really)... + // + e = run_io_exit (t, 0, ofd, ofd, + "sudo", "iptables", + a, "FORWARD", + "-m", "udp", + "-p", "udp", + "-m", "physdev", + "-o", br, + "--physdev-out", tap, + "-d", addr, + "--dport", port, + "-m", "state", + "--state", "NEW,ESTABLISHED,RELATED", + "-j", "ACCEPT"); + + if (e != 0 && !ignore_errors) + fail << "process iptables terminated with non-zero exit code"; + } + + static string + create_tap (const string& br, uint16_t port) + { + string t ("tap" + to_string (tc_num)); + + tracer trace ("create_tap", t.c_str ()); + + // First try to delete it in case there is one from a previous run. + // + iptables (trace, "-D", t, br, port, true); // Ignore errors. + run_exit (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); + + run (trace, "sudo", "ip", "tuntap", "add", t, "mode", "tap", "user", uid); + run (trace, "sudo", "ip", "link", "set", t, "up"); + run (trace, "sudo", "ip", "link", "set", t, "master", br); + + iptables (trace, "-A", t, br, port); // Add. + + return t; + } + + static void + destroy_tap (const string& t, const string& br, uint16_t port) + { + tracer trace ("destroy_tap", t.c_str ()); + iptables (trace, "-D", t, br, port); // Delete. 
+ run (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); + } + + class tap + { + public: + string iface; + + string bridge; // Bridge interface to which this tap belongs + uint16_t port; // UDP port to forward TFTP traffic to. + + tap (string b, uint16_t p) + : iface (create_tap (b, p)), bridge (move (b)), port (p) {} + + ~tap () + { + if (!iface.empty ()) + { + try {destroy ();} catch (...) {} + } + } + + void + destroy () + { + destroy_tap (iface, bridge, port); + iface.clear (); + } + }; + + static string + generate_mac () + { + // The last two bits of the first byte are special: bit 0 indicates a + // multicast address (which we don't want) while bit 1 -- local assignment + // (which we do want). + // + char r[6 * 2 + 5 + 1]; + snprintf (r, sizeof (r), + "%02x:%02x:%02x:%02x:%02x:%02x", + (genrand () & 0xFE) | 0x02, + genrand (), + genrand (), + genrand (), + genrand (), + genrand ()); + return r; + } + + class kvm_machine: public machine + { + public: + kvm_machine (const dir_path&, + const machine_manifest&, + const optional& mac, + const string& br_iface, + uint16_t tftp_port); + + virtual bool + shutdown (size_t& seconds) override; + + virtual void + forcedown (bool fail_hard) override; + + virtual void + suspend () override; + + bool + wait (size_t& seconds, bool fail_hard) override; + + using machine::wait; + + virtual void + print_info (diag_record&) override; + + private: + void + monitor_command (const string&, bool fail_hard = true); + + private: + path kvm; // Hypervisor binary. + tap net; // Tap network interface. + string vnc; // QEMU VNC TCP addr:port. + path monitor; // QEMU monitor UNIX socket. + process proc; + }; + + kvm_machine:: + kvm_machine (const dir_path& md, + const machine_manifest& mm, + const optional& omac, + const string& br, + uint16_t port) + : machine (mm.mac ? *mm.mac : // Fixed mac from machine manifest. + omac ? *omac : // Generated mac from previous bootstrap. 
+ generate_mac ()), + kvm ("kvm"), + net (br, port), + vnc ("127.0.0.1:" + to_string (5900 + tc_num)), + monitor ("/tmp/" + tc_name + "-monitor") + { + tracer trace ("kvm_machine", md.string ().c_str ()); + + if (sizeof (sockaddr_un::sun_path) <= monitor.size ()) + throw invalid_argument ("monitor unix socket path too long"); + + // Map logical CPUs to sockets/cores/threads. Failed that, QEMU just makes + // it a machine with that number of sockets and some operating systems + // (like Windows) only can do two. + // + size_t cpu (ops.cpu ()); + + size_t sockets (cpu <= 8 ? 1 : cpu <= 64 ? 2 : 4); + size_t cores (cpu / sockets); + size_t threads (cores <= 4 ? 1 : 2); + cores /= threads; + + + // We probably don't want to commit all the available RAM to the VM since + // some of it could be used on the host side for caching, etc. So the + // heuristics that we will use is 4G or 1G per CPU, whichever is greater + // and the rest divide equally between the host and the VM. + // + size_t ram ((cpu < 4 ? 4 : cpu) * 1024 * 1024); // Kb. + + if (ram > ops.ram ()) + ram = ops.ram (); + else + ram += (ops.ram () - ram) / 2; + + // If we have options, use that instead of the default network and + // disk configuration. + // + strings os; + + if (mm.options) + { + os = mm.unquoted_options (); + + // Pre-process ifname=? and mac=?. + // + auto sub = [] (string& o, const char* s, const string& r) + { + size_t p (o.find (s)); + + if (p != string::npos) + { + p = o.find ('?', p + 1); + assert (p != string::npos); + o.replace (p, 1, r); + } + }; + + for (string& o: os) + { + sub (o, "ifname=?", net.iface); + sub (o, "mac=?", mac); + } + } + else + { + auto add = [&os] (string o, string v) + { + os.push_back (move (o)); + os.push_back (move (v)); + }; + + // Network. + // + add ("-netdev", "tap,id=net0,script=no,ifname=" + net.iface); + add ("-device", "virtio-net-pci,netdev=net0,mac=" + mac); + + // Disk. 
+ // + add ("-drive", "if=none,id=disk0,file=disk.img,format=raw"); + add ("-device", "virtio-blk-pci,scsi=off,drive=disk0"); + + //"-drive", "if=none,id=disk0,format=raw,file=disk.img" + //"-device", "virtio-scsi-pci,id=scsi" + //"-device", "scsi-hd,drive=disk0" + } + + // Start the VM. + // + // Notes: + // + // 1. echo system_powerdown | socat - UNIX-CONNECT:.../monitor + // + proc = run_io_start ( + trace, + fdnull (), + 2, + 2, + md, // Run from the machine's directory. + kvm, + "-boot", "c", // Boot from disk. + "-no-reboot", // Exit on VM reboot. + "-m", to_string (ram / 1024) + "M", + "-cpu", "host", + "-smp", (to_string (cpu) + + ",sockets=" + to_string (sockets) + + ",cores=" + to_string (cores) + + ",threads=" + to_string (threads)), + os, + "-vnc", "127.0.0.1:" + to_string (tc_num), // 5900 + tc_num + "-monitor", "unix:" + monitor.string () + ",server,nowait"); + } + + // Connect to the QEMU monitor via the UNIX socket and send system_reset. + // You may be wondering why not system_powerdown? The reason is that while + // not all OS know how to power-down the machine, pretty much all of them + // can reboot. So combined with the -no-reboot option above, we get the + // same result in a more robust way. + // + // Note that this setup has one side effect: if the VM decided to reboot, + // say, during bootstrap, then we will interpret it as a shutdown. Current + // thinking says this is good since we don't want our VMs to reboot + // uncontrollably for security and predictability reasons (e.g., we don't + // want Windows to decide to install updates -- this stuff should all be + // disabled during the VM preparation). + // + // Actually, this turned out not to be entirely accurate: reset appears to + // be a "hard reset" while powerdown causes a clean shutdown. So we use + // powerdown to implement shutdown() and reset/-no-reboot to implement + // forcedown(). 
+ // + bool kvm_machine:: + shutdown (size_t& seconds) + { + monitor_command ("system_powerdown"); + + // Wait for up to the specified number of seconds for the machine to + // shutdown. + // + return wait (seconds); + } + + void kvm_machine:: + forcedown (bool fh) + { + monitor_command ("system_reset", fh); + wait (fh); + } + + void kvm_machine:: + suspend () + { + monitor_command ("stop"); + } + + void kvm_machine:: + print_info (diag_record& dr) + { + dr << info << "qemu pid: " << proc.id () + << info << "qemu vnc: " << vnc + << info << "qemu monitor: unix:" << monitor; + } + + bool kvm_machine:: + wait (size_t& sec, bool fh) + { + try + { + tracer trace ("kvm_machine::wait"); + + bool t; + for (; !(t = proc.try_wait ()) && sec != 0; --sec) + sleep (1); + + if (t) + { + run_io_finish (trace, proc, kvm, fh); + net.destroy (); //@@ Always fails hard. + try_rmfile (monitor, true); // QEMU doesn't seem to remove it. + } + + return t; + } + catch (const process_error& e) + { + fail (fh) << "unable to execute " << kvm << ": " << e << endf; + } + } + + void kvm_machine:: + monitor_command (const string& c, bool fh) + { + try + { + sockaddr_un addr; + addr.sun_family = AF_LOCAL; + strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor + + auto_fd sock (socket (AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0)); + + if (sock.get () == -1) + throw_system_error (errno); + + if (connect (sock.get (), + reinterpret_cast (&addr), + sizeof (addr)) == -1) + throw_system_error (errno); + + // Read until we get something. + // + auto readsome = [&sock] () + { + ifdstream ifs (move (sock), + fdstream_mode::non_blocking, + ostream::badbit); + + char buf[256]; + for (streamsize n (0), m (0); + n == 0 || m != 0; + m = ifs.readsome (buf, sizeof (buf) - 1)) + { + if (m != 0) + { + n += m; + + //buf[m] = '\0'; + //text << buf; + } + } + + sock = ifs.release (); + }; + + // Read QEMU welcome. + // + readsome (); + + // Write our command. 
+ // + { + ofdstream ofs (move (sock), fdstream_mode::blocking); + ofs << c << endl; + sock = ofs.release (); + } + + // Read QEMU reply (may hit eof). + // + readsome (); + return; + } + catch (const system_error& e) + { + fail (fh) << "unable to communicate with qemu monitor: " << e; + } + } + + unique_ptr + start_machine (const dir_path& md, + const machine_manifest& mm, + const optional& mac, + const string& br_iface, + uint16_t tftp_port) + { + switch (mm.type) + { + case machine_type::kvm: + return make_unique (md, mm, mac, br_iface, tftp_port); + case machine_type::nspawn: + assert (false); //@@ TODO + } + + return nullptr; + } +} diff --git a/bbot/agent/machine.hxx b/bbot/agent/machine.hxx new file mode 100644 index 0000000..e352e42 --- /dev/null +++ b/bbot/agent/machine.hxx @@ -0,0 +1,84 @@ +// file : bbot/agent/machine.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#ifndef BBOT_AGENT_MACHINE_HXX +#define BBOT_AGENT_MACHINE_HXX + +#include +#include + +namespace bbot +{ + // A running build machine (container, vm, etc). + // + // Note that if the machine is destroyed while it is still running, the + // destructor will block until the machine process terminates. + // + // Some functions can fail softly if the fail_hard argument is false. + // + class machine + { + public: + // Shut the machine down cleanly waiting up to the specified number of + // seconds for completion. Update the timeout and return false if the + // machine is still running, true if the machine exited successfully, and + // throw failed otherwise. + // + virtual bool + shutdown (size_t& seconds) = 0; + + // Force the machine down. + // + virtual void + forcedown (bool fail_hard = true) = 0; + + // Suspend the machine. + // + virtual void + suspend () = 0; + + // Wait for the machine to terminate up to the specified number of + // seconds. 
Update the timeout and return false if the machine is still + // running, true if the machine exited successfully, and throw failed + // otherwise. + // + virtual bool + wait (size_t& seconds, bool fail_hard = true) = 0; + + bool + wait (bool fail_hard = true) + { + size_t sec (~0); // Wait indefinitely. + return wait (sec, fail_hard); + } + + // Print information about the machine (as info diagnostics) that can be + // useful for debugging (e.g., how to connect/login, etc). + // + virtual void + print_info (diag_record&) = 0; + + public: + const string mac; // MAC address (inside the machine). + + public: + virtual + ~machine () = default; + + protected: + machine (string m) + : mac (move (m)) {} + }; + + class machine_manifest; + + unique_ptr + start_machine (const dir_path&, + const machine_manifest&, + const optional& mac, + const string& br_iface, + uint16_t tftp_port); +} + +#endif // BBOT_AGENT_MACHINE_HXX diff --git a/bbot/agent/tftp.cxx b/bbot/agent/tftp.cxx new file mode 100644 index 0000000..27c1577 --- /dev/null +++ b/bbot/agent/tftp.cxx @@ -0,0 +1,137 @@ +// file : bbot/agent/tftp.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#include + +#include // htonl() +#include // sockaddr_in +#include +#include + +#include // memset() + +#include + +using namespace std; +using namespace butl; + +namespace bbot +{ + tftp_server:: + tftp_server (const string& map, uint16_t port) + { + int fd (socket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0)); + + if (fd == -1) + throw_system_error (errno); + + fd_.reset (fd); + + // Bind to ephemeral port. + // + sockaddr_in addr; + memset (&addr, 0, sizeof (addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl (INADDR_ANY); + addr.sin_port = htons (port); + + // Not to confuse with std::bind(). + // + if (::bind (fd, + reinterpret_cast (&addr), + sizeof (sockaddr_in)) == -1) + throw_system_error (errno); + + // Create the map file. 
+ // + map_ = auto_rmfile (path::temp_path ("bbot-agent-tftp-map")); + ofdstream ofs (map_.path ()); + ofs << map << endl; + ofs.close (); + } + + uint16_t tftp_server:: + port () const + { + sockaddr_in addr; + socklen_t size (sizeof (addr)); + + if (getsockname (fd_.get (), + reinterpret_cast (&addr), + &size) == -1) + throw_system_error (errno); + + assert (size == sizeof (addr)); + return ntohs (addr.sin_port); + } + + bool tftp_server:: + serve (size_t& sec, size_t inc) + { + tracer trace ("tftp_server::serve"); + + if (inc == 0 || inc > sec) + inc = sec; + + int fd (fd_.get ()); + + // Note: Linux updates the timeout value which we rely upon. + // + timeval timeout {static_cast (inc), 0}; + + fd_set rd; + FD_ZERO (&rd); + + for (;;) + { + FD_SET (fd, &rd); + + int r (select (fd + 1, &rd, nullptr, nullptr, &timeout)); + + if (r == -1) + { + if (errno == EINTR) + continue; + + throw_system_error (errno); + } + else if (r == 0) // Timeout. + { + sec -= inc; + return false; + } + + if (FD_ISSET (fd, &rd)) + { + // The inetd "protocol" is to pass the socket as stdin/stdout file + // descriptors. + // + // Notes/issues: + // + // 1. Writes diagnostics to syslog. + // + run_io (trace, + fddup (fd), + fddup (fd), + 2, + "sudo", // Required for --secure (chroot). + "/usr/sbin/in.tftpd", // Standard installation location. + "--timeout", 1, // Wait for more requests. + "--permissive", // Use inherited umask. + "--create", // Allow creating new files (PUT). + "--map-file", map_.path (), // Path remapping rules. + "--user", uname, // Run as our effective user. + "--secure", // Chroot to data directory. + ops.tftp ()); + + // This is not really accurate since tftpd will, for example, serve + // an upload request until it is complete. But it's close anough for + // our needs. 
+ // + sec -= (inc - static_cast (timeout.tv_sec)); + return true; + } + } + } +} diff --git a/bbot/agent/tftp.hxx b/bbot/agent/tftp.hxx new file mode 100644 index 0000000..76b4d1c --- /dev/null +++ b/bbot/agent/tftp.hxx @@ -0,0 +1,47 @@ +// file : bbot/agent/tftp.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#ifndef BBOT_AGENT_TFTP_HXX +#define BBOT_AGENT_TFTP_HXX + +#include +#include + +namespace bbot +{ + // A TFTP server "wrapper" over tftpd-hpa. + // + // In a nutshell, we are pretending to be inetd and when a request arrives, + // spawn tftpd-hpa to handle it. + // + class tftp_server + { + public: + // The map argument specifies the path mapping rules, one per line (see + // the tftpd-hpa --map-file|-m option for details). If port is 0, then + // it is automatically assigned. + // + tftp_server (const string& map, uint16_t port); + + // Return the assigned port. + // + uint16_t + port () const; + + // Wait for a TFTP request for up to the specified number of seconds. If + // increment is not 0, then wait in the specified incremenets (i.e., wait + // for up to that number of seconds; useful when one needs to also + // periodically check for something else). Update the timeout value as + // well as return true if a request was served and false otherwise. + // + bool + serve (size_t& seconds, size_t increment = 0); + + private: + auto_fd fd_; + auto_rmfile map_; + }; +} + +#endif // BBOT_AGENT_TFTP_HXX diff --git a/bbot/buildfile b/bbot/buildfile index 15ab966..a576cce 100644 --- a/bbot/buildfile +++ b/bbot/buildfile @@ -13,7 +13,7 @@ service{*}: extension = service service{*}: install = lib/systemd/system/ service{*}: install.mode = 644 -import libs = libbutl%lib{butl} +import libs = libbutl%lib{butl} import libs += libbbot%lib{bbot} # Agent is a systemd service. 
@@ -24,33 +24,18 @@ if ($cxx.target.class == "linux") { ./: exe{bbot-agent} service{'bbot-agent@'} - exe{bbot-agent}: \ - {hxx cxx}{ agent } {hxx ixx cxx}{ agent-options } \ - {hxx ixx cxx}{ common-options } \ - {hxx cxx}{ bootstrap-manifest } \ - {hxx cxx}{ diagnostics } \ - {hxx cxx}{ machine-manifest } \ - {hxx cxx}{ machine } \ - {hxx cxx}{ tftp } \ - {hxx }{ types } \ - {hxx cxx}{ types-parsers } \ - {hxx txx }{ utility } \ - {hxx }{ version } \ - $libs + exe{bbot-agent}: {hxx ixx txx cxx}{* +agent/* -**-options -version} \ + {hxx ixx cxx}{common-options agent/agent-options} \ + {hxx}{version} \ + $libs } ./: exe{bbot-worker} -exe{bbot-worker}: \ -{ cxx}{ worker } {hxx ixx cxx}{ worker-options } \ - {hxx ixx cxx}{ common-options } \ -{hxx cxx}{ bootstrap-manifest } \ -{hxx cxx}{ diagnostics } \ -{hxx }{ types } \ -{hxx cxx}{ types-parsers } \ -{hxx txx }{ utility } \ -{hxx }{ version } \ - $libs +exe{bbot-worker}: {hxx ixx txx cxx}{* +worker/* -**-options -version} \ + {hxx ixx cxx}{common-options worker/worker-options} \ + {hxx}{version} \ + $libs hxx{version}: in{version} $src_root/file{manifest} hxx{version}: dist = true @@ -60,20 +45,27 @@ hxx{version}: dist = true if $cli.configured { cli.cxx{common-options}: cli{common} - cli.cxx{agent-options}: cli{agent} - cli.cxx{worker-options}: cli{worker} + cli.cxx{agent/agent-options}: cli{agent/agent} + cli.cxx{worker/worker-options}: cli{worker/worker} - cli.options += -I $src_root --include-with-brackets --include-prefix bbot \ ---guard-prefix BBOT --cxx-prologue "#include " \ + cli.options += -I $src_root --include-with-brackets \ +--cxx-prologue "#include " \ --cli-namespace bbot::cli --generate-specifier --generate-parse - cli.cxx{common-options}: cli.options = $cli.options # No usage. + cli.cxx{common-options}: \ + cli.options += --include-prefix bbot --guard-prefix BBOT # No usage. # Usage options. 
# cli.options += --suppress-undocumented --long-usage --ansi-color \ --page-usage 'bbot::print_$name$_' --option-length 23 + cli.cxx{agent/agent-options}: \ + cli.options += --include-prefix bbot/agent --guard-prefix BBOT_AGENT + + cli.cxx{worker/worker-options}: \ + cli.options += --include-prefix bbot/worker --guard-prefix BBOT_WORKER + # Include generated cli files into the distribution. # cli.cxx{*}: dist = true diff --git a/bbot/machine-manifest.cxx b/bbot/machine-manifest.cxx deleted file mode 100644 index b7baf7e..0000000 --- a/bbot/machine-manifest.cxx +++ /dev/null @@ -1,355 +0,0 @@ -// file : bbot/machine-manifest.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#include - -#include - -#include -#include -#include -#include - -using namespace std; -using namespace butl; - -namespace bbot -{ - using parser = manifest_parser; - using parsing = manifest_parsing; - using serializer = manifest_serializer; - using serialization = manifest_serialization; - using name_value = manifest_name_value; - - // machine_type - // - string - to_string (machine_type t) - { - switch (t) - { - case machine_type::kvm: return "kvm"; - case machine_type::nspawn: return "nspawn"; - } - - assert (false); - return string (); - } - - machine_type - to_machine_type (const string& t) - { - if (t == "kvm") return machine_type::kvm; - else if (t == "nspawn") return machine_type::nspawn; - else throw invalid_argument ("invalid machine type '" + t + "'"); - } - - // machine_manifest - // - machine_manifest:: - machine_manifest (parser& p, bool iu) - : machine_manifest (p, p.next (), iu) - { - // Make sure this is the end. 
- // - name_value nv (p.next ()); - if (!nv.empty ()) - throw parsing (p.name (), nv.name_line, nv.name_column, - "single machine manifest expected"); - } - - machine_manifest:: - machine_manifest (parser& p, name_value nv, bool iu) - : machine_header_manifest (p, move (nv), unknown_name_mode::stop, &nv) - { - auto bad_name = [&p, &nv] (const string& d) - { - throw parsing (p.name (), nv.name_line, nv.name_column, d); - }; - - // Offsets are used to tie an error to the specific position inside a - // manifest value (possibly a multiline one). - // - auto bad_value = [&p, &nv] ( - const string& d, uint64_t column_offset = 0, uint64_t line_offset = 0) - { - throw parsing (p.name (), - nv.value_line + line_offset, - (line_offset == 0 ? nv.value_column : 1) + column_offset, - d); - }; - - optional type; - - for (; !nv.empty (); nv = p.next ()) - { - string& n (nv.name); - string& v (nv.value); - - if (n == "type") - { - if (type) - bad_name ("machine type redefinition"); - - try - { - type = to_machine_type (v); - } - catch (const invalid_argument&) - { - bad_value ("invalid machine type"); - } - } - else if (n == "mac") - { - if (mac) - bad_name ("machine mac redefinition"); - - // @@ Should we check that the value is a valid mac? - // - mac = move (v); - } - else if (n == "options") - { - if (options) - bad_name ("machine options redefinition"); - - strings op; - - // Note that when reporting errors we combine the manifest value - // position with the respective error position. 
- // - try - { - istringstream is (v); - tab_parser parser (is, ""); - - tab_fields tl; - while (!(tl = parser.next ()).empty ()) - { - for (auto& tf: tl) - op.emplace_back (move (tf.value)); - } - } - catch (const tab_parsing& e) - { - bad_value ("invalid machine options: " + e.description, - e.column - 1, - e.line - 1); - } - - if (op.empty ()) - bad_value ("empty machine options"); - - options = move (op); - } - else if (!iu) - bad_name ("unknown name '" + n + "' in machine manifest"); - } - - // Verify all non-optional values were specified. - // - if (!type) - bad_value ("no machine type specified"); - - this->type = *type; - } - - void machine_manifest:: - serialize (serializer& s) const - { - // @@ Should we check that all non-optional values are specified and all - // values are valid? - // - - machine_header_manifest::serialize (s, false); - - s.next ("type", to_string (type)); - - if (mac) - s.next ("mac", *mac); - - // Recompose options string as a space-separated option list, - // - if (options) - { - string v; - for (auto b (options->cbegin ()), i (b), e (options->cend ()); i != e; - ++i) - { - if (i != b) - v += ' '; - - v += *i; - } - - s.next ("options", v); - } - - s.next ("", ""); // End of manifest. - } - - strings machine_manifest:: - unquoted_options () const - { - return options - ? string_parser::unquote (*options) - : strings (); - } - - // toolchain_manifest - // - toolchain_manifest:: - toolchain_manifest (parser& p, bool iu) - : toolchain_manifest (p, p.next (), iu) - { - // Make sure this is the end. 
- // - name_value nv (p.next ()); - if (!nv.empty ()) - throw parsing (p.name (), nv.name_line, nv.name_column, - "single toolchain manifest expected"); - } - - toolchain_manifest:: - toolchain_manifest (parser& p, name_value nv, bool iu) - { - auto bad_name = [&p, &nv] (const string& d) - { - throw parsing (p.name (), nv.name_line, nv.name_column, d); - }; - - auto bad_value = [&p, &nv] (const string& d) - { - throw parsing (p.name (), nv.value_line, nv.value_column, d); - }; - - // Make sure this is the start and we support the version. - // - if (!nv.name.empty ()) - bad_name ("start of toolchain manifest expected"); - - if (nv.value != "1") - bad_value ("unsupported format version"); - - // Parse the toolchain manifest. - // - for (nv = p.next (); !nv.empty (); nv = p.next ()) - { - string& n (nv.name); - string& v (nv.value); - - if (n == "id") - { - if (!id.empty ()) - bad_name ("toolchain id redefinition"); - - if (v.empty ()) - bad_value ("empty toolchain id"); - - id = move (v); - } - else if (!iu) - bad_name ("unknown name '" + n + "' in toolchain manifest"); - } - - // Verify all non-optional values were specified. - // - if (id.empty ()) - bad_value ("no toolchain id specified"); - } - - void toolchain_manifest:: - serialize (serializer& s) const - { - // @@ Should we check that all non-optional values are specified? - // - s.next ("", "1"); // Start of manifest. - s.next ("id", id); - s.next ("", ""); // End of manifest. - } - - // bootstrapped_machine_manifest - // - bootstrapped_machine_manifest:: - bootstrapped_machine_manifest (parser& p, bool iu) - { - name_value nv (p.next ()); - - auto bad_name = [&p, &nv] (const string& d) - { - throw parsing (p.name (), nv.name_line, nv.name_column, d); - }; - - auto bad_value = [&p, &nv] (const string& d) - { - throw parsing (p.name (), nv.value_line, nv.value_column, d); - }; - - // Make sure this is the start and we support the version. 
- // - if (!nv.name.empty ()) - bad_name ("start of bootstrapped machine manifest expected"); - - if (nv.value != "1") - bad_value ("unsupported format version"); - - // Parse the bootstrapped machine manifest. Currently there is no values - // expected. - // - for (nv = p.next (); !nv.empty (); nv = p.next ()) - { - if (!iu) - bad_name ("unknown name '" + nv.name + - "' in bootstrapped machine manifest"); - } - - nv = p.next (); - if (nv.empty ()) - bad_value ("machine manifest expected"); - - machine = machine_manifest (p, nv, iu); - - if (!machine.mac) - bad_name ("mac address must be present in machine manifest"); - - nv = p.next (); - if (nv.empty ()) - bad_value ("toolchain manifest expected"); - - toolchain = toolchain_manifest (p, nv, iu); - - nv = p.next (); - if (nv.empty ()) - bad_value ("bootstrap manifest expected"); - - bootstrap = bootstrap_manifest (p, nv, iu); - - // Make sure this is the end. - // - nv = p.next (); - if (!nv.empty ()) - throw parsing (p.name (), nv.name_line, nv.name_column, - "single bootstrapped machine manifest expected"); - } - - void bootstrapped_machine_manifest:: - serialize (serializer& s) const - { - // @@ Should we check that all non-optional values are specified? - // - s.next ("", "1"); // Start of manifest. - s.next ("", ""); // End of manifest. - - if (!machine.mac) - throw serialization (s.name (), - "mac address must be present in machine manifest"); - - machine.serialize (s); - toolchain.serialize (s); - bootstrap.serialize (s); - - s.next ("", ""); // End of stream. 
- } -} diff --git a/bbot/machine-manifest.hxx b/bbot/machine-manifest.hxx deleted file mode 100644 index efcdda4..0000000 --- a/bbot/machine-manifest.hxx +++ /dev/null @@ -1,118 +0,0 @@ -// file : bbot/machine-manifest.hxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#ifndef BBOT_MACHINE_MANIFEST_HXX -#define BBOT_MACHINE_MANIFEST_HXX - -#include - -#include - -#include // machine_header - -#include -#include - -#include - -namespace bbot -{ - // Machine type. - // - enum class machine_type {kvm, nspawn}; - - string - to_string (machine_type); - - machine_type - to_machine_type (const string&); // Throws invalid_argument. - - // Machine. - // - class machine_manifest: public machine_header_manifest - { - public: - machine_type type; - optional mac; // Required in bootstrapped machine manifest. - optional options; // Note: could be quoted. - - strings - unquoted_options () const; // Return empty if absent. - - machine_manifest (std::string i, - std::string n, - std::string s, - machine_type t, - optional m, - optional o) - : machine_header_manifest (std::move (i), - std::move (n), - std::move (s)), - type (t), - mac (std::move (m)), - options (std::move (o)) {} - - public: - machine_manifest () = default; // VC export. - machine_manifest (butl::manifest_parser&, bool ignore_unknown = false); - machine_manifest (butl::manifest_parser&, - butl::manifest_name_value start, - bool ignore_unknown = false); - - void - serialize (butl::manifest_serializer&) const; - }; - - // Toolchain. - // - class toolchain_manifest - { - public: - - // Toolchain id (SHAXXX). - // - string id; - - explicit - toolchain_manifest (string i): id (i) {} - - public: - toolchain_manifest () = default; // VC export. 
- toolchain_manifest (butl::manifest_parser&, bool ignore_unknown = false); - toolchain_manifest (butl::manifest_parser&, - butl::manifest_name_value start, - bool ignore_unknown = false); - - void - serialize (butl::manifest_serializer&) const; - }; - - // The manifest stored in -/ consists of the machine - // manifest (original), toolchain manifest, and bootstrap manifest. - // - class bootstrapped_machine_manifest - { - public: - machine_manifest machine; - toolchain_manifest toolchain; - bootstrap_manifest bootstrap; - - bootstrapped_machine_manifest (machine_manifest m, - toolchain_manifest t, - bootstrap_manifest b) - : machine (move (m)), toolchain (move (t)), bootstrap (move (b)) {} - - public: - bootstrapped_machine_manifest () = default; // VC export. - bootstrapped_machine_manifest (butl::manifest_parser&, - bool ignore_unknown = false); - - void - serialize (butl::manifest_serializer&) const; - }; - - using bootstrapped_machine_manifests = vector; -} - -#endif // BBOT_MACHINE_MANIFEST_HXX diff --git a/bbot/machine.cxx b/bbot/machine.cxx deleted file mode 100644 index 0c8a0e5..0000000 --- a/bbot/machine.cxx +++ /dev/null @@ -1,474 +0,0 @@ -// file : bbot/machine.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#include - -#include // sleep() - -#include // sockaddr_un -#include - -#include // snprintf() -#include // strcpy() - -#include -#include - -using namespace std; -using namespace butl; - -namespace bbot -{ - // Forward TFTP requests (UDP/69) coming from the machine to the specified - // port. - // - // This allows the machine to connect to any "unknown" IP (e.g., link-local - // 196.254.111.222) port 69 and end up being redirected to out TFTP server. - // - static void - iptables (tracer& t, - const char* a, - const string& tap, - const string& br, - uint16_t port, - bool ignore_errors = false) - { - string addr (iface_addr (br)); - - auto_fd fdn (ignore_errors ? 
fdnull () : nullfd); - int ofd (ignore_errors ? fdn.get () : 2); - - process_exit::code_type e; - - e = run_io_exit (t, 0, ofd, ofd, - "sudo", "iptables", - "-t", "nat", - a, "PREROUTING", - "-m", "udp", - "-p", "udp", - "-m", "physdev", - "-i", br, - "--physdev-in", tap, - "--dport", 69, - "-j", "DNAT", - "--to-destination", addr + ':' + to_string (port)); - - if (e != 0 && !ignore_errors) - fail << "process iptables terminated with non-zero exit code"; - - // Nobody really knows whether this is really needed (really)... - // - e = run_io_exit (t, 0, ofd, ofd, - "sudo", "iptables", - a, "FORWARD", - "-m", "udp", - "-p", "udp", - "-m", "physdev", - "-o", br, - "--physdev-out", tap, - "-d", addr, - "--dport", port, - "-m", "state", - "--state", "NEW,ESTABLISHED,RELATED", - "-j", "ACCEPT"); - - if (e != 0 && !ignore_errors) - fail << "process iptables terminated with non-zero exit code"; - } - - static string - create_tap (const string& br, uint16_t port) - { - string t ("tap" + to_string (tc_num)); - - tracer trace ("create_tap", t.c_str ()); - - // First try to delete it in case there is one from a previous run. - // - iptables (trace, "-D", t, br, port, true); // Ignore errors. - run_exit (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); - - run (trace, "sudo", "ip", "tuntap", "add", t, "mode", "tap", "user", uid); - run (trace, "sudo", "ip", "link", "set", t, "up"); - run (trace, "sudo", "ip", "link", "set", t, "master", br); - - iptables (trace, "-A", t, br, port); // Add. - - return t; - } - - static void - destroy_tap (const string& t, const string& br, uint16_t port) - { - tracer trace ("destroy_tap", t.c_str ()); - iptables (trace, "-D", t, br, port); // Delete. - run (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); - } - - class tap - { - public: - string iface; - - string bridge; // Bridge interface to which this tap belongs - uint16_t port; // UDP port to forward TFTP traffic to. 
- - tap (string b, uint16_t p) - : iface (create_tap (b, p)), bridge (move (b)), port (p) {} - - ~tap () - { - if (!iface.empty ()) - { - try {destroy ();} catch (...) {} - } - } - - void - destroy () - { - destroy_tap (iface, bridge, port); - iface.clear (); - } - }; - - static string - generate_mac () - { - // The last two bits of the first byte are special: bit 1 indicates a - // multicast address (which we don't want) while bit 1 -- local assignment - // (which we do want). - // - char r[6 * 2 + 5 + 1]; - snprintf (r, sizeof (r), - "%02x:%02x:%02x:%02x:%02x:%02x", - (genrand () & 0xFE) | 0x02, - genrand (), - genrand (), - genrand (), - genrand (), - genrand ()); - return r; - } - - class kvm_machine: public machine - { - public: - kvm_machine (const dir_path&, - const machine_manifest&, - const optional& mac, - const string& br_iface, - uint16_t tftp_port); - - virtual bool - shutdown (size_t& seconds) override; - - virtual void - forcedown (bool fail_hard) override; - - virtual void - suspend () override; - - bool - wait (size_t& seconds, bool fail_hard) override; - - using machine::wait; - - virtual void - print_info (diag_record&) override; - - private: - void - monitor_command (const string&, bool fail_hard = true); - - private: - path kvm; // Hypervisor binary. - tap net; // Tap network interface. - string vnc; // QEMU VNC TCP addr:port. - path monitor; // QEMU monitor UNIX socket. - process proc; - }; - - kvm_machine:: - kvm_machine (const dir_path& md, - const machine_manifest& mm, - const optional& omac, - const string& br, - uint16_t port) - : machine (mm.mac ? *mm.mac : // Fixed mac from machine manifest. - omac ? *omac : // Generated mac from previous bootstrap. 
- generate_mac ()), - kvm ("kvm"), - net (br, port), - vnc ("127.0.0.1:" + to_string (5900 + tc_num)), - monitor ("/tmp/" + tc_name + "-monitor") - { - tracer trace ("kvm_machine", md.string ().c_str ()); - - if (sizeof (sockaddr_un::sun_path) <= monitor.size ()) - throw invalid_argument ("monitor unix socket path too long"); - - // Map logical CPUs to sockets/cores/threads. Failed that, QEMU just makes - // it a machine with that number of sockets and some operating systems - // (like Windows) only can do two. - // - size_t cpu (ops.cpu ()); - - size_t sockets (cpu <= 8 ? 1 : cpu <= 64 ? 2 : 4); - size_t cores (cpu / sockets); - size_t threads (cores <= 4 ? 1 : 2); - cores /= threads; - - - // We probably don't want to commit all the available RAM to the VM since - // some of it could be used on the host side for caching, etc. So the - // heuristics that we will use is 4G or 1G per CPU, whichever is greater - // and the rest divide equally between the host and the VM. - // - size_t ram ((cpu < 4 ? 4 : cpu) * 1024 * 1024); // Kb. - - if (ram > ops.ram ()) - ram = ops.ram (); - else - ram += (ops.ram () - ram) / 2; - - // If we have options, use that instead of the default network and - // disk configuration. - // - strings os; - - if (mm.options) - { - os = mm.unquoted_options (); - - // Pre-process ifname=? and mac=?. - // - auto sub = [] (string& o, const char* s, const string& r) - { - size_t p (o.find (s)); - - if (p != string::npos) - { - p = o.find ('?', p + 1); - assert (p != string::npos); - o.replace (p, 1, r); - } - }; - - for (string& o: os) - { - sub (o, "ifname=?", net.iface); - sub (o, "mac=?", mac); - } - } - else - { - auto add = [&os] (string o, string v) - { - os.push_back (move (o)); - os.push_back (move (v)); - }; - - // Network. - // - add ("-netdev", "tap,id=net0,script=no,ifname=" + net.iface); - add ("-device", "virtio-net-pci,netdev=net0,mac=" + mac); - - // Disk. 
- // - add ("-drive", "if=none,id=disk0,file=disk.img,format=raw"); - add ("-device", "virtio-blk-pci,scsi=off,drive=disk0"); - - //"-drive", "if=none,id=disk0,format=raw,file=disk.img" - //"-device", "virtio-scsi-pci,id=scsi" - //"-device", "scsi-hd,drive=disk0" - } - - // Start the VM. - // - // Notes: - // - // 1. echo system_powerdown | socat - UNIX-CONNECT:.../monitor - // - proc = run_io_start ( - trace, - fdnull (), - 2, - 2, - md, // Run from the machine's directory. - kvm, - "-boot", "c", // Boot from disk. - "-no-reboot", // Exit on VM reboot. - "-m", to_string (ram / 1024) + "M", - "-cpu", "host", - "-smp", (to_string (cpu) + - ",sockets=" + to_string (sockets) + - ",cores=" + to_string (cores) + - ",threads=" + to_string (threads)), - os, - "-vnc", "127.0.0.1:" + to_string (tc_num), // 5900 + tc_num - "-monitor", "unix:" + monitor.string () + ",server,nowait"); - } - - // Connect to the QEMU monitor via the UNIX socket and send system_reset. - // You may be wondering why not system_powerdown? The reason is that while - // not all OS know how to power-down the machine, pretty much all of them - // can reboot. So combined with the -no-reboot option above, we get the - // same result in a more robust way. - // - // Note that this setup has one side effect: if the VM decided to reboot, - // say, during bootstrap, then we will interpret it as a shutdown. Current - // thinking saying this is good since we don't want our VMs to reboot - // uncontrollably for security and predictability reasons (e.g., we don't - // want Windows to decide to install updates -- this stuff should all be - // disabled during the VM preparation). - // - // Actually, this turned out not to be entirely accurate: reset appears to - // be a "hard reset" while powerdown causes a clean shutdown. So we use - // powerdown to implement shutdown() and reset/-no-reboot for implement - // forcedown(). 
- // - bool kvm_machine:: - shutdown (size_t& seconds) - { - monitor_command ("system_powerdown"); - - // Wait for up to the specified number if seconds for the machine to - // shutdown. - // - return wait (seconds); - } - - void kvm_machine:: - forcedown (bool fh) - { - monitor_command ("system_reset", fh); - wait (fh); - } - - void kvm_machine:: - suspend () - { - monitor_command ("stop"); - } - - void kvm_machine:: - print_info (diag_record& dr) - { - dr << info << "qemu pid: " << proc.id () - << info << "qemu vnc: " << vnc - << info << "qemu monitor: unix:" << monitor; - } - - bool kvm_machine:: - wait (size_t& sec, bool fh) - { - try - { - tracer trace ("kvm_machine::wait"); - - bool t; - for (; !(t = proc.try_wait ()) && sec != 0; --sec) - sleep (1); - - if (t) - { - run_io_finish (trace, proc, kvm, fh); - net.destroy (); //@@ Always fails hard. - try_rmfile (monitor, true); // QEMU doesn't seem to remove it. - } - - return t; - } - catch (const process_error& e) - { - fail (fh) << "unable to execute " << kvm << ": " << e << endf; - } - } - - void kvm_machine:: - monitor_command (const string& c, bool fh) - { - try - { - sockaddr_un addr; - addr.sun_family = AF_LOCAL; - strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor - - auto_fd sock (socket (AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0)); - - if (sock.get () == -1) - throw_system_error (errno); - - if (connect (sock.get (), - reinterpret_cast (&addr), - sizeof (addr)) == -1) - throw_system_error (errno); - - // Read until we get something. - // - auto readsome = [&sock] () - { - ifdstream ifs (move (sock), - fdstream_mode::non_blocking, - ostream::badbit); - - char buf[256]; - for (streamsize n (0), m (0); - n == 0 || m != 0; - m = ifs.readsome (buf, sizeof (buf) - 1)) - { - if (m != 0) - { - n += m; - - //buf[m] = '\0'; - //text << buf; - } - } - - sock = ifs.release (); - }; - - // Read QEMU welcome. - // - readsome (); - - // Write our command. 
- // - { - ofdstream ofs (move (sock), fdstream_mode::blocking); - ofs << c << endl; - sock = ofs.release (); - } - - // Read QEMU reply (may hit eof). - // - readsome (); - return; - } - catch (const system_error& e) - { - fail (fh) << "unable to communicate with qemu monitor: " << e; - } - } - - unique_ptr - start_machine (const dir_path& md, - const machine_manifest& mm, - const optional& mac, - const string& br_iface, - uint16_t tftp_port) - { - switch (mm.type) - { - case machine_type::kvm: - return make_unique (md, mm, mac, br_iface, tftp_port); - case machine_type::nspawn: - assert (false); //@@ TODO - } - - return nullptr; - } -} diff --git a/bbot/machine.hxx b/bbot/machine.hxx deleted file mode 100644 index c15d618..0000000 --- a/bbot/machine.hxx +++ /dev/null @@ -1,84 +0,0 @@ -// file : bbot/machine.hxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#ifndef BBOT_MACHINE_HXX -#define BBOT_MACHINE_HXX - -#include -#include - -namespace bbot -{ - // A running build machine (container, vm, etc). - // - // Note that if the machine is destroyed while it is still running, the - // destructor will block until the machine process terminates. - // - // Some functions can fail softly if the fail_hard argument is false. - // - class machine - { - public: - // Shut the machine down cleanly waiting up to the specified number of - // seconds for completion. Update the timeout and return false if the - // machine is still running, true if the machine exited successfully, and - // throw failed otherwise. - // - virtual bool - shutdown (size_t& seconds) = 0; - - // Force the machine down. - // - virtual void - forcedown (bool fail_hard = true) = 0; - - // Suspend the machine. - // - virtual void - suspend () = 0; - - // Wait for the machine to terminate up to the specified number of - // seconds. 
Update the timeout and return false if the machine is still - // running, true if the machine exited successfully, and throw failed - // otherwise. - // - virtual bool - wait (size_t& seconds, bool fail_hard = true) = 0; - - bool - wait (bool fail_hard = true) - { - size_t sec (~0); // Wait indefinitely. - return wait (sec, fail_hard); - } - - // Print information about the machine (as info diagnostics) that can be - // useful for debugging (e.g., how to connect/login, etc). - // - virtual void - print_info (diag_record&) = 0; - - public: - const string mac; // MAC address (inside the machine). - - public: - virtual - ~machine () = default; - - protected: - machine (string m) - : mac (move (m)) {} - }; - - class machine_manifest; - - unique_ptr - start_machine (const dir_path&, - const machine_manifest&, - const optional& mac, - const string& br_iface, - uint16_t tftp_port); -} - -#endif // BBOT_MACHINE_HXX diff --git a/bbot/tftp.cxx b/bbot/tftp.cxx deleted file mode 100644 index 0da6c24..0000000 --- a/bbot/tftp.cxx +++ /dev/null @@ -1,137 +0,0 @@ -// file : bbot/tftp.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#include - -#include // htonl() -#include // sockaddr_in -#include -#include - -#include // memset() - -#include - -using namespace std; -using namespace butl; - -namespace bbot -{ - tftp_server:: - tftp_server (const string& map, uint16_t port) - { - int fd (socket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0)); - - if (fd == -1) - throw_system_error (errno); - - fd_.reset (fd); - - // Bind to ephemeral port. - // - sockaddr_in addr; - memset (&addr, 0, sizeof (addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl (INADDR_ANY); - addr.sin_port = htons (port); - - // Not to confuse with std::bind(). - // - if (::bind (fd, - reinterpret_cast (&addr), - sizeof (sockaddr_in)) == -1) - throw_system_error (errno); - - // Create the map file. 
- // - map_ = auto_rmfile (path::temp_path ("bbot-agent-tftp-map")); - ofdstream ofs (map_.path ()); - ofs << map << endl; - ofs.close (); - } - - uint16_t tftp_server:: - port () const - { - sockaddr_in addr; - socklen_t size (sizeof (addr)); - - if (getsockname (fd_.get (), - reinterpret_cast (&addr), - &size) == -1) - throw_system_error (errno); - - assert (size == sizeof (addr)); - return ntohs (addr.sin_port); - } - - bool tftp_server:: - serve (size_t& sec, size_t inc) - { - tracer trace ("tftp_server::serve"); - - if (inc == 0 || inc > sec) - inc = sec; - - int fd (fd_.get ()); - - // Note: Linux updates the timeout value which we rely upon. - // - timeval timeout {static_cast (inc), 0}; - - fd_set rd; - FD_ZERO (&rd); - - for (;;) - { - FD_SET (fd, &rd); - - int r (select (fd + 1, &rd, nullptr, nullptr, &timeout)); - - if (r == -1) - { - if (errno == EINTR) - continue; - - throw_system_error (errno); - } - else if (r == 0) // Timeout. - { - sec -= inc; - return false; - } - - if (FD_ISSET (fd, &rd)) - { - // The inetd "protocol" is to pass the socket as stdin/stdout file - // descriptors. - // - // Notes/issues: - // - // 1. Writes diagnostics to syslog. - // - run_io (trace, - fddup (fd), - fddup (fd), - 2, - "sudo", // Required for --secure (chroot). - "/usr/sbin/in.tftpd", // Standard installation location. - "--timeout", 1, // Wait for more requests. - "--permissive", // Use inherited umask. - "--create", // Allow creating new files (PUT). - "--map-file", map_.path (), // Path remapping rules. - "--user", uname, // Run as our effective user. - "--secure", // Chroot to data directory. - ops.tftp ()); - - // This is not really accurate since tftpd will, for example, serve - // an upload request until it is complete. But it's close anough for - // our needs. 
- // - sec -= (inc - static_cast (timeout.tv_sec)); - return true; - } - } - } -} diff --git a/bbot/tftp.hxx b/bbot/tftp.hxx deleted file mode 100644 index cdd0e3e..0000000 --- a/bbot/tftp.hxx +++ /dev/null @@ -1,47 +0,0 @@ -// file : bbot/tftp.hxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#ifndef BBOT_TFTP_HXX -#define BBOT_TFTP_HXX - -#include -#include - -namespace bbot -{ - // A TFTP server "wrapper" over tftpd-hpa. - // - // In a nutshell, we are pretending to be inetd and when a request arrives, - // spawn tftpd-hpa to handle it. - // - class tftp_server - { - public: - // The map argument specifies the path mapping rules, one per line (see - // the tftpd-hpa --map-file|-m option for details). If port is 0, then - // it is automatically assigned. - // - tftp_server (const string& map, uint16_t port); - - // Return the assigned port. - // - uint16_t - port () const; - - // Wait for a TFTP request for up to the specified number of seconds. If - // increment is not 0, then wait in the specified incremenets (i.e., wait - // for up to that number of seconds; useful when one needs to also - // periodically check for something else). Update the timeout value as - // well as return true if a request was served and false otherwise. 
- // - bool - serve (size_t& seconds, size_t increment = 0); - - private: - auto_fd fd_; - auto_rmfile map_; - }; -} - -#endif // BBOT_TFTP_HXX diff --git a/bbot/worker.cli b/bbot/worker.cli deleted file mode 100644 index 09e09a3..0000000 --- a/bbot/worker.cli +++ /dev/null @@ -1,102 +0,0 @@ -// file : bbot/worker.cli -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -include ; - -"\section=1" -"\name=bbot-worker" -"\summary=build bot worker" - -namespace bbot -{ - { - " ", - - " - \h|SYNOPSIS| - - \cb{bbot-worker --help}\n - \cb{bbot-worker --version}\n - \c{\b{bbot-worker --bootstrap} []}\n - \c{\b{bbot-worker --startup} []}\n - \c{\b{bbot-worker} [] ... ...} - - \h|DESCRIPTION| - - \cb{bbot-worker} @@ TODO. - - If the \cb{--bootstrap} mode option is specified, then the worker performs - the initial machine bootstrap and writes the bootstrap result manifest to - \c{stdout}. If the \cb{--startup} mode option is specified, then the - worker performs the environment setup and then re-executes in the build - mode. If neither of the mode options is specified, then the worker - proceeds to performing the build task. - " - } - - class worker_options - { - "\h|OPTIONS|" - - // - // NOTE: when adding new options, consider whether they should be - // propagated from startup to build. - // - - bool --help {"Print usage information and exit."} - bool --version {"Print version and exit."} - - bool --bootstrap - { - "Perform the inital machine bootstrap insteading of building." - } - - bool --startup - { - "Perform the environment setup and then re-execute for building." - } - - bool --systemd-daemon - { - "Run as a simple systemd daemon." - } - - dir_path --build - { - "

", - "The directory to perform the build in. If not specified, then the - current working directory is used." - } - - dir_path --environments - { - "", - "The directory containing the environment setup executables. If not - specified, then the user's home directory is used." - } - - uint16_t --verbose = 1 - { - "", - "Set the diagnostics verbosity to between 0 and 6 with level 1 - being the default." - } - - // Testing options. - // - string --tftp-host = "196.254.111.222" - { - "", - "The TFTP host address and, optionally, port to use to download the - build task and to upload the build result. By default the link-local - address 196.254.111.222 with the standard TFTP port (69) is used." - } - }; - - " - \h|EXIT STATUS| - - Non-zero exit status is returned in case of an error. - " -} diff --git a/bbot/worker.cxx b/bbot/worker.cxx deleted file mode 100644 index 81d9a50..0000000 --- a/bbot/worker.cxx +++ /dev/null @@ -1,655 +0,0 @@ -// file : bbot/worker.cxx -*- C++ -*- -// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd -// license : TBC; see accompanying LICENSE file - -#ifndef _WIN32 -# include // signal() -#else -# include // getenv(), _putenv() -#endif - -#include -#include - -#include -#include - -#include - -#include -#include - -#include -#include -#include - -using namespace std; -using namespace butl; -using namespace bbot; - -namespace bbot -{ - process_path argv0; - worker_options ops; - - dir_path env_dir; - - const size_t tftp_timeout (10); // 10 seconds. - const size_t tftp_retries (3); // Task request retries (see startup()). 
-} - -static dir_path -change_wd (const dir_path& d, bool create = false) -try -{ - if (create) - try_mkdir_p (d); - - dir_path r (dir_path::current_directory ()); - - dir_path::current_directory (d); - - return r; -} -catch (const system_error& e) -{ - fail << "unable to change current directory to " << d << ": " << e << endf; -} - -using regexes = vector; - -// Match lines read from the command's stderr against the regular expressions -// and return the warning result status (instead of success) in case of a -// match. -// -template -static result_status -run_bpkg (tracer& t, - string& log, const regexes& warn_detect, - const string& cmd, A&&... a) -{ - try - { - // Trace and log the command line. - // - auto cmdc = [&t, &log] (const char* c[], size_t n) - { - t (c, n); - - ostringstream os; - process::print (os, c, n); - log += os.str (); - log += '\n'; - }; - - fdpipe pipe (fdopen_pipe ()); // Text mode seems appropriate. - - process pr ( - process_start (cmdc, - fdnull (), // Never reads from stdout. - 2, // 1>&2 - pipe.out, - "bpkg", - "-v", - cmd, - forward (a)...)); - - pipe.out.close (); - - result_status r (result_status::success); - - // Log the diagnostics. - // - { - ifdstream is (move (pipe.in), fdstream_mode::skip); // Skip on exception. - - for (string l; is.peek () != ifdstream::traits_type::eof (); ) - { - getline (is, l); - log += l; - log += '\n'; - - // Match the log line with the warning-detecting regular expressions - // until the first match. - // - if (r != result_status::warning) - { - for (const auto& re: warn_detect) - { - // Only examine the first 512 bytes. Long lines (e.g., linker - // command lines) could trigger implementation-specific limitations - // (like stack overflow). Plus, it is a performance concern. - // - if (regex_search (l.begin (), - l.size () < 512 ? 
l.end () : l.begin () + 512, - re)) - { - r = result_status::warning; - break; - } - } - } - } - } - - if (pr.wait ()) - return r; - - log += "bpkg " + cmd; - const process_exit& e (*pr.exit); - - if (e.normal ()) - { - log += " exited with code " + to_string (e.code ()) + "\n"; - return result_status::error; - } - else - { - log += " terminated abnormally: " + e.description () + - (e.core () ? " (core dumped)" : "") + "\n"; - return result_status::abnormal; - } - } - catch (const process_error& e) - { - fail << "unable to execute bpkg " << cmd << ": " << e << endf; - } - catch (const io_error& e) - { - fail << "unable to read bpkg " << cmd << " diagnostics: " << e << endf; - } -} - -static void -build (size_t argc, const char* argv[]) -{ - tracer trace ("build"); - - // Our overall plan is as follows: - // - // 1. Parse the task manifest (it should be in CWD). - // - // 2. Run bpkg to create the configuration, add the repository, and - // configure, build, and test the package all while saving the logs in - // the result manifest. - // - // 3. Upload the result manifest. - // - // Note also that we are being "watched" by the startup version of us which - // will upload an appropriate result in case we exit with an error. So here - // for abnormal situations (like a failure to parse the manifest), we just - // fail. - // - task_manifest tm (parse_manifest (path ("manifest"), "task")); - - result_manifest rm { - tm.name, - tm.version, - result_status::success, - operation_results {} - }; - - auto add_result = [&rm] (string o) -> operation_result& - { - rm.results.push_back ( - operation_result {move (o), result_status::success, ""}); - - return rm.results.back (); - }; - - dir_path owd; - - for (;;) // The "breakout" loop. - { - // Regular expressions that detect different forms of build2 toolchain - // warnings. Accidently (or not), they also cover GCC and Clang warnings - // (for the English locale). 
- // - // The expressions will be matched multiple times, so let's make the - // matching faster, with the potential cost of making regular expressions - // creation slower. - // - regex::flag_type f (regex_constants::optimize); // ECMAScript is implied. - - regexes wre ({ - regex ("^warning: ", f), - regex ("^.+: warning: ", f)}); - - for (const auto& re: tm.unquoted_warning_regex ()) - wre.emplace_back (re, f); - - // Configure. - // - { - operation_result& r (add_result ("configure")); - - // bpkg create - // - const vector_view env (argv + 1, argc - 1); - - // Use target (if present) or machine as configuration directory name. - // - dir_path dir (tm.target ? tm.target->string () : tm.machine); - - r.status |= run_bpkg (trace, r.log, wre, - "create", - "-d", dir.string (), - "--wipe", - tm.unquoted_config (), - env); - - if (!r.status) - break; - - owd = change_wd (dir); - - // bpkg add - // - r.status |= run_bpkg (trace, r.log, wre, "add", tm.repository.string ()); - - if (!r.status) - break; - - // bpkg fetch - // - string t ("--trust-no"); - - cstrings ts; - for (const string& fp: tm.trust) - { - if (fp == "yes") - t = "--trust-yes"; - else - { - ts.push_back ("--trust"); - ts.push_back (fp.c_str ()); - } - } - - r.status |= run_bpkg (trace, r.log, wre, "fetch", ts, t); - - if (!r.status) - break; - - // bpkg build --configure-only / - // - r.status |= run_bpkg (trace, r.log, wre, - "build", - "--configure-only", - "--yes", - tm.name + '/' + tm.version.string ()); - - if (!r.status) - break; - - rm.status |= r.status; - } - - // Update. - // - { - operation_result& r (add_result ("update")); - - // bpkg update - // - r.status |= run_bpkg (trace, r.log, wre, "update", tm.name); - - if (!r.status) - break; - - rm.status |= r.status; - } - - // Test. 
- // - { - operation_result& r (add_result ("test")); - - // bpkg test - // - r.status |= run_bpkg (trace, r.log, wre, "test", tm.name); - - if (!r.status) - break; - - rm.status |= r.status; - } - - break; - } - - rm.status |= rm.results.back ().status; // Merge last in case of a break. - - if (!owd.empty ()) - change_wd (owd); - - // Upload the result. - // - const string url ("tftp://" + ops.tftp_host () + "/manifest"); - - try - { - tftp_curl c (trace, - path ("-"), - nullfd, - curl::put, - url, - "--max-time", tftp_timeout); - - serialize_manifest (rm, c.out, url, "result"); - c.out.close (); - - if (!c.wait ()) - throw_generic_error (EIO); - } - catch (const system_error& e) - { - fail << "unable to upload result manifest to " << url << ": " << e; - } -} - -static void -startup () -{ - tracer trace ("startup"); - - // Our overall plan is as follows: - // - // 1. Download the task manifest into the build directory (CWD). - // - // 2. Parse it and get the target. - // - // 3. Find the environment setup executable for this target. - // - // 4. Execute the environment setup executable. - // - // 5. If the environment setup executable fails, then upload the (failed) - // result ourselves. - // - const string url ("tftp://" + ops.tftp_host () + "/manifest"); - const path mf ("manifest"); - - // If we fail, try to upload the result manifest (abnormal termination). The - // idea is that the machine gets suspended and we can investigate what's - // going on by logging in and examining the diagnostics (e.g., via - // journalctl, etc). - // - task_manifest tm; - - try - { - // Download the task. - // - // We are downloading from our host so there shouldn't normally be any - // connectivity issues. Unless, of course, we are on Windows where all - // kinds of flakiness is business as usual. Note that having a long enough - // timeout is not enough: if we try to connect before the network is up, - // we will keep waiting forever, even after it is up. 
So we have to - // timeout and try again. This is also pretty bad (unlike, say during - // bootstrap which doesn't happen very often) since we are wasting the - // machine time. So we are going to log it as a warning and not merely a - // trace since if this is a common occurrence, then something has to be - // done about it. - // - for (size_t retry (1);; ++retry) - { - try - { - tftp_curl c (trace, - nullfd, - mf, - curl::get, - url, - "--max-time", tftp_timeout); - - if (!c.wait ()) - throw_generic_error (EIO); - - break; - } - catch (const system_error& e) - { - bool bail (retry > tftp_retries); - diag_record dr (bail ? error : warn); - - dr << "unable to download task manifest from " << url << " on " - << retry << " try: " << e; - - if (bail) - throw failed (); - } - } - - // Parse it. - // - tm = parse_manifest (mf, "task"); - - // Find the environment setup executable. - // - string tg; - process_path pp; - - if (tm.target) - { - tg = tm.target->string (); - - // While the executable path contains a directory (so the PATH search - // does not apply) we still use process::path_search() to automatically - // handle appending platform-specific executable extensions (.exe/.bat, - // etc). - // - pp = process::try_path_search (env_dir / tg, false); - } - - if (pp.empty ()) - pp = process::try_path_search (env_dir / "default", false); - - if (pp.empty ()) - fail << "no environment setup executable in " << env_dir << " " - << "for target '" << tg << "'"; - - // Run it. - // - strings os; - - if (ops.systemd_daemon ()) - os.push_back ("--systemd-daemon"); - - if (ops.verbose_specified ()) - { - os.push_back ("--verbose"); - os.push_back (to_string (ops.verbose ())); - } - - if (ops.tftp_host_specified ()) - { - os.push_back ("--tftp-host"); - os.push_back (ops.tftp_host ()); - } - - // Note that we use the effective (absolute) path instead of recall since - // we may have changed the CWD. 
- // - run (trace, pp, tg, argv0.effect_string (), os); - } - catch (const failed&) - { - // If we failed before being able to parse the task manifest, use the - // "unknown" values for the package name and version. - // - result_manifest rm { - tm.name.empty () ? "unknown" : tm.name, - tm.version.empty () ? bpkg::version ("0") : tm.version, - result_status::abnormal, - operation_results {} - }; - - try - { - tftp_curl c (trace, - path ("-"), - nullfd, - curl::put, - url, - "--max-time", tftp_timeout); - - serialize_manifest (rm, c.out, url, "result"); - c.out.close (); - - if (!c.wait ()) - throw_generic_error (EIO); - } - catch (const system_error& e) - { - error << "unable to upload result manifest to " << url << ": " << e; - } - - throw; - } -} - -static void -bootstrap () -{ - bootstrap_manifest bm { - bootstrap_manifest::versions_type { - {"bbot", standard_version (BBOT_VERSION_STR)}, - {"libbbot", standard_version (LIBBBOT_VERSION_STR)}, - {"libbpkg", standard_version (LIBBPKG_VERSION_STR)}, - {"libbutl", standard_version (LIBBUTL_VERSION_STR)} - } - }; - - serialize_manifest (bm, cout, "stdout", "bootstrap"); -} - -int -main (int argc, char* argv[]) -try -{ - // This is a little hack to make out baseutils for Windows work when called - // with absolute path. In a nutshell, MSYS2's exec*p() doesn't search in the - // parent's executable directory, only in PATH. And since we are running - // without a shell (that would read /etc/profile which sets PATH to some - // sensible values), we are only getting Win32 PATH values. And MSYS2 /bin - // is not one of them. So what we are going to do is add /bin at the end of - // PATH (which will be passed as is by the MSYS2 machinery). This will make - // MSYS2 search in /bin (where our baseutils live). And for everyone else - // this should be harmless since it is not a valid Win32 path. 
- // -#ifdef _WIN32 - { - string mp ("PATH="); - if (const char* p = getenv ("PATH")) - { - mp += p; - mp += ';'; - } - mp += "/bin"; - - _putenv (mp.c_str ()); - } -#endif - - // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if - // the pipe reading end is closed. Note that by default this signal - // terminates a process. Also note that there is no way to disable this - // behavior on a file descriptor basis or for the write() function call. - // -#ifndef _WIN32 - if (signal (SIGPIPE, SIG_IGN) == SIG_ERR) - fail << "unable to ignore broken pipe (SIGPIPE) signal: " - << system_error (errno, generic_category ()); // Sanitize. -#endif - - cli::argv_scanner scan (argc, argv, true); - ops.parse (scan); - - verb = ops.verbose (); - - if (ops.systemd_daemon ()) - systemd_diagnostics (false); - - // Version. - // - if (ops.version ()) - { - cout << "bbot-worker " << BBOT_VERSION_ID << endl - << "libbbot " << LIBBBOT_VERSION_ID << endl - << "libbpkg " << LIBBBOT_VERSION_ID << endl - << "libbutl " << LIBBUTL_VERSION_ID << endl - << "Copyright (c) 2014-2017 Code Synthesis Ltd" << endl - << "TBC; All rights reserved" << endl; - - return 0; - } - - // Help. - // - if (ops.help ()) - { - pager p ("bbot-worker help", false); - print_bbot_worker_usage (p.stream ()); - - // If the pager failed, assume it has issued some diagnostics. - // - return p.wait () ? 0 : 1; - } - - // Figure out our mode. - // - if (ops.bootstrap () && ops.startup ()) - fail << "--bootstrap and --startup are mutually exclusive"; - - enum class mode {boot, start, build} m (mode::build); - - if (ops.bootstrap ()) m = mode::boot; - if (ops.startup ()) m = mode::start; - - if (ops.systemd_daemon ()) - { - info << "bbot worker " << BBOT_VERSION_ID; - } - - // Figure out our path (used for re-exec). - // - argv0 = process::path_search (argv[0], true); - - // Sort out the build directory. - // - if (ops.build_specified ()) - change_wd (ops.build (), true); // Create if does not exist. 
- - // Sort out the environment directory. - // - try - { - env_dir = ops.environments_specified () - ? ops.environments () - : dir_path::home_directory (); - - if (!dir_exists (env_dir)) - throw_generic_error (ENOENT); - } - catch (const system_error& e) - { - fail << "invalid environment directory: " << e; - } - - switch (m) - { - case mode::boot: bootstrap (); break; - case mode::start: startup (); break; - case mode::build: build (static_cast (argc), - const_cast (argv)); break; - } -} -catch (const failed&) -{ - return 1; // Diagnostics has already been issued. -} -catch (const cli::exception& e) -{ - error << e; - return 1; -} diff --git a/bbot/worker/worker.cli b/bbot/worker/worker.cli new file mode 100644 index 0000000..09e09a3 --- /dev/null +++ b/bbot/worker/worker.cli @@ -0,0 +1,102 @@ +// file : bbot/worker.cli +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +include ; + +"\section=1" +"\name=bbot-worker" +"\summary=build bot worker" + +namespace bbot +{ + { + " ", + + " + \h|SYNOPSIS| + + \cb{bbot-worker --help}\n + \cb{bbot-worker --version}\n + \c{\b{bbot-worker --bootstrap} []}\n + \c{\b{bbot-worker --startup} []}\n + \c{\b{bbot-worker} [] ... ...} + + \h|DESCRIPTION| + + \cb{bbot-worker} @@ TODO. + + If the \cb{--bootstrap} mode option is specified, then the worker performs + the initial machine bootstrap and writes the bootstrap result manifest to + \c{stdout}. If the \cb{--startup} mode option is specified, then the + worker performs the environment setup and then re-executes in the build + mode. If neither of the mode options is specified, then the worker + proceeds to performing the build task. + " + } + + class worker_options + { + "\h|OPTIONS|" + + // + // NOTE: when adding new options, consider whether they should be + // propagated from startup to build. 
+ // + + bool --help {"Print usage information and exit."} + bool --version {"Print version and exit."} + + bool --bootstrap + { + "Perform the initial machine bootstrap instead of building." + } + + bool --startup + { + "Perform the environment setup and then re-execute for building." + } + + bool --systemd-daemon + { + "Run as a simple systemd daemon." + } + + dir_path --build + { + "", + "The directory to perform the build in. If not specified, then the + current working directory is used." + } + + dir_path --environments + { + "", + "The directory containing the environment setup executables. If not + specified, then the user's home directory is used." + } + + uint16_t --verbose = 1 + { + "", + "Set the diagnostics verbosity to between 0 and 6 with level 1 + being the default." + } + + // Testing options. + // + string --tftp-host = "196.254.111.222" + { + "", + "The TFTP host address and, optionally, port to use to download the + build task and to upload the build result. By default the link-local + address 196.254.111.222 with the standard TFTP port (69) is used." + } + }; + + " + \h|EXIT STATUS| + + Non-zero exit status is returned in case of an error. + " +} diff --git a/bbot/worker/worker.cxx b/bbot/worker/worker.cxx new file mode 100644 index 0000000..2aad8aa --- /dev/null +++ b/bbot/worker/worker.cxx @@ -0,0 +1,656 @@ +// file : bbot/worker.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#ifndef _WIN32 +# include // signal() +#else +# include // getenv(), _putenv() +#endif + +#include +#include + +#include +#include + +#include + +#include +#include + +#include +#include + +#include + +using namespace std; +using namespace butl; +using namespace bbot; + +namespace bbot +{ + process_path argv0; + worker_options ops; + + dir_path env_dir; + + const size_t tftp_timeout (10); // 10 seconds. + const size_t tftp_retries (3); // Task request retries (see startup()).
+} + +static dir_path +change_wd (const dir_path& d, bool create = false) +try +{ + if (create) + try_mkdir_p (d); + + dir_path r (dir_path::current_directory ()); + + dir_path::current_directory (d); + + return r; +} +catch (const system_error& e) +{ + fail << "unable to change current directory to " << d << ": " << e << endf; +} + +using regexes = vector; + +// Match lines read from the command's stderr against the regular expressions +// and return the warning result status (instead of success) in case of a +// match. +// +template +static result_status +run_bpkg (tracer& t, + string& log, const regexes& warn_detect, + const string& cmd, A&&... a) +{ + try + { + // Trace and log the command line. + // + auto cmdc = [&t, &log] (const char* c[], size_t n) + { + t (c, n); + + ostringstream os; + process::print (os, c, n); + log += os.str (); + log += '\n'; + }; + + fdpipe pipe (fdopen_pipe ()); // Text mode seems appropriate. + + process pr ( + process_start (cmdc, + fdnull (), // Never reads from stdout. + 2, // 1>&2 + pipe.out, + "bpkg", + "-v", + cmd, + forward (a)...)); + + pipe.out.close (); + + result_status r (result_status::success); + + // Log the diagnostics. + // + { + ifdstream is (move (pipe.in), fdstream_mode::skip); // Skip on exception. + + for (string l; is.peek () != ifdstream::traits_type::eof (); ) + { + getline (is, l); + log += l; + log += '\n'; + + // Match the log line with the warning-detecting regular expressions + // until the first match. + // + if (r != result_status::warning) + { + for (const auto& re: warn_detect) + { + // Only examine the first 512 bytes. Long lines (e.g., linker + // command lines) could trigger implementation-specific limitations + // (like stack overflow). Plus, it is a performance concern. + // + if (regex_search (l.begin (), + l.size () < 512 ? 
l.end () : l.begin () + 512, + re)) + { + r = result_status::warning; + break; + } + } + } + } + } + + if (pr.wait ()) + return r; + + log += "bpkg " + cmd; + const process_exit& e (*pr.exit); + + if (e.normal ()) + { + log += " exited with code " + to_string (e.code ()) + "\n"; + return result_status::error; + } + else + { + log += " terminated abnormally: " + e.description () + + (e.core () ? " (core dumped)" : "") + "\n"; + return result_status::abnormal; + } + } + catch (const process_error& e) + { + fail << "unable to execute bpkg " << cmd << ": " << e << endf; + } + catch (const io_error& e) + { + fail << "unable to read bpkg " << cmd << " diagnostics: " << e << endf; + } +} + +static void +build (size_t argc, const char* argv[]) +{ + tracer trace ("build"); + + // Our overall plan is as follows: + // + // 1. Parse the task manifest (it should be in CWD). + // + // 2. Run bpkg to create the configuration, add the repository, and + // configure, build, and test the package all while saving the logs in + // the result manifest. + // + // 3. Upload the result manifest. + // + // Note also that we are being "watched" by the startup version of us which + // will upload an appropriate result in case we exit with an error. So here + // for abnormal situations (like a failure to parse the manifest), we just + // fail. + // + task_manifest tm (parse_manifest (path ("manifest"), "task")); + + result_manifest rm { + tm.name, + tm.version, + result_status::success, + operation_results {} + }; + + auto add_result = [&rm] (string o) -> operation_result& + { + rm.results.push_back ( + operation_result {move (o), result_status::success, ""}); + + return rm.results.back (); + }; + + dir_path owd; + + for (;;) // The "breakout" loop. + { + // Regular expressions that detect different forms of build2 toolchain + // warnings. Accidently (or not), they also cover GCC and Clang warnings + // (for the English locale). 
+ // + // The expressions will be matched multiple times, so let's make the + // matching faster, with the potential cost of making regular expressions + // creation slower. + // + regex::flag_type f (regex_constants::optimize); // ECMAScript is implied. + + regexes wre ({ + regex ("^warning: ", f), + regex ("^.+: warning: ", f)}); + + for (const auto& re: tm.unquoted_warning_regex ()) + wre.emplace_back (re, f); + + // Configure. + // + { + operation_result& r (add_result ("configure")); + + // bpkg create + // + const vector_view env (argv + 1, argc - 1); + + // Use target (if present) or machine as configuration directory name. + // + dir_path dir (tm.target ? tm.target->string () : tm.machine); + + r.status |= run_bpkg (trace, r.log, wre, + "create", + "-d", dir.string (), + "--wipe", + tm.unquoted_config (), + env); + + if (!r.status) + break; + + owd = change_wd (dir); + + // bpkg add + // + r.status |= run_bpkg (trace, r.log, wre, "add", tm.repository.string ()); + + if (!r.status) + break; + + // bpkg fetch + // + string t ("--trust-no"); + + cstrings ts; + for (const string& fp: tm.trust) + { + if (fp == "yes") + t = "--trust-yes"; + else + { + ts.push_back ("--trust"); + ts.push_back (fp.c_str ()); + } + } + + r.status |= run_bpkg (trace, r.log, wre, "fetch", ts, t); + + if (!r.status) + break; + + // bpkg build --configure-only / + // + r.status |= run_bpkg (trace, r.log, wre, + "build", + "--configure-only", + "--yes", + tm.name + '/' + tm.version.string ()); + + if (!r.status) + break; + + rm.status |= r.status; + } + + // Update. + // + { + operation_result& r (add_result ("update")); + + // bpkg update + // + r.status |= run_bpkg (trace, r.log, wre, "update", tm.name); + + if (!r.status) + break; + + rm.status |= r.status; + } + + // Test. 
+ // + { + operation_result& r (add_result ("test")); + + // bpkg test + // + r.status |= run_bpkg (trace, r.log, wre, "test", tm.name); + + if (!r.status) + break; + + rm.status |= r.status; + } + + break; + } + + rm.status |= rm.results.back ().status; // Merge last in case of a break. + + if (!owd.empty ()) + change_wd (owd); + + // Upload the result. + // + const string url ("tftp://" + ops.tftp_host () + "/manifest"); + + try + { + tftp_curl c (trace, + path ("-"), + nullfd, + curl::put, + url, + "--max-time", tftp_timeout); + + serialize_manifest (rm, c.out, url, "result"); + c.out.close (); + + if (!c.wait ()) + throw_generic_error (EIO); + } + catch (const system_error& e) + { + fail << "unable to upload result manifest to " << url << ": " << e; + } +} + +static void +startup () +{ + tracer trace ("startup"); + + // Our overall plan is as follows: + // + // 1. Download the task manifest into the build directory (CWD). + // + // 2. Parse it and get the target. + // + // 3. Find the environment setup executable for this target. + // + // 4. Execute the environment setup executable. + // + // 5. If the environment setup executable fails, then upload the (failed) + // result ourselves. + // + const string url ("tftp://" + ops.tftp_host () + "/manifest"); + const path mf ("manifest"); + + // If we fail, try to upload the result manifest (abnormal termination). The + // idea is that the machine gets suspended and we can investigate what's + // going on by logging in and examining the diagnostics (e.g., via + // journalctl, etc). + // + task_manifest tm; + + try + { + // Download the task. + // + // We are downloading from our host so there shouldn't normally be any + // connectivity issues. Unless, of course, we are on Windows where all + // kinds of flakiness is business as usual. Note that having a long enough + // timeout is not enough: if we try to connect before the network is up, + // we will keep waiting forever, even after it is up. 
So we have to + // timeout and try again. This is also pretty bad (unlike, say during + // bootstrap which doesn't happen very often) since we are wasting the + // machine time. So we are going to log it as a warning and not merely a + // trace since if this is a common occurrence, then something has to be + // done about it. + // + for (size_t retry (1);; ++retry) + { + try + { + tftp_curl c (trace, + nullfd, + mf, + curl::get, + url, + "--max-time", tftp_timeout); + + if (!c.wait ()) + throw_generic_error (EIO); + + break; + } + catch (const system_error& e) + { + bool bail (retry > tftp_retries); + diag_record dr (bail ? error : warn); + + dr << "unable to download task manifest from " << url << " on " + << retry << " try: " << e; + + if (bail) + throw failed (); + } + } + + // Parse it. + // + tm = parse_manifest (mf, "task"); + + // Find the environment setup executable. + // + string tg; + process_path pp; + + if (tm.target) + { + tg = tm.target->string (); + + // While the executable path contains a directory (so the PATH search + // does not apply) we still use process::path_search() to automatically + // handle appending platform-specific executable extensions (.exe/.bat, + // etc). + // + pp = process::try_path_search (env_dir / tg, false); + } + + if (pp.empty ()) + pp = process::try_path_search (env_dir / "default", false); + + if (pp.empty ()) + fail << "no environment setup executable in " << env_dir << " " + << "for target '" << tg << "'"; + + // Run it. + // + strings os; + + if (ops.systemd_daemon ()) + os.push_back ("--systemd-daemon"); + + if (ops.verbose_specified ()) + { + os.push_back ("--verbose"); + os.push_back (to_string (ops.verbose ())); + } + + if (ops.tftp_host_specified ()) + { + os.push_back ("--tftp-host"); + os.push_back (ops.tftp_host ()); + } + + // Note that we use the effective (absolute) path instead of recall since + // we may have changed the CWD. 
+ // + run (trace, pp, tg, argv0.effect_string (), os); + } + catch (const failed&) + { + // If we failed before being able to parse the task manifest, use the + // "unknown" values for the package name and version. + // + result_manifest rm { + tm.name.empty () ? "unknown" : tm.name, + tm.version.empty () ? bpkg::version ("0") : tm.version, + result_status::abnormal, + operation_results {} + }; + + try + { + tftp_curl c (trace, + path ("-"), + nullfd, + curl::put, + url, + "--max-time", tftp_timeout); + + serialize_manifest (rm, c.out, url, "result"); + c.out.close (); + + if (!c.wait ()) + throw_generic_error (EIO); + } + catch (const system_error& e) + { + error << "unable to upload result manifest to " << url << ": " << e; + } + + throw; + } +} + +static void +bootstrap () +{ + bootstrap_manifest bm { + bootstrap_manifest::versions_type { + {"bbot", standard_version (BBOT_VERSION_STR)}, + {"libbbot", standard_version (LIBBBOT_VERSION_STR)}, + {"libbpkg", standard_version (LIBBPKG_VERSION_STR)}, + {"libbutl", standard_version (LIBBUTL_VERSION_STR)} + } + }; + + serialize_manifest (bm, cout, "stdout", "bootstrap"); +} + +int +main (int argc, char* argv[]) +try +{ + // This is a little hack to make our baseutils for Windows work when called + // with absolute path. In a nutshell, MSYS2's exec*p() doesn't search in the + // parent's executable directory, only in PATH. And since we are running + // without a shell (that would read /etc/profile which sets PATH to some + // sensible values), we are only getting Win32 PATH values. And MSYS2 /bin + // is not one of them. So what we are going to do is add /bin at the end of + // PATH (which will be passed as is by the MSYS2 machinery). This will make + // MSYS2 search in /bin (where our baseutils live). And for everyone else + // this should be harmless since it is not a valid Win32 path.
+ // +#ifdef _WIN32 + { + string mp ("PATH="); + if (const char* p = getenv ("PATH")) + { + mp += p; + mp += ';'; + } + mp += "/bin"; + + _putenv (mp.c_str ()); + } +#endif + + // On POSIX ignore SIGPIPE which is signaled to a pipe-writing process if + // the pipe reading end is closed. Note that by default this signal + // terminates a process. Also note that there is no way to disable this + // behavior on a file descriptor basis or for the write() function call. + // +#ifndef _WIN32 + if (signal (SIGPIPE, SIG_IGN) == SIG_ERR) + fail << "unable to ignore broken pipe (SIGPIPE) signal: " + << system_error (errno, generic_category ()); // Sanitize. +#endif + + cli::argv_scanner scan (argc, argv, true); + ops.parse (scan); + + verb = ops.verbose (); + + if (ops.systemd_daemon ()) + systemd_diagnostics (false); + + // Version. + // + if (ops.version ()) + { + cout << "bbot-worker " << BBOT_VERSION_ID << endl + << "libbbot " << LIBBBOT_VERSION_ID << endl + << "libbpkg " << LIBBPKG_VERSION_ID << endl + << "libbutl " << LIBBUTL_VERSION_ID << endl + << "Copyright (c) 2014-2017 Code Synthesis Ltd" << endl + << "TBC; All rights reserved" << endl; + + return 0; + } + + // Help. + // + if (ops.help ()) + { + pager p ("bbot-worker help", false); + print_bbot_worker_usage (p.stream ()); + + // If the pager failed, assume it has issued some diagnostics. + // + return p.wait () ? 0 : 1; + } + + // Figure out our mode. + // + if (ops.bootstrap () && ops.startup ()) + fail << "--bootstrap and --startup are mutually exclusive"; + + enum class mode {boot, start, build} m (mode::build); + + if (ops.bootstrap ()) m = mode::boot; + if (ops.startup ()) m = mode::start; + + if (ops.systemd_daemon ()) + { + info << "bbot worker " << BBOT_VERSION_ID; + } + + // Figure out our path (used for re-exec). + // + argv0 = process::path_search (argv[0], true); + + // Sort out the build directory. + // + if (ops.build_specified ()) + change_wd (ops.build (), true); // Create if does not exist.
+ + // Sort out the environment directory. + // + try + { + env_dir = ops.environments_specified () + ? ops.environments () + : dir_path::home_directory (); + + if (!dir_exists (env_dir)) + throw_generic_error (ENOENT); + } + catch (const system_error& e) + { + fail << "invalid environment directory: " << e; + } + + switch (m) + { + case mode::boot: bootstrap (); break; + case mode::start: startup (); break; + case mode::build: build (static_cast (argc), + const_cast (argv)); break; + } +} +catch (const failed&) +{ + return 1; // Diagnostics has already been issued. +} +catch (const cli::exception& e) +{ + error << e; + return 1; +} diff --git a/buildfile b/buildfile index 584a35d..0362e29 100644 --- a/buildfile +++ b/buildfile @@ -2,8 +2,8 @@ # copyright : Copyright (c) 2014-2017 Code Synthesis Ltd # license : TBC; see accompanying LICENSE file -./: bbot/ etc/ unit-tests/ doc/ doc{INSTALL LICENSE NEWS README version} \ - file{manifest} +./: {*/ -tests/ -build/} doc{INSTALL LICENSE NEWS README version} \ + file{manifest} doc{version}: file{manifest} # Generated by the version module. 
doc{version}: dist = true diff --git a/doc/buildfile b/doc/buildfile index c11de50..9131de2 100644 --- a/doc/buildfile +++ b/doc/buildfile @@ -2,29 +2,18 @@ # copyright : Copyright (c) 2014-2017 Code Synthesis Ltd # license : TBC; see accompanying LICENSE file -cmds = \ -bbot-agent \ -bbot-worker - define css: file css{*}: extension = css define xhtml: doc xhtml{*}: extension = xhtml -./: {man1 xhtml}{$cmds} \ - css{common pre-box man} \ - file{man-prologue.1 man-epilogue.1 \ - man-prologue.xhtml man-epilogue.xhtml} +./: {man1 xhtml}{bbot-*} \ + css{common pre-box man} \ + file{man-*} -./: doc{build2-build-bot-manual.xhtml \ - build2-build-bot-manual-a4.pdf \ - build2-build-bot-manual-a4.ps \ - build2-build-bot-manual-letter.pdf \ - build2-build-bot-manual-letter.ps} \ - css{code-box common doc pre-box toc} \ - file{a4.html2ps letter.html2ps doc.html2ps} \ - file{doc-prologue.xhtml doc-epilogue.xhtml \ - manual.cli} +./: doc{build2-build-bot-manual*} \ + css{code-box common doc pre-box toc} \ + file{manual.cli doc-* *.html2ps} ./: file{cli.sh} diff --git a/doc/cli.sh b/doc/cli.sh index 2a5136d..e8a06c8 100755 --- a/doc/cli.sh +++ b/doc/cli.sh @@ -51,7 +51,7 @@ o="--output-prefix bbot-" # #compile "bbot" $o --output-prefix "" -pages="bbot/agent bbot/worker" +pages="bbot/agent/agent bbot/worker/worker" for p in $pages; do compile $p $o diff --git a/tests/agent/buildfile b/tests/agent/buildfile index 3dc4ef5..8775cbd 100644 --- a/tests/agent/buildfile +++ b/tests/agent/buildfile @@ -6,5 +6,5 @@ # import agent = bbot%exe{bbot-agent} -./: test{testscript} file{btrfs-cpdir btrfs-rmdir} $agent +./: test{testscript} file{btrfs-*} $agent dir{./}: test = $agent diff --git a/unit-tests/bootstrap-manifest/buildfile b/unit-tests/bootstrap-manifest/buildfile index 2d2da8f..2be37fb 100644 --- a/unit-tests/bootstrap-manifest/buildfile +++ b/unit-tests/bootstrap-manifest/buildfile @@ -2,10 +2,10 @@ # copyright : Copyright (c) 2014-2017 Code Synthesis Ltd # license : TBC; see 
accompanying LICENSE file -import libs = libbutl%lib{butl} +import libs = libbutl%lib{butl} import libs += libbbot%lib{bbot} -exe{driver}: cxx{driver} ../../bbot/{hxx cxx}{bootstrap-manifest} $libs \ +exe{driver}: {hxx cxx}{*} ../../bbot/{hxx cxx}{bootstrap-manifest} $libs \ test{testscript} include ../../bbot/ diff --git a/unit-tests/machine-manifest/buildfile b/unit-tests/machine-manifest/buildfile index ae5ffa1..d56aa1f 100644 --- a/unit-tests/machine-manifest/buildfile +++ b/unit-tests/machine-manifest/buildfile @@ -2,10 +2,10 @@ # copyright : Copyright (c) 2014-2017 Code Synthesis Ltd # license : TBC; see accompanying LICENSE file -import libs = libbutl%lib{butl} +import libs = libbutl%lib{butl} import libs += libbbot%lib{bbot} -exe{driver}: cxx{driver} ../../bbot/{hxx cxx}{*-manifest} $libs \ +exe{driver}: {hxx cxx}{*} ../../bbot/{hxx cxx}{**-manifest} $libs \ test{testscript} include ../../bbot/ diff --git a/unit-tests/machine-manifest/driver.cxx b/unit-tests/machine-manifest/driver.cxx index 44b1e36..db42434 100644 --- a/unit-tests/machine-manifest/driver.cxx +++ b/unit-tests/machine-manifest/driver.cxx @@ -11,7 +11,7 @@ #include #include -#include +#include using namespace std; using namespace butl; -- cgit v1.1