From 5662e66dcdbf1af13b4ccf7352f3e435c1baf597 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 21 Jan 2019 13:14:19 +0200 Subject: Add support for running multiple instances of agent for same toolchain This allows us to perform multiple bootstraps/builds in parallel. Note that each machine can only be used by a single instance so it doesn't make sense to have more instances than machines. --- bbot/agent/agent.cli | 95 ++++++++------ bbot/agent/agent.cxx | 327 ++++++++++++++++++++++++++++++++++------------ bbot/agent/agent.hxx | 4 + bbot/agent/machine.cxx | 13 +- bbot/machine-manifest.hxx | 2 - bbot/worker/worker.cli | 14 +- 6 files changed, 320 insertions(+), 135 deletions(-) diff --git a/bbot/agent/agent.cli b/bbot/agent/agent.cli index 016fbe9..89fa9ff 100644 --- a/bbot/agent/agent.cli +++ b/bbot/agent/agent.cli @@ -24,9 +24,9 @@ namespace bbot \cb{bbot-agent} @@ TODO. - Note that on termination \cb{bbot-agent} may leave a working machine - snapshot behind. It is expected that the caller (normally Build OS - monitor) cleans them up before restarting the agent. + Note that on termination \cb{bbot-agent} may leave behind a machine lock + and working machine snapshot. It is expected that the caller (normally + Build OS monitor) cleans them up before restarting the agent. " } @@ -37,6 +37,13 @@ namespace bbot bool --help {"Print usage information and exit."} bool --version {"Print version and exit."} + uint16_t --verbose = 1 + { + "", + "Set the diagnostics verbosity to between 0 and 6 with level 1 + being the default." + } + bool --systemd-daemon { "Run as a simple systemd daemon." @@ -59,35 +66,6 @@ namespace bbot " } - path --openssl = "openssl" - { - "", - "The openssl program to be used for crypto operations. You can also - specify additional options that should be passed to the openssl program - with \cb{--openssl-option}. If the openssl program is not explicitly - specified, then \cb{bbot-agent} will use \cb{openssl} by default." - } - - strings --openssl-option - { - "", - "Additional option to be passed to the openssl program (see - \cb{--openssl} for details). Repeat this option to specify multiple - openssl options." - } - - size_t --cpu = 1 - { - "", - "Number of CPUs (threads) to use, 1 by default." - } - - size_t --ram (1024 * 1024) // 1G - { - "", - "Amount of RAM (in kB) to use, 1G by default." - } - string --toolchain-name = "default" { "", @@ -97,7 +75,10 @@ namespace bbot uint16_t --toolchain-num = 1 { "", - "Toolchain number, 1 by default." + "Toolchain number, 1 by default. If agents are running for several + toolchains, then each of them should have a unique toolchain number + between 1 and 99. This number is used as an offset for network ports, + interfaces, etc." } standard_version --toolchain-ver @@ -115,6 +96,30 @@ namespace bbot testing)." } + // We reserve 0 in case in the future we want to distinguish a single- + // instance mode or some such. + // + uint16_t --instance = 1 + { + "", + "Instance number, 1 by default. If several instances of an agent are + running for the same toolchain, then each of them should have a unique + instance number between 1 and 99. This number is used as an offset for + network ports, interfaces, etc." + } + + size_t --cpu = 1 + { + "", + "Number of CPUs (threads) to use, 1 by default." + } + + size_t --ram (1024 * 1024) // 1G + { + "", + "Amount of RAM (in kB) to use, 1G by default." + } + strings --trust { "", @@ -133,11 +138,15 @@ namespace bbot "The location of the TFTP server root, \cb{/build/tftp/} by default." } + // Low 23401+, 23501+, 23601+, etc., all look good collision-wise with + // with anything useful. + // uint16_t --tftp-port = 23400 { "", "TFTP server port base, 23400 by default. The actual port is calculated - by adding the toolchain number \c{--toolchain-num} to this value." + by adding an offset calculated based on the toolchain and instance + numbers." } size_t --bootstrap-timeout = 1200 @@ -193,11 +202,21 @@ namespace bbot and took just as long to complete." } - uint16_t --verbose = 1 + path --openssl = "openssl" { - "", - "Set the diagnostics verbosity to between 0 and 6 with level 1 - being the default." + "", + "The openssl program to be used for crypto operations. You can also + specify additional options that should be passed to the openssl program + with \cb{--openssl-option}. If the openssl program is not explicitly + specified, then \cb{bbot-agent} will use \cb{openssl} by default." + } + + strings --openssl-option + { + "", + "Additional option to be passed to the openssl program (see + \cb{--openssl} for details). Repeat this option to specify multiple + openssl options." } // Testing options. diff --git a/bbot/agent/agent.cxx b/bbot/agent/agent.cxx index 7a64a18..eec6072 100644 --- a/bbot/agent/agent.cxx +++ b/bbot/agent/agent.cxx @@ -4,11 +4,14 @@ #include -#include // getpwuid() -#include // PATH_MAX -#include // signal() -#include // rand_r() -#include // sleep(), realink(), getuid(), fsync() +#include // getpwuid() +#include // PATH_MAX +#include // signal() +#include // rand_r() +#include // sleep(), realink(), getuid(), fsync(), [f]stat() +#include // stat +#include // [f]stat() +#include // flock() #include // ifreq #include // sockaddr_in @@ -17,12 +20,13 @@ #include #include +#include #include #include #include #include -#include // dir_iterator +#include // dir_iterator, try_rmfile() #include @@ -53,6 +57,10 @@ namespace bbot standard_version tc_ver; string tc_id; + uint16_t inst; + + uint16_t offset; + string hname; uid_t uid; string uname; @@ -79,7 +87,7 @@ file_not_empty (const path& f) // The btrfs tool likes to print informational messages, like "Created // snapshot such and such". Luckily, it writes them to stdout while proper -// diagnostics to stderr. +// diagnostics goes to stderr. // template inline void @@ -136,9 +144,10 @@ bootstrap_machine (const dir_path& md, // Start the TFTP server (server chroot is --tftp). Map: // // GET requests to .../toolchains//* - // PUT requests to .../bootstrap//* + // PUT requests to .../bootstrap/-/* // - auto_rmdir arm ((dir_path (ops.tftp ()) /= "bootstrap") /= tc_name); + const string in_name (tc_name + '-' + to_string (inst)); + auto_rmdir arm ((dir_path (ops.tftp ()) /= "bootstrap") /= in_name); try_mkdir_p (arm.path); // Bootstrap result manifest. @@ -158,8 +167,8 @@ bootstrap_machine (const dir_path& md, for (size_t retry (0);; ++retry) { tftp_server tftpd ("Gr ^/?(.+)$ /toolchains/" + tc_name + "/\\1\n" + - "Pr ^/?(.+)$ /bootstrap/" + tc_name + "/\\1\n", - ops.tftp_port () + tc_num); + "Pr ^/?(.+)$ /bootstrap/" + in_name + "/\\1\n", + ops.tftp_port () + offset); l3 ([&]{trace << "tftp server on port " << tftpd.port ();}); @@ -296,16 +305,122 @@ bootstrap_machine (const dir_path& md, return r; } -// Return available machines and their directories as a parallel array. +// Machine locking. +// +// We use flock(2) which is straightforward. The tricky part is cleaning the +// file up. Here we may have a race when two processes are trying to open & +// lock the file that is being unlocked & removed by a third process. In this +// case one of these processes may still open the old file. To resolve this, +// after opening and locking the file, we verify that a new file hasn't +// appeared by stat'ing the path and file descriptor and comparing the inodes. +// +// Note that converting a lock (shared to exclusive or vice versa) is not +// guaranteed to be atomic (in case later we want to support exclusive +// bootstrap and shared build). +// +class machine_lock +{ +public: + machine_lock () = default; // Empty lock. + + ~machine_lock () + { + unlock (true /* ignore_errors */); + } + + void + unlock (bool ignore_errors = false) + { + if (fl_) + { + fl_ = false; // We have tried. + + try_rmfile (fp_, ignore_errors); + + if (flock (fd_.get (), LOCK_UN) != 0 && !ignore_errors) + throw_generic_error (errno); + } + } + + machine_lock (machine_lock&&) = default; + machine_lock& operator= (machine_lock&&) = default; + + machine_lock (const machine_lock&) = delete; + machine_lock& operator= (const machine_lock&) = delete; + + // Implementation details. + // +public: + machine_lock (path&& fp, auto_fd&& fd) + : fp_ (move (fp)), fd_ (move (fd)), fl_ (true) {} + +private: + path fp_; + auto_fd fd_; + bool fl_ = false; +}; + +// Try to lock the machine given its - directory. +// +static optional +lock_machine (const dir_path& tp) +{ + path fp (tp + ".lock"); // The -.lock file. + + for (;;) + { + auto_fd fd (fdopen (fp, fdopen_mode::out | fdopen_mode::create)); + + if (flock (fd.get (), LOCK_EX | LOCK_NB) != 0) + { + if (errno == EWOULDBLOCK) + return nullopt; + + throw_generic_error (errno); + } + + struct stat st1, st2; + + if (fstat (fd.get (), &st1) != 0 || + stat (fp.string ().c_str (), &st2) != 0 ) // Both should succeed. + throw_generic_error (errno); + + if (st1.st_ino == st2.st_ino) + return machine_lock (move (fp), move (fd)); + + // Note: unlocked by close(). + } +} + +// Given the toolchain directory (-) return the snapshot path in +// the -- form. +// +// We include the instance number into for debuggability. // -static pair +static inline dir_path +snapshot_path (const dir_path& tp) +{ + return tp.directory () /= + path::traits::temp_name (tp.leaf ().string () + '-' + to_string (inst)); +} + +// Return available machines, (re-)bootstrapping them if necessary. +// +struct bootstrapped_machine +{ + dir_path path; + bootstrapped_machine_manifest manifest; + machine_lock lock; +}; +using bootstrapped_machines = vector; + +static bootstrapped_machines enumerate_machines (const dir_path& machines) try { tracer trace ("enumerate_machines", machines.string ().c_str ()); - bootstrapped_machine_manifests rm; - dir_paths rd; + bootstrapped_machines r; if (ops.fake_machine_specified ()) { @@ -313,25 +428,30 @@ try parse_manifest ( ops.fake_machine (), "machine header")); - rm.push_back ( - bootstrapped_machine_manifest { - machine_manifest { - mh.id, - mh.name, - mh.summary, - machine_type::kvm, - string ("de:ad:be:ef:de:ad"), - nullopt, - strings ()}, - toolchain_manifest {tc_id}, - bootstrap_manifest {} - }); - - rd.push_back (dir_path (ops.machines ()) /= mh.name); // For diagnostics. - - return make_pair (move (rm), move (rd)); + r.push_back ( + bootstrapped_machine { + dir_path (ops.machines ()) /= mh.name, // For diagnostics. + bootstrapped_machine_manifest { + machine_manifest { + move (mh.id), + move (mh.name), + move (mh.summary), + machine_type::kvm, + string ("de:ad:be:ef:de:ad"), + nullopt, + strings ()}, + toolchain_manifest {tc_id}, + bootstrap_manifest {}}, + machine_lock ()}); + + return r; } + // Notice and warn if there are no machines (as opposed to all of them being + // locked). + // + bool none (true); + // The first level are machine volumes. // for (const dir_entry& ve: dir_iterator (machines, @@ -350,8 +470,7 @@ try // try { - for (const dir_entry& me: dir_iterator (vd, - false /* ignore_dangling */)) + for (const dir_entry& me: dir_iterator (vd, false /* ignore_dangling */)) { const string mn (me.path ().string ()); @@ -361,28 +480,32 @@ try const dir_path md (dir_path (vd) /= mn); // Our endgoal here is to obtain a bootstrapped snapshot of this - // machine while watching out for potential race conditions (machines - // being added/upgraded/removed; see the manual for details). + // machine while watching out for potential race conditions (other + // instances as well as machines being added/upgraded/removed; see the + // manual for details). // // So here is our overall plan: // // 1. Resolve current subvolume link for our bootstrap protocol. // - // 2. If there is no link, cleanup and ignore this machine. + // 2. Lock the machine. This excludes any other instance from trying + // to perform the following steps. // - // 3. Try to create a snapshot of current subvolume (this operation is + // 3. If there is no link, cleanup old bootstrap (if any) and ignore + // this machine. + // + // 4. Try to create a snapshot of current subvolume (this operation is // atomic). If failed (e.g., someone changed the link and removed // the subvolume in the meantime), retry from #1. // - // 4. Compare the snapshot to the already bootstrapped version (if + // 5. Compare the snapshot to the already bootstrapped version (if // any) and see if we need to re-bootstrap. If so, use the snapshot // as a starting point. Rename to bootstrapped at the end (atomic). // dir_path lp (dir_path (md) /= (mn + '-' + bs_prot)); // -

dir_path tp (dir_path (md) /= (mn + '-' + tc_name)); // - - bool te (dir_exists (tp)); - auto delete_t = [&tp, &trace] () + auto delete_bootstrapped = [&tp, &trace] () // Delete -. { run_btrfs (trace, "property", "set", "-ts", tp, "ro", "false"); run_btrfs (trace, "subvolume", "delete", tp); @@ -421,15 +544,29 @@ try fail << "unable to read subvolume link " << lp << ": " << e; } + none = none && sp.empty (); + + // Try to lock the machine, skipping it if already locked. + // + optional ml (lock_machine (tp)); + + if (!ml) + { + l4 ([&]{trace << "skipping " << md << ": locked";}); + break; + } + + bool te (dir_exists (tp)); + // If the resolution fails, then this means there is no current // machine subvolume (for this bootstrap protocol). In this case we - // clean up our toolchain subvolume (-) and ignore - // this machine. + // clean up our toolchain subvolume (-, if any) and + // ignore this machine. // if (sp.empty ()) { if (te) - delete_t (); + delete_bootstrapped (); l3 ([&]{trace << "skipping " << md << ": no subvolume link";}); break; @@ -437,8 +574,7 @@ try // -- // - const dir_path xp ( - dir_path (md) /= path::traits::temp_name (mn + '-' + tc_name)); + const dir_path xp (snapshot_path (tp)); if (btrfs_exit (trace, "subvolume", "snapshot", sp, xp) != 0) { @@ -525,7 +661,7 @@ try } if (!te) - delete_t (); + delete_bootstrapped (); } else l3 ([&]{trace << "bootstrapping " << tp;}); @@ -576,8 +712,8 @@ try // Add the machine to the lists. // - rm.push_back (move (*bmm)); - rd.push_back (move (tp)); + r.push_back ( + bootstrapped_machine {move (tp), move (*bmm), move (*ml)}); break; } @@ -589,7 +725,10 @@ try } } - return make_pair (move (rm), move (rd)); + if (none) + warn << "no build machines for toolchain " << tc_name; + + return r; } catch (const system_error& e) { @@ -629,10 +768,11 @@ try // TFTP server mapping (server chroot is --tftp): // - // GET requests to .../build//get/* - // PUT requests to .../build//put/* + // GET requests to .../build/-/get/* + // PUT requests to .../build/-/put/* // - auto_rmdir arm ((dir_path (ops.tftp ()) /= "build") /= tc_name); + const string in_name (tc_name + '-' + to_string (inst)); + auto_rmdir arm ((dir_path (ops.tftp ()) /= "build") /= in_name); dir_path gd (dir_path (arm.path) /= "get"); dir_path pd (dir_path (arm.path) /= "put"); @@ -671,8 +811,7 @@ try // -- // - const dir_path xp ( - md.directory () /= path::traits::temp_name (md.leaf ().string ())); + const dir_path xp (snapshot_path (md)); string br ("br1"); // Using private bridge for now. @@ -685,9 +824,9 @@ try // Start the TFTP server. // - tftp_server tftpd ("Gr ^/?(.+)$ /build/" + tc_name + "/get/\\1\n" + - "Pr ^/?(.+)$ /build/" + tc_name + "/put/\\1\n", - ops.tftp_port () + tc_num); + tftp_server tftpd ("Gr ^/?(.+)$ /build/" + in_name + "/get/\\1\n" + + "Pr ^/?(.+)$ /build/" + in_name + "/put/\\1\n", + ops.tftp_port () + offset); l3 ([&]{trace << "tftp server on port " << tftpd.port ();}); @@ -903,6 +1042,15 @@ try : standard_version (BBOT_VERSION_STR)); tc_id = ops.toolchain_id (); + if (tc_num == 0 || tc_num > 99) + fail << "invalid --toolchain-num value " << tc_num; + + inst = ops.instance (); + + if (inst == 0 || inst > 99) + fail << "invalid --instance value " << inst; + + offset = (tc_num - 1) * 100 + inst; // Controller URLs. // @@ -968,6 +1116,7 @@ try info << "toolchain num " << tc_num << info << "toolchain ver " << tc_ver.string () << info << "toolchain id " << tc_id << + info << "instance num " << inst << info << "CPU(s) " << ops.cpu () << info << "RAM(kB) " << ops.ram (); @@ -986,13 +1135,14 @@ try // 4. If a build task is returned, do it, upload the result, and go to #1 // (immediately). // - for (bool sleep (false);; ::sleep (sleep ? 60 : 0), sleep = false) + auto rand_sleep = [g = std::mt19937 (std::random_device {} ())] () mutable { - // Enumerate the machines. - // - auto mp (enumerate_machines (ops.machines ())); - bootstrapped_machine_manifests& ms (mp.first); - dir_paths& ds (mp.second); + return std::uniform_int_distribution (50, 60) (g); + }; + + for (unsigned int sleep (0);; ::sleep (sleep), sleep = 0) + { + bootstrapped_machines ms (enumerate_machines (ops.machines ())); // Prepare task request. // @@ -1004,10 +1154,12 @@ try machine_header_manifests {} }; - for (const bootstrapped_machine_manifest& m: ms) - tq.machines.emplace_back (m.machine.id, - m.machine.name, - m.machine.summary); + // Note: below we assume tq.size () == ms.size (). + // + for (const bootstrapped_machine& m: ms) + tq.machines.emplace_back (m.manifest.machine.id, + m.manifest.machine.name, + m.manifest.machine.summary); if (ops.dump_machines ()) { @@ -1019,13 +1171,17 @@ try if (tq.machines.empty ()) { - warn << "no build machines for toolchain " << tc_name; - sleep = true; + // Normally this means all the machines are locked so sleep a bit less. + // + sleep = rand_sleep () / 2; continue; } // Send task requests. // + // Note that we have to do it while holding the lock on all the machines + // since we don't know which machine we will need. + // string url; task_response_manifest tr; @@ -1131,22 +1287,22 @@ try if (tr.session.empty ()) // No task from any of the controllers. { l2 ([&]{trace << "no tasks from any controllers, sleeping";}); - sleep = true; + sleep = rand_sleep (); continue; } // We have a build task. // - // First find the index of the machine we were asked to use (and also - // verify it is one of those we sent). + // First find the index of the machine we were asked to use (and verify it + // is one of those we sent). Also unlock all the other machines. // - size_t i (0); - for (const machine_header_manifest& m: tq.machines) + size_t i (ms.size ()); + for (size_t j (0); j != ms.size (); ++j) { - if (m.name == tr.task->machine) - break; - - ++i; + if (tq.machines[j].name == tr.task->machine) + i = j; + else + ms[j].lock.unlock (); } if (i == ms.size ()) @@ -1174,10 +1330,12 @@ try if (!ops.trust ().empty ()) t.trust = ops.trust (); - const dir_path& d (ds[i]); // The - directory. - const bootstrapped_machine_manifest& m (ms[i]); + const dir_path& d (); // The - directory. + const bootstrapped_machine_manifest& m (); + + result_manifest r (perform_task (ms[i].path, ms[i].manifest, t)); - result_manifest r (perform_task (d, m, t)); + ms[i].lock.unlock (); // No need to hold the lock any longer. if (ops.dump_result ()) { @@ -1185,7 +1343,7 @@ try return 0; } - // Prepare answer to the private key challenge. + // Prepare the answer to the private key challenge. // optional> challenge; @@ -1211,7 +1369,8 @@ try catch (const system_error& e) { // The task response challenge is valid (verified by manifest parser), - // so there is something wrong with setup, and so the failure is fatal. + // so there must be something wrong with the setup and the failure is + // fatal. // fail << "unable to sign task response challenge: " << e; } diff --git a/bbot/agent/agent.hxx b/bbot/agent/agent.hxx index e5447dd..fd24151 100644 --- a/bbot/agent/agent.hxx +++ b/bbot/agent/agent.hxx @@ -23,10 +23,14 @@ namespace bbot extern standard_version tc_ver; // Toolchain version. extern string tc_id; // Toolchain id. + extern uint16_t inst; // Instance number. + extern string hname; // Our host name. extern uid_t uid; // Our effective user id. extern string uname; // Our effective user name. + extern uint16_t offset; // Agent offset. + // Random number generator (currently not MT-safe and limited to RAND_MAX). // size_t diff --git a/bbot/agent/machine.cxx b/bbot/agent/machine.cxx index 25b5278..df23fde 100644 --- a/bbot/agent/machine.cxx +++ b/bbot/agent/machine.cxx @@ -84,7 +84,7 @@ namespace bbot static string create_tap (const string& br, uint16_t port) { - string t ("tap" + to_string (tc_num)); + string t ("tap" + to_string (offset)); tracer trace ("create_tap", t.c_str ()); @@ -207,8 +207,13 @@ namespace bbot generate_mac ()), kvm ("kvm"), net (br, port), - vnc ("127.0.0.1:" + to_string (5900 + tc_num)), - monitor ("/tmp/" + tc_name + "-monitor") + // + // QEMU's -vnc option (see below) expects the port offset from 5900 + // rather than the absolute value. The low 5901+, 6001+, and 6101+ + // ports all look good collision-wise with anything useful. + // + vnc ("127.0.0.1:" + to_string (5900 + offset)), + monitor ("/tmp/monitor-" + tc_name + '-' + to_string (inst)) { tracer trace ("kvm_machine", md.string ().c_str ()); @@ -324,7 +329,7 @@ namespace bbot "-no-hpet", "-global", "kvm-pit.lost_tick_policy=discard", os, - "-vnc", "127.0.0.1:" + to_string (tc_num), // 5900 + tc_num + "-vnc", "127.0.0.1:" + to_string (offset), // 5900 + offset "-monitor", "unix:" + monitor.string () + ",server,nowait"); } diff --git a/bbot/machine-manifest.hxx b/bbot/machine-manifest.hxx index 7dbb038..3bb3443 100644 --- a/bbot/machine-manifest.hxx +++ b/bbot/machine-manifest.hxx @@ -114,8 +114,6 @@ namespace bbot void serialize (butl::manifest_serializer&) const; }; - - using bootstrapped_machine_manifests = vector; } #endif // BBOT_MACHINE_MANIFEST_HXX diff --git a/bbot/worker/worker.cli b/bbot/worker/worker.cli index ff287f0..eda79d7 100644 --- a/bbot/worker/worker.cli +++ b/bbot/worker/worker.cli @@ -47,6 +47,13 @@ namespace bbot bool --help {"Print usage information and exit."} bool --version {"Print version and exit."} + uint16_t --verbose = 1 + { + "", + "Set the diagnostics verbosity to between 0 and 6 with level 1 + being the default." + } + bool --bootstrap { "Perform the inital machine bootstrap insteading of building." @@ -76,13 +83,6 @@ namespace bbot specified, then the user's home directory is used." } - uint16_t --verbose = 1 - { - "", - "Set the diagnostics verbosity to between 0 and 6 with level 1 - being the default." - } - // Testing options. // string --tftp-host = "196.254.111.222" -- cgit v1.1