From c8ace1ee0a6cab5fd4ea2f084ea436cfa513637d Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 13 Jul 2017 22:50:15 +0300 Subject: Make use of wildcards in buildfiles --- bbot/agent/machine.cxx | 474 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 bbot/agent/machine.cxx (limited to 'bbot/agent/machine.cxx') diff --git a/bbot/agent/machine.cxx b/bbot/agent/machine.cxx new file mode 100644 index 0000000..422c623 --- /dev/null +++ b/bbot/agent/machine.cxx @@ -0,0 +1,474 @@ +// file : bbot/agent/machine.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : TBC; see accompanying LICENSE file + +#include + +#include // sleep() + +#include // sockaddr_un +#include + +#include // snprintf() +#include // strcpy() + +#include +#include + +using namespace std; +using namespace butl; + +namespace bbot +{ + // Forward TFTP requests (UDP/69) coming from the machine to the specified + // port. + // + // This allows the machine to connect to any "unknown" IP (e.g., link-local + // 196.254.111.222) port 69 and end up being redirected to out TFTP server. + // + static void + iptables (tracer& t, + const char* a, + const string& tap, + const string& br, + uint16_t port, + bool ignore_errors = false) + { + string addr (iface_addr (br)); + + auto_fd fdn (ignore_errors ? fdnull () : nullfd); + int ofd (ignore_errors ? fdn.get () : 2); + + process_exit::code_type e; + + e = run_io_exit (t, 0, ofd, ofd, + "sudo", "iptables", + "-t", "nat", + a, "PREROUTING", + "-m", "udp", + "-p", "udp", + "-m", "physdev", + "-i", br, + "--physdev-in", tap, + "--dport", 69, + "-j", "DNAT", + "--to-destination", addr + ':' + to_string (port)); + + if (e != 0 && !ignore_errors) + fail << "process iptables terminated with non-zero exit code"; + + // Nobody really knows whether this is really needed (really)... + // + e = run_io_exit (t, 0, ofd, ofd, + "sudo", "iptables", + a, "FORWARD", + "-m", "udp", + "-p", "udp", + "-m", "physdev", + "-o", br, + "--physdev-out", tap, + "-d", addr, + "--dport", port, + "-m", "state", + "--state", "NEW,ESTABLISHED,RELATED", + "-j", "ACCEPT"); + + if (e != 0 && !ignore_errors) + fail << "process iptables terminated with non-zero exit code"; + } + + static string + create_tap (const string& br, uint16_t port) + { + string t ("tap" + to_string (tc_num)); + + tracer trace ("create_tap", t.c_str ()); + + // First try to delete it in case there is one from a previous run. + // + iptables (trace, "-D", t, br, port, true); // Ignore errors. + run_exit (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); + + run (trace, "sudo", "ip", "tuntap", "add", t, "mode", "tap", "user", uid); + run (trace, "sudo", "ip", "link", "set", t, "up"); + run (trace, "sudo", "ip", "link", "set", t, "master", br); + + iptables (trace, "-A", t, br, port); // Add. + + return t; + } + + static void + destroy_tap (const string& t, const string& br, uint16_t port) + { + tracer trace ("destroy_tap", t.c_str ()); + iptables (trace, "-D", t, br, port); // Delete. + run (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); + } + + class tap + { + public: + string iface; + + string bridge; // Bridge interface to which this tap belongs + uint16_t port; // UDP port to forward TFTP traffic to. + + tap (string b, uint16_t p) + : iface (create_tap (b, p)), bridge (move (b)), port (p) {} + + ~tap () + { + if (!iface.empty ()) + { + try {destroy ();} catch (...) {} + } + } + + void + destroy () + { + destroy_tap (iface, bridge, port); + iface.clear (); + } + }; + + static string + generate_mac () + { + // The last two bits of the first byte are special: bit 1 indicates a + // multicast address (which we don't want) while bit 1 -- local assignment + // (which we do want). + // + char r[6 * 2 + 5 + 1]; + snprintf (r, sizeof (r), + "%02x:%02x:%02x:%02x:%02x:%02x", + (genrand () & 0xFE) | 0x02, + genrand (), + genrand (), + genrand (), + genrand (), + genrand ()); + return r; + } + + class kvm_machine: public machine + { + public: + kvm_machine (const dir_path&, + const machine_manifest&, + const optional& mac, + const string& br_iface, + uint16_t tftp_port); + + virtual bool + shutdown (size_t& seconds) override; + + virtual void + forcedown (bool fail_hard) override; + + virtual void + suspend () override; + + bool + wait (size_t& seconds, bool fail_hard) override; + + using machine::wait; + + virtual void + print_info (diag_record&) override; + + private: + void + monitor_command (const string&, bool fail_hard = true); + + private: + path kvm; // Hypervisor binary. + tap net; // Tap network interface. + string vnc; // QEMU VNC TCP addr:port. + path monitor; // QEMU monitor UNIX socket. + process proc; + }; + + kvm_machine:: + kvm_machine (const dir_path& md, + const machine_manifest& mm, + const optional& omac, + const string& br, + uint16_t port) + : machine (mm.mac ? *mm.mac : // Fixed mac from machine manifest. + omac ? *omac : // Generated mac from previous bootstrap. + generate_mac ()), + kvm ("kvm"), + net (br, port), + vnc ("127.0.0.1:" + to_string (5900 + tc_num)), + monitor ("/tmp/" + tc_name + "-monitor") + { + tracer trace ("kvm_machine", md.string ().c_str ()); + + if (sizeof (sockaddr_un::sun_path) <= monitor.size ()) + throw invalid_argument ("monitor unix socket path too long"); + + // Map logical CPUs to sockets/cores/threads. Failed that, QEMU just makes + // it a machine with that number of sockets and some operating systems + // (like Windows) only can do two. + // + size_t cpu (ops.cpu ()); + + size_t sockets (cpu <= 8 ? 1 : cpu <= 64 ? 2 : 4); + size_t cores (cpu / sockets); + size_t threads (cores <= 4 ? 1 : 2); + cores /= threads; + + + // We probably don't want to commit all the available RAM to the VM since + // some of it could be used on the host side for caching, etc. So the + // heuristics that we will use is 4G or 1G per CPU, whichever is greater + // and the rest divide equally between the host and the VM. + // + size_t ram ((cpu < 4 ? 4 : cpu) * 1024 * 1024); // Kb. + + if (ram > ops.ram ()) + ram = ops.ram (); + else + ram += (ops.ram () - ram) / 2; + + // If we have options, use that instead of the default network and + // disk configuration. + // + strings os; + + if (mm.options) + { + os = mm.unquoted_options (); + + // Pre-process ifname=? and mac=?. + // + auto sub = [] (string& o, const char* s, const string& r) + { + size_t p (o.find (s)); + + if (p != string::npos) + { + p = o.find ('?', p + 1); + assert (p != string::npos); + o.replace (p, 1, r); + } + }; + + for (string& o: os) + { + sub (o, "ifname=?", net.iface); + sub (o, "mac=?", mac); + } + } + else + { + auto add = [&os] (string o, string v) + { + os.push_back (move (o)); + os.push_back (move (v)); + }; + + // Network. + // + add ("-netdev", "tap,id=net0,script=no,ifname=" + net.iface); + add ("-device", "virtio-net-pci,netdev=net0,mac=" + mac); + + // Disk. + // + add ("-drive", "if=none,id=disk0,file=disk.img,format=raw"); + add ("-device", "virtio-blk-pci,scsi=off,drive=disk0"); + + //"-drive", "if=none,id=disk0,format=raw,file=disk.img" + //"-device", "virtio-scsi-pci,id=scsi" + //"-device", "scsi-hd,drive=disk0" + } + + // Start the VM. + // + // Notes: + // + // 1. echo system_powerdown | socat - UNIX-CONNECT:.../monitor + // + proc = run_io_start ( + trace, + fdnull (), + 2, + 2, + md, // Run from the machine's directory. + kvm, + "-boot", "c", // Boot from disk. + "-no-reboot", // Exit on VM reboot. + "-m", to_string (ram / 1024) + "M", + "-cpu", "host", + "-smp", (to_string (cpu) + + ",sockets=" + to_string (sockets) + + ",cores=" + to_string (cores) + + ",threads=" + to_string (threads)), + os, + "-vnc", "127.0.0.1:" + to_string (tc_num), // 5900 + tc_num + "-monitor", "unix:" + monitor.string () + ",server,nowait"); + } + + // Connect to the QEMU monitor via the UNIX socket and send system_reset. + // You may be wondering why not system_powerdown? The reason is that while + // not all OS know how to power-down the machine, pretty much all of them + // can reboot. So combined with the -no-reboot option above, we get the + // same result in a more robust way. + // + // Note that this setup has one side effect: if the VM decided to reboot, + // say, during bootstrap, then we will interpret it as a shutdown. Current + // thinking saying this is good since we don't want our VMs to reboot + // uncontrollably for security and predictability reasons (e.g., we don't + // want Windows to decide to install updates -- this stuff should all be + // disabled during the VM preparation). + // + // Actually, this turned out not to be entirely accurate: reset appears to + // be a "hard reset" while powerdown causes a clean shutdown. So we use + // powerdown to implement shutdown() and reset/-no-reboot for implement + // forcedown(). + // + bool kvm_machine:: + shutdown (size_t& seconds) + { + monitor_command ("system_powerdown"); + + // Wait for up to the specified number if seconds for the machine to + // shutdown. + // + return wait (seconds); + } + + void kvm_machine:: + forcedown (bool fh) + { + monitor_command ("system_reset", fh); + wait (fh); + } + + void kvm_machine:: + suspend () + { + monitor_command ("stop"); + } + + void kvm_machine:: + print_info (diag_record& dr) + { + dr << info << "qemu pid: " << proc.id () + << info << "qemu vnc: " << vnc + << info << "qemu monitor: unix:" << monitor; + } + + bool kvm_machine:: + wait (size_t& sec, bool fh) + { + try + { + tracer trace ("kvm_machine::wait"); + + bool t; + for (; !(t = proc.try_wait ()) && sec != 0; --sec) + sleep (1); + + if (t) + { + run_io_finish (trace, proc, kvm, fh); + net.destroy (); //@@ Always fails hard. + try_rmfile (monitor, true); // QEMU doesn't seem to remove it. + } + + return t; + } + catch (const process_error& e) + { + fail (fh) << "unable to execute " << kvm << ": " << e << endf; + } + } + + void kvm_machine:: + monitor_command (const string& c, bool fh) + { + try + { + sockaddr_un addr; + addr.sun_family = AF_LOCAL; + strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor + + auto_fd sock (socket (AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0)); + + if (sock.get () == -1) + throw_system_error (errno); + + if (connect (sock.get (), + reinterpret_cast (&addr), + sizeof (addr)) == -1) + throw_system_error (errno); + + // Read until we get something. + // + auto readsome = [&sock] () + { + ifdstream ifs (move (sock), + fdstream_mode::non_blocking, + ostream::badbit); + + char buf[256]; + for (streamsize n (0), m (0); + n == 0 || m != 0; + m = ifs.readsome (buf, sizeof (buf) - 1)) + { + if (m != 0) + { + n += m; + + //buf[m] = '\0'; + //text << buf; + } + } + + sock = ifs.release (); + }; + + // Read QEMU welcome. + // + readsome (); + + // Write our command. + // + { + ofdstream ofs (move (sock), fdstream_mode::blocking); + ofs << c << endl; + sock = ofs.release (); + } + + // Read QEMU reply (may hit eof). + // + readsome (); + return; + } + catch (const system_error& e) + { + fail (fh) << "unable to communicate with qemu monitor: " << e; + } + } + + unique_ptr + start_machine (const dir_path& md, + const machine_manifest& mm, + const optional& mac, + const string& br_iface, + uint16_t tftp_port) + { + switch (mm.type) + { + case machine_type::kvm: + return make_unique (md, mm, mac, br_iface, tftp_port); + case machine_type::nspawn: + assert (false); //@@ TODO + } + + return nullptr; + } +} -- cgit v1.1