diff options
Diffstat (limited to 'bbot/machine.cxx')
-rw-r--r-- | bbot/machine.cxx | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/bbot/machine.cxx b/bbot/machine.cxx new file mode 100644 index 0000000..bebcc3f --- /dev/null +++ b/bbot/machine.cxx @@ -0,0 +1,294 @@ +// file : bbot/machine.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <bbot/machine> + +#include <sys/un.h> // sockaddr_un +#include <sys/socket.h> + +#include <unistd.h> // getuid() +#include <sys/types.h> // getuid() + +#include <cstdio> // snprintf() +#include <cstring> // strcpy() + +#include <bbot/agent> +#include <bbot/machine-manifest> + +using namespace std; +using namespace butl; + +namespace bbot +{ + static string + create_tap () + { + tracer trace ("create_tap"); + + string b ("br1"); // Use private bridge for now. + string t ("tap" + tc_num); + + auto uid (getuid ()); + + // First try to delete it in case there is one from a previous run. + // + run_exit (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); + + run (trace, "sudo", "ip", "tuntap", "add", t, "mode", "tap", "user", uid); + run (trace, "sudo", "ip", "link", "set", t, "up"); + //sleep (1); + run (trace, "sudo", "ip", "link", "set", t, "master", b); + + return t; + } + + static void + destroy_tap (const string& t) + { + tracer trace ("create_tap"); + run (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); + } + + static string + generate_mac () + { + // The last two bits of the first byte are special: bit 1 indicates a + // multicast address (which we don't want) while bit 1 -- local assignment + // (which we do want). + // + char r[6 * 2 + 5 + 1]; + snprintf (r, sizeof (r), + "%02x:%02x:%02x:%02x:%02x:%02x", + (genrand<uint8_t> () & 0xFE) | 0x02, + genrand<uint8_t> (), + genrand<uint8_t> (), + genrand<uint8_t> (), + genrand<uint8_t> (), + genrand<uint8_t> ()); + return r; + } + + class kvm_machine: public machine + { + public: + kvm_machine (const dir_path&, + const machine_manifest&, + const optional<string>& mac); + + virtual bool + shutdown () override; + + virtual void + forcedown () override; + + private: + bool + wait (size_t seconds); + + void + monitor_command (const string&); + + private: + path kvm; // Hypervisor binary. + string tap; // Tap network interface. + path monitor; // QEMU monitor UNIX socket. + process proc; + }; + + kvm_machine:: + kvm_machine (const dir_path& md, + const machine_manifest& mm, + const optional<string>& omac) + : machine (mm.mac ? *mm.mac : // Fixed mac from machine manifest. + omac ? *omac : // Generated mac from previous bootstrap. + generate_mac ()), + kvm ("kvm"), + tap (create_tap ()), + monitor ("/tmp/" + tc_name + "-monitor") + { + tracer trace ("kvm_machine"); + + if (sizeof (sockaddr_un::sun_path) <= monitor.size ()) + throw invalid_argument ("monitor unix socket path too long"); + + // Start the VM. + // + // Notes: + // + // 1. For now we let qemu calculate sockets/cores/threads from the + // total number of CPUs (i.e., threads). + // + // 2. echo system_powerdown | socat - UNIX-CONNECT:.../monitor + // + proc = run_io_start ( + trace, + fdnull (), + 2, + 2, + md, // Run from the machine's directory. + kvm, + "-boot", "c", // Boot from disk. + "-no-reboot", // Exit on VM reboot. + // + // Machine. + // + "-m", to_string (ops.ram () / 1024) + "M", + "-cpu", "host", + "-smp", ops.cpu (), + // + // Network. + // + "-device", "virtio-net-pci,netdev=net0,mac=" + mac, + "-netdev", "tap,id=net0,script=no,ifname=" + tap, + // + // Disk. + // + "-device", "virtio-scsi-pci,id=scsi", + "-device", "scsi-hd,drive=disk0", + "-drive", "if=none,id=disk0,format=raw,file=disk.img", + // + // VNC & monitor. + // + "-vnc", "localhost:" + tc_num, // 5900 + tc_num + "-monitor", "unix:" + monitor.string () + ",server,nowait"); + } + + // Connect to the QEMU monitor via the UNIX socket and send system_reset. + // You may be wondering why not system_powerdown? The reason is that while + // not all OS know how to power-down the machine, pretty much all of them + // can reboot. So combined with the -no-reboot option above, we get the + // same result in a more robust way. + // + // Note that this setup has one side effect: if the VM decided to reboot, + // say, during bootstrap, then we will interpret it as a shutdown. Current + // thinking saying this is good since we don't want our VMs to reboot + // uncontrollably for security and predictability reasons (e.g., we don't + // want Windows to decide to install updates -- this stuff should all be + // disabled during the VM preparation). + // + // Actually, this turned out not to be entirely accurate: reset appears to + // be a "hard reset" while powerdown causes a clean shutdown. So we use + // powerdown to implement shutdown() and reset/-no-reboot for implement + // forcedown(). + // + bool kvm_machine:: + shutdown () + { + monitor_command ("system_powerdown"); + + // Wait for up to 10 seconds for the machine to shutdown. + // + return wait (10); + } + + void kvm_machine:: + forcedown () + { + monitor_command ("system_reset"); + wait (size_t (~0)); // Wait indefinitely. + } + + bool kvm_machine:: + wait (size_t sec) + try + { + tracer trace ("kvm_machine::wait"); + + bool t; + for (size_t i (0); !(t = proc.try_wait ()) && i != sec; ++i) + sleep (1); + + if (t) + { + run_io_finish (trace, proc, kvm); + + destroy_tap (tap); + try_rmfile (monitor, true); // QEMU doesn't seem to remove it. + } + + return t; + } + catch (const process_error& e) + { + fail << "unable to execute " << kvm << ": " << e << endf; + } + + void kvm_machine:: + monitor_command (const string& c) + try + { + sockaddr_un addr; + addr.sun_family = AF_LOCAL; + strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor + + auto_fd sock (socket (AF_LOCAL, SOCK_STREAM, 0)); + + if (sock.get () == -1) + throw_system_error (errno); + + if (connect (sock.get (), + reinterpret_cast<sockaddr*> (&addr), + sizeof (addr)) == -1) + throw_system_error (errno); + + // Read until we get something. + // + auto readsome = [&sock] () + { + ifdstream ifs (move (sock), + fdstream_mode::non_blocking, + ostream::badbit); + + char buf[256]; + for (streamsize n (0), m (0); + n == 0 || m != 0; + m = ifs.readsome (buf, sizeof (buf) - 1)) + { + if (m != 0) + { + n += m; + + //buf[m] = '\0'; + //text << buf; + } + } + + sock = move (ifs.release ()); + }; + + // Read QEMU welcome. + // + readsome (); + + // Write our command. + // + { + ofdstream ofs (move (sock), fdstream_mode::blocking); + ofs << c << endl; + sock = move (ofs.release ()); + } + + // Read QEMU reply (may hit eof). + // + readsome (); + } + catch (const system_error& e) + { + fail << "unable to communicate with qemu monitor: " << e; + } + + unique_ptr<machine> + start_machine (const dir_path& md, + const machine_manifest& mm, + const optional<string>& mac) + { + switch (mm.type) + { + case machine_type::kvm: return make_unique<kvm_machine> (md, mm, mac); + case machine_type::nspawn: assert (false); //@@ TODO + } + + return nullptr; + } +} |