aboutsummaryrefslogtreecommitdiff
path: root/bbot/machine.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'bbot/machine.cxx')
-rw-r--r--bbot/machine.cxx294
1 files changed, 294 insertions, 0 deletions
diff --git a/bbot/machine.cxx b/bbot/machine.cxx
new file mode 100644
index 0000000..bebcc3f
--- /dev/null
+++ b/bbot/machine.cxx
@@ -0,0 +1,294 @@
+// file : bbot/machine.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <bbot/machine>
+
+#include <sys/un.h> // sockaddr_un
+#include <sys/socket.h>
+
+#include <unistd.h> // getuid()
+#include <sys/types.h> // getuid()
+
+#include <cstdio> // snprintf()
+#include <cstring> // strcpy()
+
+#include <bbot/agent>
+#include <bbot/machine-manifest>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+  // Create the tap network interface for this toolchain, bring it up, and
+  // attach it to the bridge. Return the name of the interface.
+  //
+  static string
+  create_tap ()
+  {
+    tracer trace ("create_tap");
+
+    string bridge ("br1");        // Use private bridge for now.
+    string name ("tap" + tc_num); // One interface per toolchain number.
+
+    // The interface is created as owned by the user we are running as.
+    //
+    auto owner (getuid ());
+
+    // An interface with this name may have been left behind by a previous
+    // run so try to get rid of it first. Note that the result of this
+    // attempt is not examined.
+    //
+    run_exit (trace, "sudo", "ip", "tuntap", "delete", name, "mode", "tap");
+
+    // Now create it anew, bring it up, and enslave it to the bridge.
+    //
+    run (trace,
+         "sudo", "ip", "tuntap", "add", name, "mode", "tap", "user", owner);
+    run (trace, "sudo", "ip", "link", "set", name, "up");
+    //sleep (1);
+    run (trace, "sudo", "ip", "link", "set", name, "master", bridge);
+
+    return name;
+  }
+
+  // Delete the tap network interface t (as returned by create_tap()).
+  //
+  static void
+  destroy_tap (const string& t)
+  {
+    // Trace under our own name so that the output is not attributed to
+    // create_tap().
+    //
+    tracer trace ("destroy_tap");
+    run (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap");
+  }
+
+  // Generate a random MAC address and return it in the canonical lower-case
+  // xx:xx:xx:xx:xx:xx form.
+  //
+  static string
+  generate_mac ()
+  {
+    uint8_t o[6];
+    for (uint8_t& b: o)
+      b = genrand<uint8_t> ();
+
+    // The two least significant bits of the first octet are special: bit 0
+    // indicates a multicast address (which we don't want) while bit 1 --
+    // local assignment (which we do want). So clear the former and set the
+    // latter.
+    //
+    o[0] = (o[0] & 0xFE) | 0x02;
+
+    // Six two-digit octets, five separators, and the terminating null.
+    //
+    char buf[sizeof ("xx:xx:xx:xx:xx:xx")];
+    snprintf (buf, sizeof (buf),
+              "%02x:%02x:%02x:%02x:%02x:%02x",
+              o[0], o[1], o[2], o[3], o[4], o[5]);
+
+    return buf;
+  }
+
+  // Machine implementation that runs the VM using the kvm hypervisor binary
+  // and controls it via the QEMU monitor UNIX socket.
+  //
+  class kvm_machine: public machine
+  {
+  public:
+    // Create the tap interface and start the VM from the machine directory.
+    // The last argument is the MAC address generated by a previous bootstrap,
+    // if any.
+    //
+    kvm_machine (const dir_path&,
+                 const machine_manifest&,
+                 const optional<string>& mac);
+
+    // Ask the VM to power down and wait a limited time for it to do so.
+    // Return false if it is still running.
+    //
+    bool
+    shutdown () override;
+
+    // Reset the VM and wait for the hypervisor process to exit.
+    //
+    void
+    forcedown () override;
+
+  private:
+    // Wait up to the specified number of seconds for the hypervisor process
+    // to terminate, cleaning up if it did. Return false if it is still
+    // running.
+    //
+    bool
+    wait (size_t seconds);
+
+    // Send a single command to the QEMU monitor.
+    //
+    void
+    monitor_command (const string&);
+
+    // Note: the constructor initializes these in the declaration order.
+    //
+    path    kvm;     // Hypervisor binary.
+    string  tap;     // Tap network interface.
+    path    monitor; // QEMU monitor UNIX socket.
+    process proc;    // Running hypervisor process.
+  };
+
+  // Create the tap interface and start the hypervisor process for the
+  // machine in directory md.
+  //
+  // The MAC address is picked in the following order of preference: the
+  // fixed one from the machine manifest, the one generated during a previous
+  // bootstrap (omac), a freshly generated one.
+  //
+  kvm_machine::
+  kvm_machine (const dir_path& md,
+               const machine_manifest& mm,
+               const optional<string>& omac)
+      : machine (mm.mac ? *mm.mac :
+                 omac   ? *omac   :
+                 generate_mac ()),
+        kvm ("kvm"),
+        tap (create_tap ()),
+        monitor ("/tmp/" + tc_name + "-monitor")
+  {
+    tracer trace ("kvm_machine");
+
+    // The path must fit into sockaddr_un::sun_path together with the
+    // terminating null character since monitor_command() copies it there
+    // with strcpy().
+    //
+    if (monitor.size () >= sizeof (sockaddr_un::sun_path))
+      throw invalid_argument ("monitor unix socket path too long");
+
+    // Compose the option values that are only known at runtime.
+    //
+    string ram (to_string (ops.ram () / 1024) + "M");
+    auto   nic ("virtio-net-pci,netdev=net0,mac=" + mac);
+    string net ("tap,id=net0,script=no,ifname=" + tap);
+    auto   vnc ("localhost:" + tc_num); // 5900 + tc_num
+    string mon ("unix:" + monitor.string () + ",server,nowait");
+
+    // Start the VM.
+    //
+    // Notes:
+    //
+    // 1. For now we let qemu calculate sockets/cores/threads from the
+    //    total number of CPUs (i.e., threads).
+    //
+    // 2. echo system_powerdown | socat - UNIX-CONNECT:.../monitor
+    //
+    proc = run_io_start (
+      trace,
+      fdnull (), // Presumably stdin; the next two -- stdout and stderr.
+      2,
+      2,
+      md,        // Run from the machine's directory.
+      kvm,
+      "-boot", "c", // Boot from disk.
+      "-no-reboot", // Exit on VM reboot.
+      //
+      // Machine.
+      //
+      "-m", ram,
+      "-cpu", "host",
+      "-smp", ops.cpu (),
+      //
+      // Network.
+      //
+      "-device", nic,
+      "-netdev", net,
+      //
+      // Disk.
+      //
+      "-device", "virtio-scsi-pci,id=scsi",
+      "-device", "scsi-hd,drive=disk0",
+      "-drive", "if=none,id=disk0,format=raw,file=disk.img",
+      //
+      // VNC & monitor.
+      //
+      "-vnc", vnc,
+      "-monitor", mon);
+  }
+
+ // Connect to the QEMU monitor via the UNIX socket and send system_reset.
+ // You may be wondering why not system_powerdown? The reason is that while
+ // not all OS know how to power-down the machine, pretty much all of them
+ // can reboot. So combined with the -no-reboot option above, we get the
+ // same result in a more robust way.
+ //
+ // Note that this setup has one side effect: if the VM decided to reboot,
+ // say, during bootstrap, then we will interpret it as a shutdown. Current
+ // thinking is that this is good since we don't want our VMs to reboot
+ // uncontrollably for security and predictability reasons (e.g., we don't
+ // want Windows to decide to install updates -- this stuff should all be
+ // disabled during the VM preparation).
+ //
+ // Actually, this turned out not to be entirely accurate: reset appears to
+ // be a "hard reset" while powerdown causes a clean shutdown. So we use
+ // powerdown to implement shutdown() and reset/-no-reboot to implement
+ // forcedown().
+ //
+  bool kvm_machine::
+  shutdown ()
+  {
+    // Request a power down via the monitor and then give the machine a
+    // limited amount of time to comply. Return false if it is still running
+    // once the time is up.
+    //
+    const size_t timeout (10); // Seconds.
+
+    monitor_command ("system_powerdown");
+    return wait (timeout);
+  }
+
+  void kvm_machine::
+  forcedown ()
+  {
+    // Reset the machine which, together with -no-reboot, makes the
+    // hypervisor exit. Then wait for that to happen using the largest
+    // possible timeout (i.e., effectively indefinitely).
+    //
+    monitor_command ("system_reset");
+    wait (static_cast<size_t> (-1));
+  }
+
+  // Wait for up to sec seconds for the hypervisor process to terminate,
+  // checking once a second. If it has terminated, finish the process run,
+  // remove the tap interface and the monitor socket, and return true.
+  // Otherwise return false.
+  //
+  bool kvm_machine::
+  wait (size_t sec)
+  try
+  {
+    tracer trace ("kvm_machine::wait");
+
+    // Check first, then sleep and re-check up to sec times.
+    //
+    bool done (proc.try_wait ());
+
+    for (size_t i (0); !done && i != sec; ++i)
+    {
+      sleep (1);
+      done = proc.try_wait ();
+    }
+
+    if (!done)
+      return false; // Still running.
+
+    run_io_finish (trace, proc, kvm);
+
+    destroy_tap (tap);
+    try_rmfile (monitor, true); // QEMU doesn't seem to remove it.
+
+    return true;
+  }
+  catch (const process_error& e)
+  {
+    fail << "unable to execute " << kvm << ": " << e << endf;
+  }
+
+  // Send command c to the QEMU monitor: connect to the monitor UNIX socket,
+  // read the welcome message, write the command followed by a newline, and
+  // read the reply. What is read is discarded.
+  //
+  // Issue diagnostics and fail if communicating with the monitor results in
+  // system_error.
+  //
+  void kvm_machine::
+  monitor_command (const string& c)
+  try
+  {
+    // Zero-fill the address so that no indeterminate bytes are passed to
+    // connect() (we pass the size of the entire structure below).
+    //
+    sockaddr_un addr;
+    memset (&addr, 0, sizeof (addr));
+    addr.sun_family = AF_LOCAL;
+    strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor
+
+    auto_fd sock (socket (AF_LOCAL, SOCK_STREAM, 0));
+
+    if (sock.get () == -1)
+      throw_system_error (errno);
+
+    if (connect (sock.get (),
+                 reinterpret_cast<sockaddr*> (&addr),
+                 sizeof (addr)) == -1)
+      throw_system_error (errno);
+
+    // Read until we get something.
+    //
+    // The descriptor is temporarily handed over to a non-blocking stream and
+    // taken back at the end. The loop keeps polling until at least one byte
+    // has been read in total (n) and the latest readsome() call returned
+    // nothing (m).
+    //
+    // NOTE(review): this looks like a busy loop while no data is available;
+    // confirm that is acceptable and that it cannot spin forever if the peer
+    // closes the connection before sending anything.
+    //
+    auto readsome = [&sock] ()
+    {
+      ifdstream ifs (move (sock),
+                     fdstream_mode::non_blocking,
+                     ostream::badbit);
+
+      char buf[256];
+      for (streamsize n (0), m (0);
+           n == 0 || m != 0;
+           m = ifs.readsome (buf, sizeof (buf) - 1))
+      {
+        if (m != 0)
+        {
+          n += m;
+
+          //buf[m] = '\0';
+          //text << buf;
+        }
+      }
+
+      sock = move (ifs.release ());
+    };
+
+    // Read QEMU welcome.
+    //
+    readsome ();
+
+    // Write our command.
+    //
+    // Here the descriptor is handed over to a blocking stream; endl both
+    // terminates the command line and flushes it.
+    //
+    {
+      ofdstream ofs (move (sock), fdstream_mode::blocking);
+      ofs << c << endl;
+      sock = move (ofs.release ());
+    }
+
+    // Read QEMU reply (may hit eof).
+    //
+    readsome ();
+  }
+  catch (const system_error& e)
+  {
+    fail << "unable to communicate with qemu monitor: " << e;
+  }
+
+  // Start the machine in directory md according to the type in its manifest.
+  // The mac argument is passed through to the machine implementation. Only
+  // kvm machines are currently supported; for other types the result is
+  // NULL (after an assertion failure if assertions are enabled).
+  //
+  unique_ptr<machine>
+  start_machine (const dir_path& md,
+                 const machine_manifest& mm,
+                 const optional<string>& mac)
+  {
+    unique_ptr<machine> r;
+
+    switch (mm.type)
+    {
+    case machine_type::kvm:
+      {
+        r = make_unique<kvm_machine> (md, mm, mac);
+        break;
+      }
+    case machine_type::nspawn:
+      {
+        assert (false); //@@ TODO
+        break;
+      }
+    }
+
+    return r;
+  }
+}