// file : bbot/machine.cxx -*- C++ -*- // copyright : Copyright (c) 2014-2017 Code Synthesis Ltd // license : MIT; see accompanying LICENSE file #include #include // sockaddr_un #include #include // getuid() #include // getuid() #include // snprintf() #include // strcpy() #include #include using namespace std; using namespace butl; namespace bbot { static string create_tap () { tracer trace ("create_tap"); string b ("br1"); // Use private bridge for now. string t ("tap" + tc_num); auto uid (getuid ()); // First try to delete it in case there is one from a previous run. // run_exit (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); run (trace, "sudo", "ip", "tuntap", "add", t, "mode", "tap", "user", uid); run (trace, "sudo", "ip", "link", "set", t, "up"); //sleep (1); run (trace, "sudo", "ip", "link", "set", t, "master", b); return t; } static void destroy_tap (const string& t) { tracer trace ("create_tap"); run (trace, "sudo", "ip", "tuntap", "delete", t, "mode", "tap"); } static string generate_mac () { // The last two bits of the first byte are special: bit 1 indicates a // multicast address (which we don't want) while bit 1 -- local assignment // (which we do want). // char r[6 * 2 + 5 + 1]; snprintf (r, sizeof (r), "%02x:%02x:%02x:%02x:%02x:%02x", (genrand () & 0xFE) | 0x02, genrand (), genrand (), genrand (), genrand (), genrand ()); return r; } class kvm_machine: public machine { public: kvm_machine (const dir_path&, const machine_manifest&, const optional& mac); virtual bool shutdown () override; virtual void forcedown () override; private: bool wait (size_t seconds); void monitor_command (const string&); private: path kvm; // Hypervisor binary. string tap; // Tap network interface. path monitor; // QEMU monitor UNIX socket. process proc; }; kvm_machine:: kvm_machine (const dir_path& md, const machine_manifest& mm, const optional& omac) : machine (mm.mac ? *mm.mac : // Fixed mac from machine manifest. omac ? *omac : // Generated mac from previous bootstrap. generate_mac ()), kvm ("kvm"), tap (create_tap ()), monitor ("/tmp/" + tc_name + "-monitor") { tracer trace ("kvm_machine"); if (sizeof (sockaddr_un::sun_path) <= monitor.size ()) throw invalid_argument ("monitor unix socket path too long"); // Start the VM. // // Notes: // // 1. For now we let qemu calculate sockets/cores/threads from the // total number of CPUs (i.e., threads). // // 2. echo system_powerdown | socat - UNIX-CONNECT:.../monitor // proc = run_io_start ( trace, fdnull (), 2, 2, md, // Run from the machine's directory. kvm, "-boot", "c", // Boot from disk. "-no-reboot", // Exit on VM reboot. // // Machine. // "-m", to_string (ops.ram () / 1024) + "M", "-cpu", "host", "-smp", ops.cpu (), // // Network. // "-device", "virtio-net-pci,netdev=net0,mac=" + mac, "-netdev", "tap,id=net0,script=no,ifname=" + tap, // // Disk. // "-device", "virtio-scsi-pci,id=scsi", "-device", "scsi-hd,drive=disk0", "-drive", "if=none,id=disk0,format=raw,file=disk.img", // // VNC & monitor. // "-vnc", "localhost:" + tc_num, // 5900 + tc_num "-monitor", "unix:" + monitor.string () + ",server,nowait"); } // Connect to the QEMU monitor via the UNIX socket and send system_reset. // You may be wondering why not system_powerdown? The reason is that while // not all OS know how to power-down the machine, pretty much all of them // can reboot. So combined with the -no-reboot option above, we get the // same result in a more robust way. // // Note that this setup has one side effect: if the VM decided to reboot, // say, during bootstrap, then we will interpret it as a shutdown. Current // thinking saying this is good since we don't want our VMs to reboot // uncontrollably for security and predictability reasons (e.g., we don't // want Windows to decide to install updates -- this stuff should all be // disabled during the VM preparation). // // Actually, this turned out not to be entirely accurate: reset appears to // be a "hard reset" while powerdown causes a clean shutdown. So we use // powerdown to implement shutdown() and reset/-no-reboot for implement // forcedown(). // bool kvm_machine:: shutdown () { monitor_command ("system_powerdown"); // Wait for up to 10 seconds for the machine to shutdown. // return wait (10); } void kvm_machine:: forcedown () { monitor_command ("system_reset"); wait (size_t (~0)); // Wait indefinitely. } bool kvm_machine:: wait (size_t sec) try { tracer trace ("kvm_machine::wait"); bool t; for (size_t i (0); !(t = proc.try_wait ()) && i != sec; ++i) sleep (1); if (t) { run_io_finish (trace, proc, kvm); destroy_tap (tap); try_rmfile (monitor, true); // QEMU doesn't seem to remove it. } return t; } catch (const process_error& e) { fail << "unable to execute " << kvm << ": " << e << endf; } void kvm_machine:: monitor_command (const string& c) try { sockaddr_un addr; addr.sun_family = AF_LOCAL; strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor auto_fd sock (socket (AF_LOCAL, SOCK_STREAM, 0)); if (sock.get () == -1) throw_system_error (errno); if (connect (sock.get (), reinterpret_cast (&addr), sizeof (addr)) == -1) throw_system_error (errno); // Read until we get something. // auto readsome = [&sock] () { ifdstream ifs (move (sock), fdstream_mode::non_blocking, ostream::badbit); char buf[256]; for (streamsize n (0), m (0); n == 0 || m != 0; m = ifs.readsome (buf, sizeof (buf) - 1)) { if (m != 0) { n += m; //buf[m] = '\0'; //text << buf; } } sock = move (ifs.release ()); }; // Read QEMU welcome. // readsome (); // Write our command. // { ofdstream ofs (move (sock), fdstream_mode::blocking); ofs << c << endl; sock = move (ofs.release ()); } // Read QEMU reply (may hit eof). // readsome (); } catch (const system_error& e) { fail << "unable to communicate with qemu monitor: " << e; } unique_ptr start_machine (const dir_path& md, const machine_manifest& mm, const optional& mac) { switch (mm.type) { case machine_type::kvm: return make_unique (md, mm, mac); case machine_type::nspawn: assert (false); //@@ TODO } return nullptr; } }