From 0a2c63de3c508b90d168604f2a5bc1345a12f4c9 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 28 Jan 2019 13:10:22 +0200 Subject: Fix race in QEMU shutdown --- bbot/agent/machine.cxx | 18 ++++++++++++++---- bbot/agent/machine.hxx | 8 ++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/bbot/agent/machine.cxx b/bbot/agent/machine.cxx index 69ef3c7..fdc11c0 100644 --- a/bbot/agent/machine.cxx +++ b/bbot/agent/machine.cxx @@ -378,8 +378,16 @@ namespace bbot } catch (const system_error& e) { - size_t t (0); - if (wait (t)) + // There is a window between QEMU closing the monitor socket and exiting + // so we wait but only briefly. + // + size_t t (seconds > 0 ? 1 : 0); + + seconds -= t; + bool r (wait (t)); + seconds += t; + + if (r) return true; fail << "unable to communicate with qemu monitor: " << e; @@ -391,13 +399,15 @@ namespace bbot void kvm_machine:: forcedown (bool fh) { + // Similar logic to shutdown(). + // try { monitor_command ("system_reset"); } catch (const system_error& e) { - size_t t (0); + size_t t (1); if (wait (t, fh)) return; @@ -450,7 +460,7 @@ namespace bbot } catch (const process_error& e) { - fail (fh) << "unable to execute " << kvm << ": " << e << endf; + fail (fh) << "unable to wait for " << kvm << ": " << e << endf; } } diff --git a/bbot/agent/machine.hxx b/bbot/agent/machine.hxx index b1ad874..04da80e 100644 --- a/bbot/agent/machine.hxx +++ b/bbot/agent/machine.hxx @@ -38,10 +38,10 @@ namespace bbot virtual void suspend (bool fail_hard = true) = 0; - // Wait for the machine to terminate up to the specified number of - // seconds. Update the timeout and return false if the machine is still - // running, true if the machine exited successfully, and throw failed - // otherwise. + // Wait for the machine to terminate up to the specified number of seconds + // (with 0 meaning don't wait). Update the timeout and return false if the + // machine is still running, true if the machine exited successfully, and + // throw failed otherwise. // virtual bool wait (size_t& seconds, bool fail_hard = true) = 0; -- cgit v1.1