aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2017-05-01 11:53:04 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2017-05-01 11:53:04 +0200
commit8b896055d4d90b538211784cf9ad1bf335b20dc6 (patch)
tree755ca33df640a4a29e1d00c7e2e0265a4243bb94
parentc06600bc5e24389453d9a486c69d9746ae4fef23 (diff)
Soft-fail on VM forcedown failures
Since we've got the result this should be harmless. And should help with spurious KVM crashes with Mac OS guest.
-rw-r--r--bbot/agent.cxx5
-rw-r--r--bbot/machine.cxx145
-rw-r--r--bbot/machine.hxx10
-rw-r--r--bbot/utility.hxx4
-rw-r--r--bbot/utility.txx37
5 files changed, 113 insertions, 88 deletions
diff --git a/bbot/agent.cxx b/bbot/agent.cxx
index 6b59092..096bafe 100644
--- a/bbot/agent.cxx
+++ b/bbot/agent.cxx
@@ -659,9 +659,10 @@ try
soft_fail ("build terminated abnormally", false);
// Force the machine down (there is no need wasting time on clean
- // shutdown since the next step is to drop the snapshot).
+ // shutdown since the next step is to drop the snapshot). Also fail
+ // softly if things go badly.
//
- m->forcedown ();
+ try {m->forcedown (false);} catch (const failed&) {}
}
run_btrfs (trace, "subvolume", "delete", xp);
diff --git a/bbot/machine.cxx b/bbot/machine.cxx
index bf0c0bf..2f1e56b 100644
--- a/bbot/machine.cxx
+++ b/bbot/machine.cxx
@@ -165,13 +165,13 @@ namespace bbot
shutdown (size_t& seconds) override;
virtual void
- forcedown () override;
+ forcedown (bool fail_hard) override;
virtual void
suspend () override;
bool
- wait (size_t& seconds) override;
+ wait (size_t& seconds, bool fail_hard) override;
using machine::wait;
@@ -180,7 +180,7 @@ namespace bbot
private:
void
- monitor_command (const string&);
+ monitor_command (const string&, bool fail_hard = true);
private:
path kvm; // Hypervisor binary.
@@ -341,10 +341,10 @@ namespace bbot
}
void kvm_machine::
- forcedown ()
+ forcedown (bool fh)
{
- monitor_command ("system_reset");
- wait ();
+ monitor_command ("system_reset", fh);
+ wait (fh);
}
void kvm_machine::
@@ -362,91 +362,102 @@ namespace bbot
}
bool kvm_machine::
- wait (size_t& sec)
- try
+ wait (size_t& sec, bool fh)
{
- tracer trace ("kvm_machine::wait");
+ try
+ {
+ tracer trace ("kvm_machine::wait");
+
+ bool t;
+ for (; !(t = proc.try_wait ()) && sec != 0; --sec)
+ sleep (1);
- bool t;
- for (; !(t = proc.try_wait ()) && sec != 0; --sec)
- sleep (1);
+ if (t)
+ {
+ run_io_finish (trace, proc, kvm, fh);
+ net.destroy (); //@@ Always fails hard.
+ try_rmfile (monitor, true); // QEMU doesn't seem to remove it.
+ }
- if (t)
+ return t;
+ }
+ catch (const process_error& e)
{
- run_io_finish (trace, proc, kvm);
- net.destroy ();
- try_rmfile (monitor, true); // QEMU doesn't seem to remove it.
+ diag_record dr; if (fh) dr << fail; else dr << error;
+ dr << "unable to execute " << kvm << ": " << e;
}
- return t;
- }
- catch (const process_error& e)
- {
- fail << "unable to execute " << kvm << ": " << e << endf;
+ throw failed ();
}
void kvm_machine::
- monitor_command (const string& c)
- try
+ monitor_command (const string& c, bool fh)
{
- sockaddr_un addr;
- addr.sun_family = AF_LOCAL;
- strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor
+ try
+ {
+ sockaddr_un addr;
+ addr.sun_family = AF_LOCAL;
+ strcpy (addr.sun_path, monitor.string ().c_str ()); // Size check in ctor
- auto_fd sock (socket (AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0));
+ auto_fd sock (socket (AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0));
- if (sock.get () == -1)
- throw_system_error (errno);
+ if (sock.get () == -1)
+ throw_system_error (errno);
- if (connect (sock.get (),
- reinterpret_cast<sockaddr*> (&addr),
- sizeof (addr)) == -1)
- throw_system_error (errno);
+ if (connect (sock.get (),
+ reinterpret_cast<sockaddr*> (&addr),
+ sizeof (addr)) == -1)
+ throw_system_error (errno);
- // Read until we get something.
- //
- auto readsome = [&sock] ()
- {
- ifdstream ifs (move (sock),
- fdstream_mode::non_blocking,
- ostream::badbit);
-
- char buf[256];
- for (streamsize n (0), m (0);
- n == 0 || m != 0;
- m = ifs.readsome (buf, sizeof (buf) - 1))
+ // Read until we get something.
+ //
+ auto readsome = [&sock] ()
{
- if (m != 0)
+ ifdstream ifs (move (sock),
+ fdstream_mode::non_blocking,
+ ostream::badbit);
+
+ char buf[256];
+ for (streamsize n (0), m (0);
+ n == 0 || m != 0;
+ m = ifs.readsome (buf, sizeof (buf) - 1))
{
- n += m;
+ if (m != 0)
+ {
+ n += m;
- //buf[m] = '\0';
- //text << buf;
+ //buf[m] = '\0';
+ //text << buf;
+ }
}
- }
- sock = ifs.release ();
- };
+ sock = ifs.release ();
+ };
- // Read QEMU welcome.
- //
- readsome ();
+ // Read QEMU welcome.
+ //
+ readsome ();
- // Write our command.
- //
+ // Write our command.
+ //
+ {
+ ofdstream ofs (move (sock), fdstream_mode::blocking);
+ ofs << c << endl;
+ sock = ofs.release ();
+ }
+
+ // Read QEMU reply (may hit eof).
+ //
+ readsome ();
+ return;
+ }
+ catch (const system_error& e)
{
- ofdstream ofs (move (sock), fdstream_mode::blocking);
- ofs << c << endl;
- sock = ofs.release ();
+ diag_record dr; if (fh) dr << fail; else dr << error;
+ dr << "unable to communicate with qemu monitor: " << e;
}
- // Read QEMU reply (may hit eof).
- //
- readsome ();
- }
- catch (const system_error& e)
- {
- fail << "unable to communicate with qemu monitor: " << e;
+ throw failed ();
}
unique_ptr<machine>
diff --git a/bbot/machine.hxx b/bbot/machine.hxx
index 9ea0d48..c15d618 100644
--- a/bbot/machine.hxx
+++ b/bbot/machine.hxx
@@ -15,6 +15,8 @@ namespace bbot
// Note that if the machine is destroyed while it is still running, the
// destructor will block until the machine process terminates.
//
+ // Some functions can fail softly if the fail_hard argument is false.
+ //
class machine
{
public:
@@ -29,7 +31,7 @@ namespace bbot
// Force the machine down.
//
virtual void
- forcedown () = 0;
+ forcedown (bool fail_hard = true) = 0;
// Suspend the machine.
//
@@ -42,13 +44,13 @@ namespace bbot
// otherwise.
//
virtual bool
- wait (size_t& seconds) = 0;
+ wait (size_t& seconds, bool fail_hard = true) = 0;
bool
- wait ()
+ wait (bool fail_hard = true)
{
size_t sec (~0); // Wait indefinitely.
- return wait (sec);
+ return wait (sec, fail_hard);
}
// Print information about the machine (as info diagnostics) that can be
diff --git a/bbot/utility.hxx b/bbot/utility.hxx
index 50756dd..f6f0349 100644
--- a/bbot/utility.hxx
+++ b/bbot/utility.hxx
@@ -80,11 +80,11 @@ namespace bbot
template <typename P>
void
- run_io_finish (tracer&, process&, const P&);
+ run_io_finish (tracer&, process&, const P&, bool fail_hard = true);
template <typename P>
process_exit::code_type
- run_io_finish_exit (tracer&, process&, const P&);
+ run_io_finish_exit (tracer&, process&, const P&, bool fail_hard = true);
template <typename P, typename... A>
inline void
diff --git a/bbot/utility.txx b/bbot/utility.txx
index c35db33..519762b 100644
--- a/bbot/utility.txx
+++ b/bbot/utility.txx
@@ -44,7 +44,7 @@ namespace bbot
template <typename P>
process_exit::code_type
- run_io_finish_exit (tracer&, process& pr, const P& p)
+ run_io_finish_exit (tracer&, process& pr, const P& p, bool fh)
{
try
{
@@ -55,21 +55,32 @@ namespace bbot
if (e.normal ())
return e.code ();
- fail << "process " << p << " terminated abnormally: "
- << e.description () << (e.core () ? " (core dumped)" : "") << endf;
+ diag_record dr; if (fh) dr << fail; else dr << error;
+ dr << "process " << p << " terminated abnormally: "
+ << e.description () << (e.core () ? " (core dumped)" : "");
}
catch (const process_error& e)
{
- fail << "unable to execute " << p << ": " << e << endf;
+ diag_record dr; if (fh) dr << fail; else dr << error;
+ dr << "unable to execute " << p << ": " << e;
}
+
+ throw failed ();
}
template <typename P>
inline void
- run_io_finish (tracer& t, process& pr, const P& p)
+ run_io_finish (tracer& t, process& pr, const P& p, bool fh)
{
- if (run_io_finish_exit (t, pr, p) != 0)
- fail << "process " << p << " terminated with non-zero exit code";
+ if (run_io_finish_exit (t, pr, p, fh) == 0)
+ return;
+
+ {
+ diag_record dr; if (fh) dr << fail; else dr << error;
+ dr << "process " << p << " terminated with non-zero exit code";
+ }
+
+ throw failed ();
}
template <typename I, typename O, typename E, typename P, typename... A>
@@ -171,7 +182,7 @@ namespace bbot
parse_manifest (istream& is,
const string& name,
const char* what,
- bool hard,
+ bool fh,
bool ignore_unknown)
{
using namespace butl;
@@ -183,14 +194,14 @@ namespace bbot
}
catch (const manifest_parsing& e)
{
- diag_record dr; if (hard) dr << fail; else dr << error;
+ diag_record dr; if (fh) dr << fail; else dr << error;
dr << "invalid " << what << " manifest: "
<< name << ':' << e.line << ':' << e.column << ": " << e.description;
}
catch (const io_error& e)
{
- diag_record dr; if (hard) dr << fail; else dr << error;
+ diag_record dr; if (fh) dr << fail; else dr << error;
dr << "unable to read " << what << " manifest " << name << ": " << e;
}
@@ -227,7 +238,7 @@ namespace bbot
ostream& os,
const string& name,
const char* what,
- bool hard)
+ bool fh)
{
using namespace butl;
@@ -239,13 +250,13 @@ namespace bbot
}
catch (const manifest_serialization& e)
{
- diag_record dr; if (hard) dr << fail; else dr << error;
+ diag_record dr; if (fh) dr << fail; else dr << error;
dr << "invalid " << what << " manifest: " << e.description;
}
catch (const io_error& e)
{
- diag_record dr; if (hard) dr << fail; else dr << error;
+ diag_record dr; if (fh) dr << fail; else dr << error;
fail << "unable to write " << what << " manifest " << name << ": " << e;
}