aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2017-04-06 12:02:08 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2017-04-06 12:02:08 +0200
commit6af0f124675b6741dc8f683df902628dfc7e8eb7 (patch)
tree8dfb973f5f4f756bd471844e01eb8c7a595987e7
parent4fb0c6eacbfcee14b36d09d3f89f665bd2c70f36 (diff)
Implement bbot agent monitoring
-rwxr-xr-xbuildos42
1 files changed, 28 insertions, 14 deletions
diff --git a/buildos b/buildos
index 893ccc6..b0095a0 100755
--- a/buildos
+++ b/buildos
@@ -640,9 +640,10 @@ function bbot_start () # <toolchain-name>
info "failed to stop bbot-agent@$tn service, assuming not running"
fi
+ # We may not be able to uninstall if we previously failed to build.
+ #
if ! bpkg uninstall "${vars[@]}" bbot; then
- info "failed to uninstall bbot agent"
- break
+ info "failed to uninstall bbot agent, assuming not installed"
fi
fi
@@ -692,19 +693,14 @@ function bbot_start () # <toolchain-name>
fi
# Start the service. With Type=simple start returns as soon as the process
- # has forked. To see if the service actually started we wait a bit and
- # check with status.
+ # has forked. Checking whether the service actually started is done as part
+ # of service monitoring.
#
if ! sudo systemctl start "bbot-agent@$tn"; then
info "failed to start bbot-agent@$tn service"
break
fi
- if ! sudo systemctl status "bbot-agent@$tn"; then
- info "failed to start bbot-agent@$tn service"
- break
- fi
-
r=0
break
done
@@ -779,7 +775,7 @@ EOF
#
info "bootstrapping $tn toolchain..."
- toolchain_bootstrap "$tn" 2>&1 | tee "$tr/bootstrap-$count.log" 1>&2
+ toolchain_bootstrap "$tn" 2>&1 | tee "$tr/toolchain-$count.log" 1>&2
if [ "${PIPESTATUS[0]}" -eq 0 ]; then
v="$(cat $tr/version)"
@@ -794,7 +790,7 @@ EOF
info "$s"
email "$s" <<EOF
-$tn.bootstrap_log: tftp://$hname/toolchain/$tn/bootstrap-$count.log
+$tn.toolchain_log: tftp://$hname/toolchain/$tn/toolchain-$count.log
EOF
fi
else
@@ -804,7 +800,7 @@ EOF
done
# If we have bootstrapped all the toolchains, (re)build and (re)start their
- # bbot agents.
+ # bbot agents and then monitor them.
#
if [ "${#toolchain_names[@]}" -eq "${#toolchain_boots[@]}" ]; then
@@ -822,7 +818,25 @@ EOF
case "${PIPESTATUS[0]}" in
0)
rm -f "$tr/bbot-$count.log"
- continue # Nothing to do.
+
+ # Check if the service has failed.
+ #
+ if sudo systemctl is-failed --quiet "bbot-agent@$tn"; then
+ s="bbot-agent@$tn service has failed, stopping"
+
+ # Note: ignore errors.
+ #
+ sudo systemctl status "bbot-agent@$tn" 2>&1 | \
+ tee "$tr/bbot-$count.log" 1>&2
+
+ # Reset it so that we don't keep sending the log on each
+ # iteration. Note: ignore errors.
+ #
+ sudo systemctl reset-failed "bbot-agent@$tn" 2>&1 | \
+ tee -a "$tr/bbot-$count.log" 1>&2
+ else
+ continue # Nothing to do.
+ fi
;;
1)
s="re"
@@ -847,7 +861,7 @@ EOF
info "$s"
email "$s" <<EOF
-$tn.start_log: tftp://$hname/toolchain/$tn/bbot-$count.log
+$tn.bbot_log: tftp://$hname/toolchain/$tn/bbot-$count.log
EOF
done
fi