aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2017-04-06 09:49:34 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2017-04-06 09:49:34 +0200
commit c8ef98c0ea3f1ea8a7b920511021ff282b091592 (patch)
tree e22a18f2eea476a997368ddebe45b6e2a7c9d55c
parent 205bc8f9e657f6890c62dae8f5ceafe914f65d5f (diff)
Clean working snapshots before starting bbot agent
-rwxr-xr-x buildos 276
1 files changed, 179 insertions, 97 deletions
diff --git a/buildos b/buildos
index 512bf27..0547c97 100755
--- a/buildos
+++ b/buildos
@@ -192,140 +192,203 @@ if [ "${#toolchain_names[@]}" -eq 0 ]; then
info "no buildos.toolchain_url specified, not bootstrapping"
fi
-# Cleanup /build/machines/ of any stray snapshots or deleted machines.
+# Machines cleanup (/build/machines/).
#
diag=()
fail=
-for v in /build/machines/*; do
- if [ ! -d "$v" ]; then
- diag+=("$v: error: invalid volume")
- fail="true"
- continue
- fi
- cd "$v"
+function print_diag ()
+{
+ local p
+ for p in "${diag[@]}"; do
+ echo " $p"
+ done
+}
+
+# Iterate over all the machines and call a function (one of the below
+# machines_clean_*()) for each.
+#
+function machines_for () # <function> <function-args>...
+{
+ local f="$1"
+ shift
- for m in *; do
- if [ ! -d "$m" ]; then
- diag+=("$v/$m: error: invalid machine")
+ diag=()
+ fail=
+
+ local v m
+ for v in /build/machines/*; do
+ if [ ! -d "$v" ]; then
+ diag+=("$v: error: invalid volume")
fail="true"
continue
fi
- cd "$m"
+ cd "$v"
- # Collect current machine symlink's bootstrap protocol numbers. If there
- # are no current machine symlinks, then we delete the whole thing.
- #
- ps=()
- for s in "$m"-*; do
- if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then
+ for m in *; do
+ if [ ! -d "$m" ]; then
+ diag+=("$v/$m: error: invalid machine")
+ fail="true"
+ continue
+ fi
- if [ ! -L "$s" ]; then
- diag+=("$v/$m/$s: error: not a symlink")
- fail="true"
- fi
+ "$f" "$v" "$m" "$@"
- # Treat it as if it were a symlink even if its not. Failed that we
- # may try to delete the whole thing.
- #
- ps+=("$(sed -n -re 's/^.+-([0-9]+)$/\1/p' <<<"$s")")
- fi
done
- # Examine each machine subvolume.
- #
- for s in "$m"-*; do
+ cd "$owd"
+ done
+}
- # <name>-<P> (current machine symlink)
- #
- if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then
- continue
- fi
+function machines_clean_subvolume () # <subvolume-path>
+{
+ if ! btrfs property set -ts "$1" ro false; then
+ diag+=("$1: error: unable to change subvolume property")
+ fail="true"
+ return 1
+ fi
+
+ if ! btrfs subvolume delete "$1"; then
+ diag+=("$1: error: unable to delete subvolume")
+ fail="true"
+ return 1
+ fi
+}
+
+# Clean up the <name>-<toolchain>-<xxx> entries for the specified toolchain.
+# Called before starting each toolchain's bbot agent.
+#
+function machines_clean_toolchain () # <volume-dir> <machine> <toolchain>
+{
+ local v="$1"
+ local m="$2"
+ local tn="$3"
+
+ cd "$m"
+
+ local s
+ for s in "$m"-"$tn"-*; do
+
+ if [ ! -d "$s" ]; then
+ diag+=("$v/$m/$s: error: invalid machine subvolume")
+ fail="true"
+ continue
+ fi
+
+ if machines_clean_subvolume "$v/$m/$s"; then
+ diag+=("$v/$m/$s: info: deleted stray toolchain working subvolume")
+ fi
+ done
- if [ ! -d "$s" ]; then
- diag+=("$v/$m/$s: error: invalid machine subvolume")
+ cd "$v"
+}
+
+# Clean up stray snapshots or deleted machines. Called once during startup.
+#
+function machines_clean_stray () # <volume-dir> <machine>
+{
+ local v="$1"
+ local m="$2"
+
+ cd "$m"
+
+ # Collect the bootstrap protocol numbers of the current machine symlinks. If
+ # there are no current machine symlinks, then we delete the whole thing.
+ #
+ local s ps=()
+ for s in "$m"-*; do
+ if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then
+
+ if [ ! -L "$s" ]; then
+ diag+=("$v/$m/$s: error: not a symlink")
fail="true"
- continue
fi
- # Unless we are deleting the whole thing, keep initial and bootstrapped
- # (for known toolchains) subvolumes.
+ # Treat it as if it were a symlink even if it's not. Otherwise we may
+ # try to delete the whole thing.
#
- if [ "${#ps[@]}" -gt 0 ]; then
+ ps+=("$(sed -n -re 's/^.+-([0-9]+)$/\1/p' <<<"$s")")
+ fi
+ done
- # <name>-<P>.<R> (initial image)
- #
- f=
- for p in "${ps[@]}"; do
- if [[ "$s" =~ ^"$m"-"$p"\.[0-9]+$ ]]; then
- f="true"
- break
- fi
- done
+ # Examine each machine subvolume.
+ #
+ for s in "$m"-*; do
- if [ -n "$f" ]; then
- continue
- fi
+ # <name>-<P> (current machine symlink)
+ #
+ if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then
+ continue
+ fi
- # <name>-<toolchain> (bootstrapped image)
- #
- f=
- for tn in "${toolchain_names[@]}"; do
- if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then
- f="true"
- break
- fi
- done
+ if [ ! -d "$s" ]; then
+ diag+=("$v/$m/$s: error: invalid machine subvolume")
+ fail="true"
+ continue
+ fi
- if [ -n "$f" ]; then
- continue
- fi
- fi
+ # Unless we are deleting the whole thing, keep initial and bootstrapped
+ # (for known toolchains) subvolumes.
+ #
+ if [ "${#ps[@]}" -gt 0 ]; then
- # This is either a stray working submodule or a bootsrapped subvolume
- # for a toolchain that was deleted (or we are deleting everything).
+ # <name>-<P>.<R> (initial image)
#
- if ! btrfs property set -ts "$s" ro false; then
- diag+=("$v/$m/$s: error: unable to change subvolume property")
- fail="true"
- continue
- fi
+ local p f=
+ for p in "${ps[@]}"; do
+ if [[ "$s" =~ ^"$m"-"$p"\.[0-9]+$ ]]; then
+ f="true"
+ break
+ fi
+ done
- if ! btrfs subvolume delete "$s"; then
- diag+=("$v/$m/$s: error: unable to delete subvolume")
- fail="true"
+ if [ -n "$f" ]; then
continue
fi
- diag+=("$v/$m/$s: info: deleted subvolume")
- done
-
- cd "$v"
+ # <name>-<toolchain> (bootstrapped image)
+ #
+ f=
+ local tn
+ for tn in "${toolchain_names[@]}"; do
+ if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then
+ f="true"
+ break
+ fi
+ done
- # Delete the machine directory (which we expect to be now empty).
- #
- if [ "${#ps[@]}" -eq 0 ]; then
- if ! rmdir "$m"; then
- diag+=("$v/$m: error: unable to delete machine directory")
- fail="true"
+ if [ -n "$f" ]; then
continue
fi
+ fi
- diag+=("$v/$m: info: deleted machine directory")
+ # This is either a stray working subvolume or a bootstrapped subvolume
+ # for a toolchain that was deleted (or we are deleting everything).
+ #
+ if machines_clean_subvolume "$v/$m/$s"; then
+ diag+=("$v/$m/$s: info: deleted subvolume")
fi
done
- cd "$owd"
-done
-function print_diag ()
-{
- local p
- for p in "${diag[@]}"; do
- echo " $p"
- done
+ cd "$v"
+
+ # Delete the machine directory (which we expect to be now empty).
+ #
+ if [ "${#ps[@]}" -eq 0 ]; then
+ if rmdir "$m"; then
+ diag+=("$v/$m: info: deleted machine directory")
+ else
+ diag+=("$v/$m: error: unable to delete machine directory")
+ fail="true"
+ fi
+ fi
}
+# Do the initial cleanup.
+#
+machines_for machines_clean_stray
+
if [ "${#diag[@]}" -gt 0 ]; then
if [ -z "$fail" ]; then
s="cleaned up entries in /build/machines/"
@@ -334,7 +397,7 @@ if [ "${#diag[@]}" -gt 0 ]; then
fi
print_diag | email "$s"
- info "$s" && print_diag 2>&1
+ info "$s" && print_diag 1>&2
if [ -n "$fail" ]; then
info "correct and restart the monitor (systemctl restart buildos)"
@@ -609,6 +672,25 @@ function bb_start () # <toolchain-name>
sudo ln -sf "$id/lib/systemd/system/bbot-agent@.service" \
"/usr/lib/systemd/system/bbot-agent@$tn.service"
+ # Clean up any machine snapshots that might have been left behind.
+ #
+ machines_for machines_clean_toolchain "$tn"
+
+ if [ "${#diag[@]}" -gt 0 ]; then
+ if [ -z "$fail" ]; then
+ info "cleaned up entries in /build/machines/"
+ else
+ info "invalid entries in /build/machines/, not starting"
+ fi
+
+ print_diag 1>&2
+
+ if [ -n "$fail" ]; then
+ info "correct and start bbot agent (systemctl start bbot-agent@$tn)"
+ break
+ fi
+ fi
+
# Start the service. With Type=simple start returns as soon as the process
# has forked. To see if the service actually started we wait a bit and
# check with status.
@@ -748,7 +830,7 @@ EOF
2)
info "${s}starting bbot-agent@$tn..."
- # Append to the same log.
+ # Note: appending to the same log.
#
bb_start "$tn" 2>&1 | tee -a "$tr/bbot-$count.log" 1>&2