From c8ef98c0ea3f1ea8a7b920511021ff282b091592 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 6 Apr 2017 09:49:34 +0200 Subject: Clean working snapshots before starting bbot agent --- buildos | 276 +++++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 179 insertions(+), 97 deletions(-) (limited to 'buildos') diff --git a/buildos b/buildos index 512bf27..0547c97 100755 --- a/buildos +++ b/buildos @@ -192,140 +192,203 @@ if [ "${#toolchain_names[@]}" -eq 0 ]; then info "no buildos.toolchain_url specified, not bootstrapping" fi -# Cleanup /build/machines/ of any stray snapshots or deleted machines. +# Machines cleanup (/build/machines/). # diag=() fail= -for v in /build/machines/*; do - if [ ! -d "$v" ]; then - diag+=("$v: error: invalid volume") - fail="true" - continue - fi - cd "$v" +function print_diag () +{ + local p + for p in "${diag[@]}"; do + echo " $p" + done +} + +# Iterate over all the machines and call a function (one of the below +# machines_clean_*()) for each. +# +function machines_for () # ... +{ + local f="$1" + shift - for m in *; do - if [ ! -d "$m" ]; then - diag+=("$v/$m: error: invalid machine") + diag=() + fail= + + local v m + for v in /build/machines/*; do + if [ ! -d "$v" ]; then + diag+=("$v: error: invalid volume") fail="true" continue fi - cd "$m" + cd "$v" - # Collect current machine symlink's bootstrap protocol numbers. If there - # are no current machine symlinks, then we delete the whole thing. - # - ps=() - for s in "$m"-*; do - if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then + for m in *; do + if [ ! -d "$m" ]; then + diag+=("$v/$m: error: invalid machine") + fail="true" + continue + fi - if [ ! -L "$s" ]; then - diag+=("$v/$m/$s: error: not a symlink") - fail="true" - fi + "$f" "$v" "$m" "$@" - # Treat it as if it were a symlink even if its not. Failed that we - # may try to delete the whole thing. - # - ps+=("$(sed -n -re 's/^.+-([0-9]+)$/\1/p' <<<"$s")") - fi done - # Examine each machine subvolume. - # - for s in "$m"-*; do + cd "$owd" + done +} - # -

(current machine symlink) - # - if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then - continue - fi +function machines_clean_subvolume () # +{ + if ! btrfs property set -ts "$1" ro false; then + diag+=("$1: error: unable to change subvolume property") + fail="true" + return 1 + fi + + if ! btrfs subvolume delete "$1"; then + diag+=("$1: error: unable to delete subvolume") + fail="true" + return 1 + fi +} + +# Cleanup the -- entries for the specified toolchain +# called before starting each toolchain. +# +function machines_clean_toolchain () # +{ + local v="$1" + local m="$2" + local tn="$3" + + cd "$m" + + local s + for s in "$m"-"$tn"-*; do + + if [ ! -d "$s" ]; then + diag+=("$v/$m/$s: error: invalid machine subvolume") + fail="true" + continue + fi + + if machines_clean_subvolume "$v/$m/$s"; then + diag+=("$v/$m/$s: info: deleted stray toolchain working subvolume") + fi + done - if [ ! -d "$s" ]; then - diag+=("$v/$m/$s: error: invalid machine subvolume") + cd "$v" +} + +# Cleanup stray snapshots or deleted machines. Called once during startup. +# +function machines_clean_stray () # +{ + local v="$1" + local m="$2" + + cd "$m" + + # Collect current machine symlink's bootstrap protocol numbers. If there + # are no current machine symlinks, then we delete the whole thing. + # + local s ps=() + for s in "$m"-*; do + if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then + + if [ ! -L "$s" ]; then + diag+=("$v/$m/$s: error: not a symlink") fail="true" - continue fi - # Unless we are deleting the whole thing, keep initial and bootstrapped - # (for known toolchains) subvolumes. + # Treat it as if it were a symlink even if its not. Failed that we + # may try to delete the whole thing. # - if [ "${#ps[@]}" -gt 0 ]; then + ps+=("$(sed -n -re 's/^.+-([0-9]+)$/\1/p' <<<"$s")") + fi + done - # -

. (initial image) - # - f= - for p in "${ps[@]}"; do - if [[ "$s" =~ ^"$m"-"$p"\.[0-9]+$ ]]; then - f="true" - break - fi - done + # Examine each machine subvolume. + # + for s in "$m"-*; do - if [ -n "$f" ]; then - continue - fi + # -

(current machine symlink) + # + if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then + continue + fi - # - (bootstrapped image) - # - f= - for tn in "${toolchain_names[@]}"; do - if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then - f="true" - break - fi - done + if [ ! -d "$s" ]; then + diag+=("$v/$m/$s: error: invalid machine subvolume") + fail="true" + continue + fi - if [ -n "$f" ]; then - continue - fi - fi + # Unless we are deleting the whole thing, keep initial and bootstrapped + # (for known toolchains) subvolumes. + # + if [ "${#ps[@]}" -gt 0 ]; then - # This is either a stray working submodule or a bootsrapped subvolume - # for a toolchain that was deleted (or we are deleting everything). + # -

. (initial image) # - if ! btrfs property set -ts "$s" ro false; then - diag+=("$v/$m/$s: error: unable to change subvolume property") - fail="true" - continue - fi + local p f= + for p in "${ps[@]}"; do + if [[ "$s" =~ ^"$m"-"$p"\.[0-9]+$ ]]; then + f="true" + break + fi + done - if ! btrfs subvolume delete "$s"; then - diag+=("$v/$m/$s: error: unable to delete subvolume") - fail="true" + if [ -n "$f" ]; then continue fi - diag+=("$v/$m/$s: info: deleted subvolume") - done - - cd "$v" + # - (bootstrapped image) + # + f= + local tn + for tn in "${toolchain_names[@]}"; do + if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then + f="true" + break + fi + done - # Delete the machine directory (which we expect to be now empty). - # - if [ "${#ps[@]}" -eq 0 ]; then - if ! rmdir "$m"; then - diag+=("$v/$m: error: unable to delete machine directory") - fail="true" + if [ -n "$f" ]; then continue fi + fi - diag+=("$v/$m: info: deleted machine directory") + # This is either a stray working submodule or a bootsrapped subvolume + # for a toolchain that was deleted (or we are deleting everything). + # + if machines_clean_subvolume "$v/$m/$s"; then + diag+=("$v/$m/$s: info: deleted subvolume") fi done - cd "$owd" -done -function print_diag () -{ - local p - for p in "${diag[@]}"; do - echo " $p" - done + cd "$v" + + # Delete the machine directory (which we expect to be now empty). + # + if [ "${#ps[@]}" -eq 0 ]; then + if rmdir "$m"; then + diag+=("$v/$m: info: deleted machine directory") + else + diag+=("$v/$m: error: unable to delete machine directory") + fail="true" + fi + fi } +# Do the initial cleanup. +# +machines_for machines_clean_stray + if [ "${#diag[@]}" -gt 0 ]; then if [ -z "$fail" ]; then s="cleaned up entries in /build/machines/" @@ -334,7 +397,7 @@ if [ "${#diag[@]}" -gt 0 ]; then fi print_diag | email "$s" - info "$s" && print_diag 2>&1 + info "$s" && print_diag 1>&2 if [ -n "$fail" ]; then info "correct and restart the monitor (systemctl restart buildos)" @@ -609,6 +672,25 @@ function bb_start () # sudo ln -sf "$id/lib/systemd/system/bbot-agent@.service" \ "/usr/lib/systemd/system/bbot-agent@$tn.service" + # Clean up any machine snapshots that might have been left behind. + # + machines_for machines_clean_toolchain "$tn" + + if [ "${#diag[@]}" -gt 0 ]; then + if [ -z "$fail" ]; then + info "cleaned up entries in /build/machines/" + else + info "invalid entries in /build/machines/, not starting" + fi + + print_diag 1>&2 + + if [ -n "$fail" ]; then + info "correct and start bbot agent (systemctl start bbot-agent@$tn)" + break + fi + fi + # Start the service. With Type=simple start returns as soon as the process # has forked. To see if the service actually started we wait a bit and # check with status. @@ -748,7 +830,7 @@ EOF 2) info "${s}starting bbot-agent@$tn..." - # Append to the same log. + # Note: appending to the same log. # bb_start "$tn" 2>&1 | tee -a "$tr/bbot-$count.log" 1>&2 -- cgit v1.1