#!/bin/bash # Build OS monitor. It starts as a systemd service and performs the following # steps: # # 1. Bootstrap the build2 toolchain. # 2. Build and start bbot. # 3. Build and start bslave. # 4. Monitor for OS and toolchain changes and reboot if detected. # # @@ What will systemd do if we fail? Perhaps configure it to restart # us? Or not since we may hose the logs. # owd="$(pwd)" trap "{ cd '$owd'; exit 1; }" ERR set -o errtrace # Trap in functions. shopt -s nullglob # Expand patterns that don't match to empty. # Note: diagnostics goes to stderr. # function info () { echo "$*" 1>&2; } function error () { if [ "$#" -gt 0 ]; then info "$*"; fi exit 1 } info "starting build os monitor..." # Parse the kernel command line. This is complicated by the fact that the # values can be quoted, for example: # # foo='foo fox' # bar="bar 'box'" # # First we separate quoted variables and arguments with newlines (giving # priority to assignments). Then we replace whitespaces with newline on # lines that don't contain quotes. Finally, clean up by removing blank # lines. # # Note: the same code as in init. # readarray -t cmdline < <(cat /proc/cmdline | \ sed -r -e "s/([^ ]+=)?('[^']*'|\"[^\"]*\")/\n\1\2\n/g" | \ sed -r -e "/['\"]/!s/ /\n/g" | sed -r -e '/^\s*$/d') # Enter all buildos variables as bash variables. # # Map of toolchain names (as specified in buildos.VAR.NAME) to the # corresponding bash variable prefix. # declare -A toolchains toolchains["default"]="" for v in "${cmdline[@]}"; do var="$(sed -n -re 's/^buildos\.([^=]+)=.*$/\1/p' <<<"$v")" # Extract name. if [ -n "$var" ]; then val="$(sed -re 's/^[^=]+=(.*)$/\1/' <<<"$v")" # Extract value. val="$(sed -re "s/^('(.*)'|\"(.*)\")$/\2\3/" <<<"$val")" # Strip quoted. # If the variable contains a dot, then it is a toolchain variable. 
# if [[ "$var" == *.* ]]; then tn="$(sed -re 's/^[^.]+\.(.+)$/\1/' <<<"$var")" var="${tn}_$(sed -re 's/^([^.]+)\..+$/\1/' <<<"$var")" toolchains["$tn"]="${tn}_" fi declare "$var=$val" fi done hname="$(hostname)" # Get the build id. # buildid="$(sed -n -re 's/^BUILD_ID="(.+)"$/\1/p' /etc/os-release)" function email () # < { (echo -e "Subject: [$hname] $1\n"; cat -) | sendmail -i "$admin_email" } function restart () { sendmail -q # Flush mail queue. sleep 10 # Give any remaining mail chance to go through. sudo systemctl reboot } # Process toolchains. # # Return the value of one of the toolchain_* variables for this toolchain. # function tc_value () # { local n="${1}${2}" echo "${!n}" } toolchain_names=() for tn in "${!toolchains[@]}"; do tp="${toolchains["$tn"]}" tu="$(tc_value "$tp" toolchain_url)" if [ -z "$tu" ]; then continue fi toolchain_names+=("$tn") # The toolchain "sums" file (a list of SHA sums and relative file names, as # produced by shaNNNsum). The first entry should always be build2-toolchain # tar archive itself (which we use to figure out the version). Blank lines # and lines that start with '#' are ignored. # tf="$(sed -n -re 's%^.+/([^/]+)$%\1%p' <<<"$tu")" declare "${tp}toolchain_file=$tf" declare "${tp}toolchain_csum=$(sed -n -re 's%^.+\.([^.]+)$%\1%p' <<<"$tf")" declare "${tp}toolchain_root=/build/tftp/toolchain/$tn" declare "${tp}toolchain_ver=" # If buildos.toolchain_trust was not specified, set it to "no" so that # we don't prompt if the repository happens to be signed. # if [ -z "$(tc_value "$tp" toolchain_trust)" ]; then declare "${tp}toolchain_trust=no" fi done # Divide CPUs and memory (in kB) among the toolchains. # # Reserve 4G of RAM for ourselves (rootfs, tmpfs). # mem_total="$(sed -n -re 's/^MemTotal: *([0-9]+) *kB$/\1/p' - (current machine symlink) # if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then continue fi if [ ! 
-d "$s" ]; then diag+=("$v/$m/$s: error: invalid machine subvolume") fail="true" continue fi # Unless we are deleting the whole thing, keep initial and bootstrapped # (for known toolchains) subvolumes. # if [ "${#ps[@]}" -gt 0 ]; then # - . (initial image) # f= for p in "${ps[@]}"; do if [[ "$s" =~ ^"$m"-"$p"\.[0-9]+$ ]]; then f="true" break fi done if [ -n "$f" ]; then continue fi # - (bootstrapped image) # f= for tn in "${toolchain_names[@]}"; do if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then f="true" break fi done if [ -n "$f" ]; then continue fi fi # This is either a stray working submodule or a bootsrapped subvolume # for a toolchain that was deleted (or we are deleting everything). # if ! btrfs property set -ts "$s" ro false; then diag+=("$v/$m/$s: error: unable to change subvolume property") fail="true" continue fi if ! btrfs subvolume delete "$s"; then diag+=("$v/$m/$s: error: unable to delete subvolume") fail="true" continue fi diag+=("$v/$m/$s: info: deleted subvolume") done cd "$v" # Delete the machine directory (which we expect to be now empty). # if [ "${#ps[@]}" -eq 0 ]; then if ! rmdir "$m"; then diag+=("$v/$m: error: unable to delete machine directory") fail="true" continue fi diag+=("$v/$m: info: deleted machine directory") fi done cd "$owd" done function print_diag () { local p for p in "${diag[@]}"; do echo " $p" done } if [ "${#diag[@]}" -gt 0 ]; then if [ -z "$fail" ]; then s="cleaned up entries in /build/machines/" else s="invalid entries in /build/machines/, halting" fi print_diag | email "$s" info "$s" && print_diag 2>&1 if [ -n "$fail" ]; then info "correct and restart the monitor (systemctl restart buildos)" exit 1 fi fi # Toolchain-related funtions. # # Calculate the file checksum using the shaNNNsum utility. 
# Print the checksum of a file, calculated with the toolchain's shaNNNsum
# utility (the utility name is the toolchain_csum value followed by "sum").
#
# Arguments: $1 - toolchain variable prefix, $2 - file to checksum.
# Outputs:   the first field (the checksum) of the utility's output.
#
function tc_checksum ()
{
  "$(tc_value "$1" toolchain_csum)sum" -b "$2" | \
    sed -n -re 's/^([^ ]+) .+$/\1/p'
}

# Print the predictable (version-less) name for a versioned file name by
# removing the last "-VERSION" component. The version is matched literally,
# so characters such as '.' and '+' have no special meaning.
#
# Arguments: $1 - file name, $2 - version.
# Returns:   1 (printing nothing) if the version is empty, is not present
#            in the name, or has nothing in front of it.
#
function tc_link_name ()
{
  local f="$1" v="$2"

  # Everything before the last "-VERSION" (shortest matching suffix removed).
  #
  local pre="${f%-"$v"*}"

  if [[ -z "$v" || -z "$pre" || "$pre" == "$f" ]]; then
    return 1
  fi

  # Prefix plus whatever follows "-VERSION".
  #
  echo "${pre}${f:$((${#pre} + ${#v} + 1))}"
}

# Fetch a file from the sums file into $toolchain_root, verify its checksum,
# and make a predictable name (without version) symlink.
#
# Arguments: $1 - toolchain variable prefix, $2 - line from the sums file
#            (checksum, space, '*', relative file path).
# Globals:   sets the toolchain_ver variable for this prefix and writes it
#            to the version file in the toolchain root if it is not yet set.
# Returns:   0 on success, 1 on failure (after issuing diagnostics).
#
function tc_fetch ()
{
  local s p f u l tp tu tr tv

  tp="$1"
  tu="$(tc_value "$tp" toolchain_url)"
  tr="$(tc_value "$tp" toolchain_root)"

  s="$(sed -n -re 's/^([^ ]+) .+$/\1/p' <<<"$2")"       # Checksum.
  p="$(sed -n -re 's/^[^ ]+ \*([^ ]+)$/\1/p' <<<"$2")"  # File path (relative).
  f="$(sed -n -re 's%^(.+/)?([^/]+)$%\2%p' <<<"$p")"    # File name.
  u="$(sed -n -re 's%^(.+)/[^/]+$%\1%p' <<<"$tu")/$p"   # File URL.

  if [[ -z "$s" || -z "$p" || -z "$f" || -z "$u" ]]; then
    info "invalid sum line '$2'"
    return 1
  fi

  # Extract the version.
  #
  tv="$(tc_value "$tp" toolchain_ver)"
  if [ -z "$tv" ]; then
    tv="$(sed -n -re 's/build2-toolchain-(.+)\.tar.*/\1/p' <<<"$f")"

    if [ -z "$tv" ]; then
      info "unable to extract toolchain version from '$f'"
      return 1
    fi

    declare -g "${tp}toolchain_ver=$tv"
    info "toolchain version $tv"
    echo "$tv" >"$tr/version"
  fi

  # Derive a predictable name link. The version is matched literally (it
  # used to be pasted into a regular expression where, for example, '+'
  # in the version would prevent the match).
  #
  if ! l="$(tc_link_name "$f" "$tv")" || [[ -z "$l" ]]; then
    info "unable to derive predicatable name from '$f' and '$tv'"
    return 1
  fi

  # Fetch the file.
  #
  info "fetching $u [$l]"

  if ! curl -f -L -s -S -o "$tr/$f" "$u"; then
    info "unable to fetch $u"
    return 1
  fi

  # Verify the checksum.
  #
  info "verifying checksum for $f"

  local cs
  cs="$(tc_checksum "$tp" "$tr/$f")"

  if [ "$cs" != "$s" ]; then
    info "checksum mismatch for $u"
    info "  expected:   $s"
    info "  calculated: $cs"
    return 1
  fi

  # Make the link. Replace an existing one so that fetching the same file
  # again does not fail.
  #
  if ! ln -sf "$f" "$tr/$l"; then
    info "unable to create link $tr/$l"
    return 1
  fi
}

# Bootstrap the toolchain.
#
# Arguments: $1 - toolchain name (a key of the toolchains map).
# Returns:   0 on success, 1 on failure (after issuing diagnostics).
#
function tc_bootstrap ()
{
  local tn="$1"
  local tp="${toolchains["$tn"]}"
  local tr="$(tc_value "$tp" toolchain_root)"
  local tf="$(tc_value "$tp" toolchain_file)"

  # Fetch files according to the sums file. Skip empty lines and those that
  # start with '#'.
  #
  local l ls=()
  readarray -t ls < <(sed -e '/^\s*#/d;/^\s*$/d' "$tr/$tf")

  for l in "${ls[@]}"; do
    if ! tc_fetch "$tp" "$l"; then
      return 1 # Diagnostics has already been issued.
    fi
  done

  local tv="$(tc_value "$tp" toolchain_ver)" # Should be set by tc_fetch().
  local tt="$(tc_value "$tp" toolchain_trust)"

  # Without a version (for example, a sums file with no entries) the
  # directory names below cannot be formed.
  #
  if [ -z "$tv" ]; then
    info "unable to determine toolchain version for $tn"
    return 1
  fi

  # Bootstrap in /tmp/toolchain/$tn/, install to /build/toolchain/$tn/.
  #
  local wd="/tmp/toolchain/$tn"
  local id="/build/toolchain/$tn"

  mkdir -p "$wd"
  mkdir -p "$id"

  if ! cd "$wd"; then
    info "unable to change to $wd"
    return 1
  fi

  local r=1
  while true; do # The "breakout loop".

    # Extract the toolchain.
    #
    if ! tar -xf "$tr/build2-toolchain.tar.xz"; then
      info "unable to extract $tr/build2-toolchain.tar.xz"
      break
    fi

    if ! cd "build2-toolchain-$tv"; then
      info "unable to change to $wd/build2-toolchain-$tv"
      break
    fi

    # Bootstrap, stage, and install using the provided build.sh script.
    #
    if ! ./build.sh --install-dir "$id" --trust "$tt" g++; then
      info "failed to build $(pwd)"
      break
    fi

    if ! cd "$wd"; then
      info "unable to change to $wd"
      break
    fi

    if ! rm -r "build2-toolchain-$tv"; then
      info "unable to remove $wd/build2-toolchain-$tv"
      break
    fi

    # Strip version. What remains matching the pattern after the removal
    # above is renamed; only report success if that worked.
    #
    if ! mv -T build2-toolchain-* build2-toolchain; then
      info "unable to rename build2-toolchain-* in $wd"
      break
    fi

    r=0
    break
  done

  cd "$owd"
  return "$r"
}

# Check if we need to build/start or rebuild/restart the bbot agent. Return
# 0 if nothing to do, 1 for upgrades, 2 for first build, and 3 for failure.
#
# Arguments: $1 - toolchain name.
#
function bb_check ()
{
  local tn="$1"

  export PATH="/build/toolchain/$tn/bin:$PATH" # Running in subshell.

  # Report failure (3) explicitly: any other way of bailing out here with
  # status 1 would read as "upgrade" to the caller.
  #
  if ! cd "/tmp/toolchain/$tn/build2-toolchain"; then
    info "unable to change to /tmp/toolchain/$tn/build2-toolchain"
    return 3
  fi

  local r=3
  local l_stat b_stat
  while true; do # The "breakout loop".

    # Remember the status lines before fetching so that they can be
    # compared with the ones after.
    #
    l_stat="$(bpkg status libbbot)"
    b_stat="$(bpkg status bbot)"

    if ! bpkg fetch -q; then
      info "failed to fetch package information"
      break
    fi

    # See if this is the first time or if we need to upgrade.
    #
    if [ "$(cut -d ' ' -f 1 <<<"$b_stat")" = "configured" ]; then

      # We assume that if anything has changed in the status line, then we
      # have a new version.
      #
      if [[ "$b_stat" == "$(bpkg status bbot)" &&
            "$l_stat" == "$(bpkg status libbbot)" ]]; then
        r=0
        break
      fi

      r=1
      break
    fi

    r=2
    break
  done

  cd "$owd"
  return "$r"
}

# Build and start bbot agent using the bpkg configuration created by
# tc_bootstrap().
# bb_start: build, install, and start the bbot agent for one toolchain,
# stopping and uninstalling the previous one first if bbot is already
# configured.
#
# Arguments: $1 - toolchain name.
# Globals:   reads cpu_slice (number of build jobs) and owd.
# Returns:   0 on success, 1 on failure (after issuing diagnostics).
#
function bb_start ()
{
  local tn="$1"
  local id="/build/bbot/$tn"

  mkdir -p "$id"

  # Install/uninstall vars.
  #
  local vars=(config.install.root="$id" config.bin.rpath="$id/lib")

  export PATH="/build/toolchain/$tn/bin:$PATH" # Running in subshell.

  if ! cd "/tmp/toolchain/$tn/build2-toolchain"; then
    info "unable to change to /tmp/toolchain/$tn/build2-toolchain"
    return 1
  fi

  local r=1
  local b_word
  local sf="$id/lib/systemd/system/bbot-agent@.service"
  while true; do # The "breakout loop".

    b_word="$(bpkg status bbot | cut -d ' ' -f 1)"

    # If upgrading, stop the service and uninstall.
    #
    if [ "$b_word" = "configured" ]; then
      if ! sudo systemctl stop "bbot-agent@$tn"; then
        info "failed to stop bbot-agent@$tn service, assuming not running"
      fi

      if ! bpkg uninstall "${vars[@]}" bbot; then
        info "failed to uninstall bbot agent"
        break
      fi
    fi

    # Build and install the bbot agent.
    #
    if ! bpkg build --build-option --jobs --build-option "$cpu_slice" \
         --yes libbbot bbot; then
      info "failed to build bbot agent"
      break
    fi

    if ! bpkg install "${vars[@]}" bbot; then
      info "failed to install bbot agent"
      break
    fi

    # Post-process and install systemd .service file. Note that we cannot use
    # the systemd pattern machinery since each version of bbot can have its
    # own version of the .service file.
    #
    if ! sed -i -re "s/%[iI]/$tn/g" "$sf"; then
      info "failed to post-process $sf"
      break
    fi

    if ! sudo ln -sf "$sf" "/usr/lib/systemd/system/bbot-agent@$tn.service"; then
      info "failed to install bbot-agent@$tn.service"
      break
    fi

    # Start the service.
    #
    if ! sudo systemctl start "bbot-agent@$tn"; then
      info "failed to start bbot-agent@$tn service"
      break
    fi

    r=0
    break
  done

  cd "$owd"
  return "$r"
}

# Array of bootstrapped toolchains.
#
# The idea is to collect them until we bootstrap all of them and only then
# start their bbot agents.
#
toolchain_boots=()

# NOTE(review): the monitoring loop opens here and continues past this
# block. Its opening is kept exactly as found (on one line, and therefore
# inert) until the remainder of the loop can be restored together with it.
#
# Monitoring loop. # count=0 while true; do count=$(($count + 1)) # Check for toolchain changes. If this is the first run, bootstrap them. 
# for tn in "${toolchain_names[@]}"; do tp="${toolchains["$tn"]}" tu="$(tc_value "$tp" toolchain_url)" tr="$(tc_value "$tp" toolchain_root)" tf="$(tc_value "$tp" toolchain_file)" p="$tr/$tf" mkdir -p "$tr" # Fetch the toolchain sums either to $p if this is the first time or to # $p.new if we are checking for changes. # if [ -e "$p" ]; then f="$p.new" else f="$p" fi if curl -f -L -s -S -o "$f" "$tu"; then # Take care of change detection. # if [ "$f" != "$p" ]; then ts="$(tc_value "$tp" toolchain_file_csum)" cs="$(tc_checksum "$tp" "$f")" if [ "$ts" != "$cs" ]; then email "rebooting because of new $tn toolchain" <&1 | tee "$tr/bootstrap-$count.log" 1>&2 if [ "${PIPESTATUS[0]}" -eq 0 ]; then v="$(cat $tr/version)" declare "${tp}toolchain_ver=$v" s="bootstrapped $tn toolchain $v" toolchain_boots+=("$tn") else s="failed to bootstrap $tn toolchain, waiting for new version" toolchain_boots+=("") # Skip. fi info "$s" email "$s" <&1 | tee "$tr/bbot-$count.log" 1>&2 case "${PIPESTATUS[0]}" in 0) rm -f "$tr/bbot-$count.log" continue # Nothing to do. ;; 1) s="re" ;& 2) info "${s}starting bbot-agent@$tn..." # Append to the same log. # bb_start "$tn" 2>&1 | tee -a "$tr/bbot-$count.log" 1>&2 if [ "${PIPESTATUS[0]}" -eq 0 ]; then s="${s}started bbot-agent@$tn" else s="failed to ${s}start bbot-agent@$tn, waiting for new version" fi ;; *) s="failed to fetch package information for $tn, will try again" ;; esac info "$s" email "$s" <