aboutsummaryrefslogtreecommitdiff
path: root/buildos
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2017-04-04 14:03:28 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2017-04-04 14:03:28 +0200
commit 1fbd27f288cf31625cff567788aaa0be66caffbd (patch)
tree 82436078a7d247f798fa4b2029a3a3dcf6cabe32 /buildos
parent e3889e5ccde6ef7b8a1377ced008252c3a13831b (diff)
Implement bbot agent startup and monitoring
Diffstat (limited to 'buildos')
-rwxr-xr-x buildos 569
1 files changed, 387 insertions, 182 deletions
diff --git a/buildos b/buildos
index e9c6acd..2de20f9 100755
--- a/buildos
+++ b/buildos
@@ -95,7 +95,7 @@ function restart ()
sudo systemctl reboot
}
-# Toolchain-related funtions.
+# Process toolchains.
#
# Return the value of one of the toolchain_* variables for this toolchain.
@@ -106,172 +106,74 @@ function tc_value () # <toolchain-prefix> <variable>
echo "${!n}"
}
-# Calculate the file checksum using the shaNNNsum utility.
-#
-function tc_checksum () # <toolchain-prefix> <file>
-{
- "$(tc_value "$1" toolchain_csum)sum" -b "$2" | \
- sed -n -re 's/^([^ ]+) .+$/\1/p'
-}
-
-# Fetch a file from the sums file into $toolchain_root, verify its checksum,
-# and make a predictable name (without version) symlink.
-#
-function tc_fetch () # <toolchain-prefix> <line>
-{
- local s p f u l tp tu tr tv
-
- tp="$1"
+toolchain_names=()
+for tn in "${!toolchains[@]}"; do
+ tp="${toolchains["$tn"]}"
tu="$(tc_value "$tp" toolchain_url)"
- tr="$(tc_value "$tp" toolchain_root)"
- s="$(sed -n -re 's/^([^ ]+) .+$/\1/p' <<<"$2")" # Checksum.
- p="$(sed -n -re 's/^[^ ]+ \*([^ ]+)$/\1/p' <<<"$2")" # File path (relative).
- f="$(sed -n -re 's%^(.+/)?([^/]+)$%\2%p' <<<"$p")" # File name.
- u="$(sed -n -re 's%^(.+)/[^/]+$%\1%p' <<<"$tu")/$p" # File URL.
-
- if [ -z "$s" -o -z "$p" -o -z "$f" -o -z "$u" ]; then
- info "invalid sum line '$2'"
- return 1
- fi
-
- # Extract the version.
- #
- tv="$(tc_value "$tp" toolchain_ver)"
-
- if [ -z "$tv" ]; then
- tv="$(sed -n -re 's/build2-toolchain-(.+)\.tar.*/\1/p' <<<"$f")"
-
- if [ -z "$tv" ]; then
- info "unable to extract toolchain version from '$f'"
- return 1
- fi
-
- declare -g "${tp}toolchain_ver=$tv"
-
- info "toolchain version $tv"
- echo "$tv" >"$tr/version"
+ if [ -z "$tu" ]; then
+ continue
fi
- # Derive a predictable name link.
- #
- l="$(sed -n -re "s/^(.+)-$tv(.*)$/\1\2/p" <<<"$f")"
-
- if [ -z "$l" ]; then
- info "unable to derive predicatable name from '$f' and '$tv'"
- return 1
- fi
+ toolchain_names+=("$tn")
- # Fetch the file.
+ # The toolchain "sums" file (a list of SHA sums and relative file names, as
+ # produced by shaNNNsum). The first entry should always be build2-toolchain
+ # tar archive itself (which we use to figure out the version). Blank lines
+ # and lines that start with '#' are ignored.
#
- info "fetching $u [$l]"
+ tf="$(sed -n -re 's%^.+/([^/]+)$%\1%p' <<<"$tu")"
- if ! curl -f -L -s -S -o "$tr/$f" "$u"; then
- info "unable to fetch $u"
- return 1
- fi
+ declare "${tp}toolchain_file=$tf"
+ declare "${tp}toolchain_csum=$(sed -n -re 's%^.+\.([^.]+)$%\1%p' <<<"$tf")"
+ declare "${tp}toolchain_root=/build/tftp/toolchain/$tn"
+ declare "${tp}toolchain_ver="
- # Verify the checksum.
+ # If buildos.toolchain_trust was not specified, set it to "no" so that
+ # we don't prompt if the repository happens to be signed.
#
- info "verifying checksum for $f"
-
- local cs
- cs="$(tc_checksum "$tp" "$tr/$f")"
-
- if [ "$cs" != "$s" ]; then
- info "checksum mismatch for $u"
- info " expected: $s"
- info " calculated: $cs"
- return 1
+ if [ -z "$(tc_value "$tp" toolchain_trust)" ]; then
+ declare "${tp}toolchain_trust=no"
fi
+done
- # Make the link.
- #
- ln -s "$f" "$tr/$l"
-}
-
-# Bootstrap the toolchain.
+# Divide CPUs and memory (in kB) among the toolchains.
#
-function tc_bootstrap () # <toolchain-name>
-{
- local tn="$1"
- local tp="${toolchains["$tn"]}"
- local tr="$(tc_value "$tp" toolchain_root)"
- local tf="$(tc_value "$tp" toolchain_file)"
-
- # Fetch files according to the sums file. Skip empty lines and those that
- # start with '#'.
- #
- local l ls=()
-
- readarray -t ls < <(sed -e '/^\s*#/d;/^\s*$/d' "$tr/$tf")
-
- for l in "${ls[@]}"; do
- if ! tc_fetch "$tp" "$l"; then
- return 1 # Diagnostics has already been issued.
- fi
- done
-
- local tv="$(tc_value "$tp" toolchain_ver)" # Should be set by tc_fetch().
- local tt="$(tc_value "$tp" toolchain_trust)"
-
- # Bootstrap in /tmp/toolchains/$tn/, install to /build/toolchains/$tn/.
- #
- local wd="/tmp/toolchains/$tn"
- local id="/build/toolchains/$tn"
-
- mkdir -p "$wd"
- mkdir -p "$id"
-
- local r=1
-
- cd "$wd"
- while true; do # The "breakout loop".
-
- # Extract the toolchain.
- #
- if ! tar -xf "$tr/build2-toolchain.tar.xz"; then
- info "unable to extract $tr/build2-toolchain.tar.xz"
- break
- fi
-
- cd "build2-toolchain-$tv"
-
- # Bootstrap, stage, and install using the provided build.sh script.
- #
- if ! ./build.sh --install-dir "$id" --trust "$tt" g++; then
- info "failed to build $(pwd)"
- break
- fi
+# Reserve 4G of RAM for ourselves (rootfs, tmpfs).
+#
+mem_total="$(sed -n -re 's/^MemTotal: *([0-9]+) *kB$/\1/p' </proc/meminfo)"
+cpu_total="$(lscpu | sed -n -re 's/^CPU\(s\): *([0-9]+)$/\1/p')"
- r=0
- break
- done
- cd "$owd"
+mem_slice=$(("$mem_total" - 4 * 1024 * 1024))
+cpu_slice="$cpu_total"
- # Clean up.
- #
- rm -r "$wd"
+if [ "${#toolchain_names[@]}" -gt 1 ]; then
+ mem_slice=$(("$mem_slice" / "${#toolchain_names[@]}"))
+ cpu_slice=$(("$cpu_slice" / "${#toolchain_names[@]}"))
- return "$r"
-}
+ if [ "$cpu_slice" -eq 0 ]; then
+ cpu_slice=1
+ fi
+fi
# Print monitor configuration as email body.
#
function print ()
{
+ echo "cpu_total: $cpu_total"
+ echo "cpu_slice: $cpu_slice"
+
+ echo "mem_total: $mem_total kB"
+ echo "mem_slice: $mem_slice kB"
+ echo
+
echo "buildid: $buildid"
echo "buildid_url: $buildid_url"
echo
- for tn in "${!toolchains[@]}"; do
+ for tn in "${toolchain_names[@]}"; do
tp="${toolchains["$tn"]}"
tu="$(tc_value "$tp" toolchain_url)"
-
- if [ -z "$tu" ]; then
- continue
- fi
-
tt="$(tc_value "$tp" toolchain_trust)"
echo "$tn.toolchain_url: $tu"
@@ -286,38 +188,7 @@ if [ -z "$buildid_url" ]; then
info "no buildos.buildid_url specified, not monitoring for new os builds"
fi
-tc=
-for tn in "${!toolchains[@]}"; do
- tp="${toolchains["$tn"]}"
- tu="$(tc_value "$tp" toolchain_url)"
-
- if [ -z "$tu" ]; then
- continue
- fi
-
- tc="true"
-
- # The toolchain "sums" file (a list of SHA sums and relative file names, as
- # produced by shaNNNsum). The first entry should always be build2-toolchain
- # tar archive itself (which we use to figure out the version). Blank lines
- # and lines that start with '#' are ignored.
- #
- tf="$(sed -n -re 's%^.+/([^/]+)$%\1%p' <<<"$tu")"
-
- declare "${tp}toolchain_file=$tf"
- declare "${tp}toolchain_csum=$(sed -n -re 's%^.+\.([^.]+)$%\1%p' <<<"$tf")"
- declare "${tp}toolchain_root=/build/tftp/toolchains/$tn"
- declare "${tp}toolchain_ver="
-
- # If buildos.toolchain_trust was not specified, set it to "no" so that
- # we don't prompt if the repository happens to be signed.
- #
- if [ -z "$(tc_value "$tp" toolchain_trust)" ]; then
- declare "${tp}toolchain_trust=no"
- fi
-done
-
-if [ -z "$tc" ]; then
+if [ "${#toolchain_names[@]}" -eq 0 ]; then
info "no buildos.toolchain_url specified, not bootstrapping"
fi
@@ -400,7 +271,7 @@ for v in /build/machines/*; do
# <name>-<toolchain> (bootstrapped image)
#
f=
- for tn in "${!toolchains[@]}"; do
+ for tn in "${toolchain_names[@]}"; do
if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then
f="true"
break
@@ -471,20 +342,303 @@ if [ "${#diag[@]}" -gt 0 ]; then
fi
fi
+# Toolchain-related functions.
+#
+
+# Calculate the file checksum using the shaNNNsum utility.
+#
+function tc_checksum () # <toolchain-prefix> <file>
+{
+ "$(tc_value "$1" toolchain_csum)sum" -b "$2" | \
+ sed -n -re 's/^([^ ]+) .+$/\1/p'
+}
+
+# Fetch a file from the sums file into $toolchain_root, verify its checksum,
+# and make a predictable name (without version) symlink.
+#
+function tc_fetch () # <toolchain-prefix> <line>
+{
+ local s p f u l tp tu tr tv
+
+ tp="$1"
+ tu="$(tc_value "$tp" toolchain_url)"
+ tr="$(tc_value "$tp" toolchain_root)"
+
+ s="$(sed -n -re 's/^([^ ]+) .+$/\1/p' <<<"$2")" # Checksum.
+ p="$(sed -n -re 's/^[^ ]+ \*([^ ]+)$/\1/p' <<<"$2")" # File path (relative).
+ f="$(sed -n -re 's%^(.+/)?([^/]+)$%\2%p' <<<"$p")" # File name.
+ u="$(sed -n -re 's%^(.+)/[^/]+$%\1%p' <<<"$tu")/$p" # File URL.
+
+ if [ -z "$s" -o -z "$p" -o -z "$f" -o -z "$u" ]; then
+ info "invalid sum line '$2'"
+ return 1
+ fi
+
+ # Extract the version.
+ #
+ tv="$(tc_value "$tp" toolchain_ver)"
+
+ if [ -z "$tv" ]; then
+ tv="$(sed -n -re 's/build2-toolchain-(.+)\.tar.*/\1/p' <<<"$f")"
+
+ if [ -z "$tv" ]; then
+ info "unable to extract toolchain version from '$f'"
+ return 1
+ fi
+
+ declare -g "${tp}toolchain_ver=$tv"
+
+ info "toolchain version $tv"
+ echo "$tv" >"$tr/version"
+ fi
+
+ # Derive a predictable name link.
+ #
+ l="$(sed -n -re "s/^(.+)-$tv(.*)$/\1\2/p" <<<"$f")"
+
+ if [ -z "$l" ]; then
+ info "unable to derive predicatable name from '$f' and '$tv'"
+ return 1
+ fi
+
+ # Fetch the file.
+ #
+ info "fetching $u [$l]"
+
+ if ! curl -f -L -s -S -o "$tr/$f" "$u"; then
+ info "unable to fetch $u"
+ return 1
+ fi
+
+ # Verify the checksum.
+ #
+ info "verifying checksum for $f"
+
+ local cs
+ cs="$(tc_checksum "$tp" "$tr/$f")"
+
+ if [ "$cs" != "$s" ]; then
+ info "checksum mismatch for $u"
+ info " expected: $s"
+ info " calculated: $cs"
+ return 1
+ fi
+
+ # Make the link.
+ #
+ ln -s "$f" "$tr/$l"
+}
+
+# Bootstrap the toolchain.
+#
+function tc_bootstrap () # <toolchain-name>
+{
+ local tn="$1"
+ local tp="${toolchains["$tn"]}"
+ local tr="$(tc_value "$tp" toolchain_root)"
+ local tf="$(tc_value "$tp" toolchain_file)"
+
+ # Fetch files according to the sums file. Skip empty lines and those that
+ # start with '#'.
+ #
+ local l ls=()
+
+ readarray -t ls < <(sed -e '/^\s*#/d;/^\s*$/d' "$tr/$tf")
+
+ for l in "${ls[@]}"; do
+ if ! tc_fetch "$tp" "$l"; then
+ return 1 # Diagnostics has already been issued.
+ fi
+ done
+
+ local tv="$(tc_value "$tp" toolchain_ver)" # Should be set by tc_fetch().
+ local tt="$(tc_value "$tp" toolchain_trust)"
+
+ # Bootstrap in /tmp/toolchain/$tn/, install to /build/toolchain/$tn/.
+ #
+ local wd="/tmp/toolchain/$tn"
+ local id="/build/toolchain/$tn"
+
+ mkdir -p "$wd"
+ mkdir -p "$id"
+
+ local r=1
+
+ cd "$wd"
+ while true; do # The "breakout loop".
+
+ # Extract the toolchain.
+ #
+ if ! tar -xf "$tr/build2-toolchain.tar.xz"; then
+ info "unable to extract $tr/build2-toolchain.tar.xz"
+ break
+ fi
+
+ cd "build2-toolchain-$tv"
+
+ # Bootstrap, stage, and install using the provided build.sh script.
+ #
+ if ! ./build.sh --install-dir "$id" --trust "$tt" g++; then
+ info "failed to build $(pwd)"
+ break
+ fi
+
+ cd "$wd"
+ rm -r "build2-toolchain-$tv"
+ mv -T build2-toolchain-* build2-toolchain # Strip version.
+
+ r=0
+ break
+ done
+ cd "$owd"
+
+ return "$r"
+}
+
+# Check if we need to build/start or rebuild/restart the bbot agent. Return
+# 0 if nothing to do, 1 for upgrades, 2 for first build, and 3 for failure.
+#
+function bb_check () # <toolchain-name>
+{
+ local tn="$1"
+
+ export PATH="/build/toolchain/$tn/bin:$PATH" # Running in subshell.
+
+ cd "/tmp/toolchain/$tn/build2-toolchain"
+
+ local r=3
+
+ local l_stat b_stat
+ while true; do # The "breakout loop".
+
+ l_stat="$(bpkg status libbbot)"
+ b_stat="$(bpkg status bbot)"
+
+ if ! bpkg fetch -q; then
+ info "failed to fetch package information"
+ break
+ fi
+
+ # See if this is the first time or if we need to upgrade.
+ #
+ if [ "$(cut -d ' ' -f 1 <<<"$b_stat")" = "configured" ]; then
+
+ # We assume that if anything has changed in the status line, then we
+ # have a new version.
+ #
+ if [ "$b_stat" = "$(bpkg status bbot)" -a \
+ "$l_stat" = "$(bpkg status libbbot)" ]; then
+ r=0
+ break
+ fi
+
+ r=1
+ break
+ fi
+
+ r=2
+ break
+ done
+ cd "$owd"
+
+ return "$r"
+}
+
+# Build and start bbot agent using the bpkg configuration created by
+# tc_bootstrap().
+#
+function bb_start () # <toolchain-name>
+{
+ local tn="$1"
+
+ local id="/build/bbot/$tn"
+ mkdir -p "$id"
+
+ # Install/uninstall vars.
+ #
+ local vars=(config.install.root="$id" config.bin.rpath="$id/lib")
+
+ export PATH="/build/toolchain/$tn/bin:$PATH" # Running in subshell.
+
+ cd "/tmp/toolchain/$tn/build2-toolchain"
+
+ local r=1
+
+ local b_word
+ while true; do # The "breakout loop".
+
+ b_word="$(bpkg status bbot | cut -d ' ' -f 1)"
+
+ # If upgrading, stop the service and uninstall.
+ #
+ if [ "$b_word" = "configured" ]; then
+
+ if ! sudo systemctl stop "bbot-agent@$tn"; then
+ info "failed to stop bbot-agent@$tn service, assuming not running"
+ fi
+
+ if ! bpkg uninstall "${vars[@]}" bbot; then
+ info "failed to uninstall bbot agent"
+ break
+ fi
+ fi
+
+ # Build and install the bbot agent.
+ #
+ if ! bpkg build --build-option --jobs --build-option "$cpu_slice" \
+ --yes libbbot bbot; then
+ info "failed to build bbot agent"
+ break
+ fi
+
+ if ! bpkg install "${vars[@]}" bbot; then
+ info "failed to install bbot agent"
+ break
+ fi
+
+ # Post-process and install systemd .service file. Note that we cannot use
+ # the systemd pattern machinery since each version of bbot can have its
+ # own version of the .service file.
+ #
+ sed -i -re "s/%[iI]/$tn/g" "$id/lib/systemd/system/bbot-agent@.service"
+ sudo ln -sf "$id/lib/systemd/system/bbot-agent@.service" \
+ "/usr/lib/systemd/system/bbot-agent@$tn.service"
+
+ # Start the service.
+ #
+ if ! sudo systemctl start "bbot-agent@$tn"; then
+ info "failed to start bbot-agent@$tn service"
+ break
+ fi
+
+ r=0
+ break
+ done
+ cd "$owd"
+
+ return "$r"
+}
+
+# Array of bootstrapped toolchains.
+#
+# The idea is to collect them until we bootstrap all of them and only then
+# start their bbot agents.
+#
+toolchain_boots=()
+
# Monitoring loop.
#
+count=0
while true; do
+ count=$(($count + 1))
+
# Check for toolchain changes. If this is the first run, bootstrap them.
#
- for tn in "${!toolchains[@]}"; do
+ for tn in "${toolchain_names[@]}"; do
tp="${toolchains["$tn"]}"
tu="$(tc_value "$tp" toolchain_url)"
- if [ -z "$tu" ]; then
- continue
- fi
-
tr="$(tc_value "$tp" toolchain_root)"
tf="$(tc_value "$tp" toolchain_file)"
p="$tr/$tf"
@@ -531,20 +685,22 @@ EOF
#
info "bootstrapping $tn toolchain..."
- tc_bootstrap "$tn" 2>&1 | tee "$tr/bootstrap.log" 1>&2
+ tc_bootstrap "$tn" 2>&1 | tee "$tr/bootstrap-$count.log" 1>&2
if [ "${PIPESTATUS[0]}" -eq 0 ]; then
v="$(cat $tr/version)"
declare "${tp}toolchain_ver=$v"
s="bootstrapped $tn toolchain $v"
+ toolchain_boots+=("$tn")
else
s="failed to bootstrap $tn toolchain, waiting for new version"
+ toolchain_boots+=("") # Skip.
fi
info "$s"
email "$s" <<EOF
-$tn.bootstrap_log: tftp://$hname/toolchains/$tn/bootstrap.log
+$tn.bootstrap_log: tftp://$hname/toolchain/$tn/bootstrap-$count.log
EOF
fi
else
@@ -553,6 +709,55 @@ EOF
fi
done
+ # If we have bootstrapped all the toolchains, (re)build and (re)start their
+ # bbot agents.
+ #
+ if [ "${#toolchain_names[@]}" -eq "${#toolchain_boots[@]}" ]; then
+
+ for tn in "${toolchain_boots[@]}"; do
+
+ # Skip those that failed to bootstrap.
+ #
+ if [ -z "$tn" ]; then
+ continue
+ fi
+
+ s=
+ bb_check "$tn" 2>&1 | tee "$tr/bbot-$count.log" 1>&2
+
+ case "${PIPESTATUS[0]}" in
+ 0)
+ rm -f "$tr/bbot-$count.log"
+ continue # Nothing to do.
+ ;;
+ 1)
+ s="re"
+ ;&
+ 2)
+ info "${s}starting bbot-agent@$tn..."
+
+ # Append to the same log.
+ #
+ bb_start "$tn" 2>&1 | tee -a "$tr/bbot-$count.log" 1>&2
+
+ if [ "${PIPESTATUS[0]}" -eq 0 ]; then
+ s="${s}started bbot-agent@$tn"
+ else
+ s="failed to ${s}start bbot-agent@$tn, waiting for new version"
+ fi
+ ;;
+ *)
+ s="failed to fetch package information for $tn, will try again"
+ ;;
+ esac
+
+ info "$s"
+ email "$s" <<EOF
+$tn.start_log: tftp://$hname/toolchain/$tn/bbot-$count.log
+EOF
+ done
+ fi
+
# Check for OS changes.
#
if [ -n "$buildid_url" ]; then
@@ -574,5 +779,5 @@ EOF
fi
info "monitoring..."
- sleep 10
+ sleep 20
done