From b304f7b1c52550fef0d0a116e0155f247c16141c Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 31 Mar 2017 15:51:48 +0200 Subject: Establish build machine layout --- buildos | 125 +++++++++++++++++++++++++++++++++++++++++++++++- doc/manual.cli | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ init | 11 ++++- 3 files changed, 281 insertions(+), 3 deletions(-) diff --git a/buildos b/buildos index 7aa9a0e..ec2be0b 100755 --- a/buildos +++ b/buildos @@ -14,7 +14,8 @@ # owd="$(pwd)" trap "{ cd '$owd'; exit 1; }" ERR -set -o errtrace # Trap in functions. +set -o errtrace # Trap in functions. +shopt -s nullglob # Expand patterns that don't match to empty. # Note: diagnostics goes to stdout. # @@ -320,6 +321,128 @@ if [ -z "$tc" ]; then info "no buildos.toolchain_url specified, not bootstrapping" fi +# Cleanup /build/machines/ of any stray snapshots or deleted machines. +# +diag=() +fail= +for v in /build/machines/*; do + if [ ! -d "$v" ]; then + diag+=("$v: error: invalid volume") + fail="true" + continue + fi + + cd "$v" + + for m in *; do + if [ ! -d "$m" ]; then + diag+=("$v/$m: error: invalid machine") + fail="true" + continue + fi + + cd "$m" + + # If there is no current machine symlink, then delete the whole thing. + # + d= + if [ ! -L "$m" ]; then + d="true" + fi + + # Examine each machine subvolume. + # + for s in "$m"-*; do + if [ ! -d "$s" ]; then + diag+=("$v/$m/$s: error: invalid machine subvolume") + fail="true" + continue + fi + + # Unless we are deleting the whole thing, keep initial and bootstrapped + # (for known toolchains) subvolumes. + # + if [ -z "$d" ]; then + # <name>-<N> + # + if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then + continue + fi + + # <name>-<toolchain> + # + f= + for tn in "${!toolchains[@]}"; do + if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then + f="true" + break + fi + done + + if [ -n "$f" ]; then + continue + fi + fi + + # This is either a stray working subvolume or a bootstrapped subvolume + # for a toolchain that was deleted (or we are deleting everything). 
+ # + if ! btrfs property set -ts "$s" ro false; then + diag+=("$v/$m/$s: error: unable to change subvolume property") + fail="true" + continue + fi + + if ! btrfs subvolume delete "$s"; then + diag+=("$v/$m/$s: error: unable to delete subvolume") + fail="true" + continue + fi + + diag+=("$v/$m/$s: info: deleted subvolume") + done + + cd "$v" + + # Delete the machine directory (which we expect to be now empty). + # + if [ -n "$d" ]; then + if ! rmdir "$m"; then + diag+=("$v/$m: error: unable to delete machine directory") + fail="true" + continue + fi + + diag+=("$v/$m: info: deleted machine directory") + fi + done + cd "$owd" +done + +function print_diag () +{ + local p + for p in "${diag[@]}"; do + echo " $p" + done +} + +if [ "${#diag[@]}" -gt 0 ]; then + if [ -z "$fail" ]; then + s="cleaned up entries in /build/machines/" + else + s="invalid entries in /build/machines/, halting" + fi + + print_diag | email "$s" + info "$s" && print_diag 2>&1 + + if [ -n "$fail" ]; then + info "correct and restart the monitor (systemctl restart buildos)" + exit 1 + fi +fi + # Monitoring loop. # while true; do diff --git a/doc/manual.cli b/doc/manual.cli index e20c151..e5f9b55 100644 --- a/doc/manual.cli +++ b/doc/manual.cli @@ -348,4 +348,152 @@ build2-mingw-x86_64-windows.tar.xz -> build2-mingw-0.4.0-x86_64-windows.tar.xz While the monitor itself only needs the \c{build2-toolchain} package, build machine toolchain bootstrap may require additional packages (which will be accessed via TFTP using predictable names). 
+ +\h1#machines|Build Machines| + +At the top level, a machine storage volume (\l{#config-storage-machines +Machines}) contains machine directories, for example: + +\ +/build/machines/default/ +├── linux-gcc_6/ +└── windows-msvc_14/ +\ + +The layout inside a machine directory is as follows, where \c{<name>} is +the machine name and \c{<toolchain>} is the toolchain name: + +\ +<name>/ +├── <name> -> <name>-2 +├── <name>-1/ +├── <name>-2/ +├── <name>-<toolchain>/ +└── <name>-<toolchain>-<xxx>/ +\ + +The \c{<name>-<N>} entries are read-only \c{btrfs} subvolumes that contain +the initial (that is, \i{pre-bootstrap}) machine images. The numeric \c{<N>} +part indicates the machine revision. + +The \c{<name>} entry is a symbolic link to the \c{<name>-<N>} that is +currently in effect. + +The \c{<name>-<toolchain>} entry is the bootstrapped machine image for +\c{<toolchain>}. It is created by cloning \c{<name>} and then bootstrapping +the \c{build2} toolchain inside. + +The \c{<name>-<toolchain>-<xxx>} entries are the temporary snapshots of +\c{<name>-<toolchain>} created for building packages. + +A machine can be added, upgraded, or deleted on a live Build OS instance. +This needs to be done in a particular order to avoid inconsistencies and race +conditions. + +\h#machines-add|Adding a Machine| + +Let's assume you have a read-only \c{btrfs} \c{linux-gcc_6-1} subvolume on a +development host (we will call it \c{devel}) that contains the initial version +of our virtual machine. We would like to add it to the build host (running +Build OS, we will call it \c{build}) into the default machine volume +(\c{/build/machines/default/}). To achieve this in an atomic way we perform +the following steps: + +\ +# Create the machine directory. +# +build$ mkdir /build/machines/default/linux-gcc_6 + +# Send the machine subvolume to build host. +# +devel$ sudo btrfs send linux-gcc_6-1 | \ + ssh build@build sudo btrfs receive /build/machines/default/linux-gcc_6/ + +build$ cd /build/machines/default/linux-gcc_6 + +# Make user build the owner of the machine subvolume. 
+# +build$ sudo btrfs property set -ts linux-gcc_6-1 ro false +build$ sudo chown build:build linux-gcc_6-1 +build$ btrfs property set -ts linux-gcc_6-1 ro true + +# Make the subvolume the current machine. +# +build$ ln -s linux-gcc_6-1 linux-gcc_6 +\ + +\h#machines-upgade|Upgrading a Machine| + +Continuing with the example started in the previous section, let's assume we +have created \c{linux-gcc_6-2} as a snapshot of \c{linux-gcc_6-1} and have +made some modifications to the virtual machine (all on the development +host). We now would like to switch to this new revision of our machine on the +build host. To achieve this in an atomic way we perform the following steps: + +\ +# Send the new machine subvolume to build host incrementally. +# +devel$ sudo btrfs send -p linux-gcc_6-1 linux-gcc_6-2 | \ + ssh build@build sudo btrfs receive /build/machines/default/linux-gcc_6/ + +build$ cd /build/machines/default/linux-gcc_6 + +# Make user build the owner of the new machine subvolume. +# +build$ sudo btrfs property set -ts linux-gcc_6-2 ro false +build$ sudo chown build:build linux-gcc_6-2 +build$ btrfs property set -ts linux-gcc_6-2 ro true + +# Switch the current machine atomically. +# +build$ ln -s linux-gcc_6-2 new-linux-gcc_6 +build$ mv -T new-linux-gcc_6 linux-gcc_6 + +# Remove the old machine subvolume (optional). +# +build$ btrfs property set -ts linux-gcc_6-1 ro false +build$ btrfs subvolume delete linux-gcc_6-1 +\ + +\h#machines-delete|Deleting a Machine| + +Continuing with the example started in the previous section, let's assume we +are no longer interested in the \c{linux-gcc_6} machine and would like to +delete it. This operation is complicated by the possibility of \c{bbot} +instances currently building with this machine. + +\ +build$ cd /build/machines/default/linux-gcc_6 + +# Delete the current machine symlink. +# +build$ rm linux-gcc_6 + +# Wait for all the linux-gcc_6-<toolchain>-<xxx> subvolumes +# to disappear. 
+# +build$ for d in linux-gcc_6-*-*; do \ + while [ -d $d ]; do \ + echo \"waiting for $d\" && \ + sleep 10; \ + done; \ +done + +# Delete the initial and bootstrapped machine subvolume(s). +# +build$ for d in linux-gcc_6-*; do \ + btrfs property set -ts $d ro false && \ + btrfs subvolume delete $d; \ +done + +# Delete the machine directory. +# +build$ cd .. +build$ rmdir /build/machines/default/linux-gcc_6 +\ + +Note also that on reboot the Build OS monitor examines and cleans up +machine directories of any stray subvolumes. As a result, an alternative +approach would be to remove the current machine symlink and reboot the +build host. " diff --git a/init b/init index e29de66..c4fd7e0 100755 --- a/init +++ b/init @@ -338,9 +338,15 @@ while read l || [ -n "$l" ]; do info "mounting $d (buildos.$l) on $m" - mkdir -p "$m" o="defaults,noatime,nodiratime,user_subvol_rm_allowed" echo "$d $m btrfs $o 0 0" >>$fstab + + # Mount it and change the owner of the filesystem root. + # + mkdir -p "$m" + mount -t btrfs -o "$o" "$d" "$m" + chown build:build "$m" + continue fi done < <(lsblk --pairs --paths --output NAME,FSTYPE,LABEL) @@ -367,7 +373,8 @@ fi # compromized VMs will be able to upload to. # mkdir -p /build/tftp -o="nodev,noexec,nosuid,size=200M,mode=0755,uid=$(id -u build),gid=$(id -g build)" +o="nodev,noexec,nosuid,size=200M" +o+=",mode=0755,uid=$(id -u build),gid=$(id -g build)" echo "tmpfs /build/tftp tmpfs $o 0 0" >>$fstab # Configure Postfix. -- cgit v1.1