aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2017-03-31 15:51:48 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2017-03-31 15:51:48 +0200
commit b304f7b1c52550fef0d0a116e0155f247c16141c (patch)
tree 33c06f702541a8a58f2dbe5138ab88eaa0facddc
parent 5461c13bdcace38ce40494acde0e21d2cb3c9081 (diff)
Establish build machine layout
-rwxr-xr-x buildos 125
-rw-r--r-- doc/manual.cli 148
-rwxr-xr-x init 11
3 files changed, 281 insertions, 3 deletions
diff --git a/buildos b/buildos
index 7aa9a0e..ec2be0b 100755
--- a/buildos
+++ b/buildos
@@ -14,7 +14,8 @@
#
owd="$(pwd)"
trap "{ cd '$owd'; exit 1; }" ERR
-set -o errtrace # Trap in functions.
+set -o errtrace # Trap in functions.
+shopt -s nullglob # Expand patterns that don't match to empty.
# Note: diagnostics go to stdout.
#
@@ -320,6 +321,128 @@ if [ -z "$tc" ]; then
info "no buildos.toolchain_url specified, not bootstrapping"
fi
+# Clean up /build/machines/ of any stray snapshots or deleted machines.
+#
+diag=()
+fail=
+for v in /build/machines/*; do
+ if [ ! -d "$v" ]; then
+ diag+=("$v: error: invalid volume")
+ fail="true"
+ continue
+ fi
+
+ cd "$v"
+
+ for m in *; do
+ if [ ! -d "$m" ]; then
+ diag+=("$v/$m: error: invalid machine")
+ fail="true"
+ continue
+ fi
+
+ cd "$m"
+
+ # If there is no current machine symlink, then delete the whole thing.
+ #
+ d=
+ if [ ! -L "$m" ]; then
+ d="true"
+ fi
+
+ # Examine each machine subvolume.
+ #
+ for s in "$m"-*; do
+ if [ ! -d "$s" ]; then
+ diag+=("$v/$m/$s: error: invalid machine subvolume")
+ fail="true"
+ continue
+ fi
+
+ # Unless we are deleting the whole thing, keep initial and bootstrapped
+ # (for known toolchains) subvolumes.
+ #
+ if [ -z "$d" ]; then
+ # <name>-<N>
+ #
+ if [[ "$s" =~ ^"$m"-[0-9]+$ ]]; then
+ continue
+ fi
+
+ # <name>-<toolchain>
+ #
+ f=
+ for tn in "${!toolchains[@]}"; do
+ if [[ "$s" =~ ^"$m"-"$tn"$ ]]; then
+ f="true"
+ break
+ fi
+ done
+
+ if [ -n "$f" ]; then
+ continue
+ fi
+ fi
+
+ # This is either a stray working subvolume or a bootstrapped subvolume
+ # for a toolchain that was deleted (or we are deleting everything).
+ #
+ if ! btrfs property set -ts "$s" ro false; then
+ diag+=("$v/$m/$s: error: unable to change subvolume property")
+ fail="true"
+ continue
+ fi
+
+ if ! btrfs subvolume delete "$s"; then
+ diag+=("$v/$m/$s: error: unable to delete subvolume")
+ fail="true"
+ continue
+ fi
+
+ diag+=("$v/$m/$s: info: deleted subvolume")
+ done
+
+ cd "$v"
+
+ # Delete the machine directory (which we expect to be now empty).
+ #
+ if [ -n "$d" ]; then
+ if ! rmdir "$m"; then
+ diag+=("$v/$m: error: unable to delete machine directory")
+ fail="true"
+ continue
+ fi
+
+ diag+=("$v/$m: info: deleted machine directory")
+ fi
+ done
+ cd "$owd"
+done
+
+function print_diag ()
+{
+ local p
+ for p in "${diag[@]}"; do
+ echo " $p"
+ done
+}
+
+if [ "${#diag[@]}" -gt 0 ]; then
+ if [ -z "$fail" ]; then
+ s="cleaned up entries in /build/machines/"
+ else
+ s="invalid entries in /build/machines/, halting"
+ fi
+
+ print_diag | email "$s"
+ info "$s" && print_diag 2>&1
+
+ if [ -n "$fail" ]; then
+ info "correct and restart the monitor (systemctl restart buildos)"
+ exit 1
+ fi
+fi
+
# Monitoring loop.
#
while true; do
diff --git a/doc/manual.cli b/doc/manual.cli
index e20c151..e5f9b55 100644
--- a/doc/manual.cli
+++ b/doc/manual.cli
@@ -348,4 +348,152 @@ build2-mingw-x86_64-windows.tar.xz -> build2-mingw-0.4.0-x86_64-windows.tar.xz
While the monitor itself only needs the \c{build2-toolchain} package, build
machine toolchain bootstrap may require additional packages (which will be
accessed via TFTP using predictable names).
+
+\h1#machines|Build Machines|
+
+At the top level, a machine storage volume (\l{#config-storage-machines
+Machines}) contains machine directories, for example:
+
+\
+/build/machines/default/
+├── linux-gcc_6/
+└── windows-msvc_14/
+\
+
+The layout inside a machine directory is as follows, where \c{<name>} is
+the machine name and \c{<toolchain>} is the toolchain name:
+
+\
+<name>/
+├── <name> -> <name>-2
+├── <name>-1/
+├── <name>-2/
+├── <name>-<toolchain>/
+└── <name>-<toolchain>-<xxx>/
+\
+
+The \c{<name>-<N>} entries are read-only \c{btrfs} subvolumes that contain
+the initial (that is, \i{pre-bootstrap}) machine images. The numeric \c{<N>}
+part indicates the machine revision.
+
+The \c{<name>} entry is a symbolic link to the \c{<name>-<N>} that is
+currently in effect.
+
+The \c{<name>-<toolchain>} entry is the bootstrapped machine image for
+\c{<toolchain>}. It is created by cloning \c{<name>} and then bootstrapping
+the \c{build2} toolchain inside.
+
+The \c{<name>-<toolchain>-<xxx>} entries are the temporary snapshots of
+\c{<name>-<toolchain>} created for building packages.
+
+A machine can be added, upgraded, or deleted on a live Build OS instance.
+This needs to be done in a particular order to avoid inconsistencies and race
+conditions.
+
+\h#machines-add|Adding a Machine|
+
+Let's assume you have a read-only \c{btrfs} \c{linux-gcc_6-1} subvolume on a
+development host (we will call it \c{devel}) that contains the initial version
+of our virtual machine. We would like to add it to the build host (running
+Build OS, we will call it \c{build}) into the default machine volume
+(\c{/build/machines/default/}). To achieve this in an atomic way we perform
+the following steps:
+
+\
+# Create the machine directory.
+#
+build$ mkdir /build/machines/default/linux-gcc_6
+
+# Send the machine subvolume to build host.
+#
+devel$ sudo btrfs send linux-gcc_6-1 | \
+ ssh build@build sudo btrfs receive /build/machines/default/linux-gcc_6/
+
+build$ cd /build/machines/default/linux-gcc_6
+
+# Make user build the owner of the machine subvolume.
+#
+build$ sudo btrfs property set -ts linux-gcc_6-1 ro false
+build$ sudo chown build:build linux-gcc_6-1
+build$ btrfs property set -ts linux-gcc_6-1 ro true
+
+# Make the subvolume the current machine.
+#
+build$ ln -s linux-gcc_6-1 linux-gcc_6
+\
+
+\h#machines-upgade|Upgrading a Machine|
+
+Continuing with the example started in the previous section, let's assume we
+have created \c{linux-gcc_6-2} as a snapshot of \c{linux-gcc_6-1} and have
+made some modifications to the virtual machine (all on the development
+host). We now would like to switch to this new revision of our machine on the
+build host. To achieve this in an atomic way we perform the following steps:
+
+\
+# Send the new machine subvolume to build host incrementally.
+#
+devel$ sudo btrfs send -p linux-gcc_6-1 linux-gcc_6-2 | \
+ ssh build@build sudo btrfs receive /build/machines/default/linux-gcc_6/
+
+build$ cd /build/machines/default/linux-gcc_6
+
+# Make user build the owner of the new machine subvolume.
+#
+build$ sudo btrfs property set -ts linux-gcc_6-2 ro false
+build$ sudo chown build:build linux-gcc_6-2
+build$ btrfs property set -ts linux-gcc_6-2 ro true
+
+# Switch the current machine atomically.
+#
+build$ ln -s linux-gcc_6-2 new-linux-gcc_6
+build$ mv -T new-linux-gcc_6 linux-gcc_6
+
+# Remove the old machine subvolume (optional).
+#
+build$ btrfs property set -ts linux-gcc_6-1 ro false
+build$ btrfs subvolume delete linux-gcc_6-1
+\
+
+\h#machines-delete|Deleting a Machine|
+
+Continuing with the example started in the previous section, let's assume we
+are no longer interested in the \c{linux-gcc_6} machine and would like to
+delete it. This operation is complicated by the possibility of \c{bbot}
+instances currently building with this machine.
+
+\
+build$ cd /build/machines/default/linux-gcc_6
+
+# Delete the current machine symlink.
+#
+build$ rm linux-gcc_6
+
+# Wait for all the linux-gcc_6-<toolchain>-<xxx> subvolumes
+# to disappear.
+#
+build$ for d in linux-gcc_6-*-*; do \
+ while [ -d $d ]; do \
+ echo \"waiting for $d\" && \
+ sleep 10; \
+ done; \
+done
+
+# Delete the initial and bootstrapped machine subvolume(s).
+#
+build$ for d in linux-gcc_6-*; do \
+ btrfs property set -ts $d ro false && \
+ btrfs subvolume delete $d; \
+done
+
+# Delete the machine directory.
+#
+build$ cd ..
+build$ rmdir /build/machines/default/linux-gcc_6
+\
+
+Note also that on reboot the Build OS monitor examines and cleans up
+machine directories of any stray subvolumes. As a result, an alternative
+approach would be to remove the current machine symlink and reboot the
+build host.
"
diff --git a/init b/init
index e29de66..c4fd7e0 100755
--- a/init
+++ b/init
@@ -338,9 +338,15 @@ while read l || [ -n "$l" ]; do
info "mounting $d (buildos.$l) on $m"
- mkdir -p "$m"
o="defaults,noatime,nodiratime,user_subvol_rm_allowed"
echo "$d $m btrfs $o 0 0" >>$fstab
+
+ # Mount it and change the owner of the filesystem root.
+ #
+ mkdir -p "$m"
+ mount -t btrfs -o "$o" "$d" "$m"
+ chown build:build "$m"
+
continue
fi
done < <(lsblk --pairs --paths --output NAME,FSTYPE,LABEL)
@@ -367,7 +373,8 @@ fi
# compromised VMs will be able to upload to.
#
mkdir -p /build/tftp
-o="nodev,noexec,nosuid,size=200M,mode=0755,uid=$(id -u build),gid=$(id -g build)"
+o="nodev,noexec,nosuid,size=200M"
+o+=",mode=0755,uid=$(id -u build),gid=$(id -g build)"
echo "tmpfs /build/tftp tmpfs $o 0 0" >>$fstab
# Configure Postfix.