diff options
-rw-r--r-- | .gitignore | 9 | ||||
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | README | 2 | ||||
-rwxr-xr-x | bootstrap | 399 | ||||
-rw-r--r-- | bootstrap.txt | 41 | ||||
-rwxr-xr-x | buildos | 195 | ||||
-rwxr-xr-x | doc/cli.sh | 2 | ||||
-rw-r--r-- | doc/manual.cli | 50 | ||||
m--------- | doc/style | 0 | ||||
-rwxr-xr-x | init | 122 | ||||
-rw-r--r-- | manifest | 6 | ||||
-rwxr-xr-x | pxeflatten | 5 | ||||
-rwxr-xr-x | remove-machine | 6 | ||||
-rwxr-xr-x | test-machine | 43 | ||||
-rwxr-xr-x | test-machine-aarch64-m1 | 115 | ||||
-rwxr-xr-x | upload-machine | 26 | ||||
-rwxr-xr-x | upload-os | 24 |
17 files changed, 829 insertions, 218 deletions
@@ -1,4 +1,5 @@ -*.cpio.gz -buildos-buildid -buildos-initrd -buildos-image +*.cpio.xz +buildos-buildid-* +buildos-initrd-* +buildos-image-* +bootstrap*.log @@ -1,4 +1,4 @@ -Copyright (c) 2014-2020 Code Synthesis Ltd. +Copyright (c) 2014-2024 Code Synthesis Ltd. TBC; All rights reserved @@ -1,4 +1,4 @@ -This package contains the build2 operating system bootstrap scripts. +This package contains the build2 Build OS bootstrap scripts. build2 is an open source, cross-platform toolchain for building and packaging C++ code. Its aim is a modern build system and dependency manager for the C++ @@ -4,12 +4,16 @@ # # Assumptions/expectations: # -# - host debootstrap/debian-archive-keyring matching release +# - Bootstrapping for the host CPU (currently x86_64 or aarch64). +# +# - Host debootstrap/debian-archive-keyring matching release. # # - /btrfs/<user> is a btrfs directory where the current user can create -# snapshots +# snapshots. +# +# - sudo is passwordless (used to run debootstrap, systemd-nspawn, etc). # -# - sudo is passwordless (used to run debootstrap, systemd-nspawn, etc) +# - This script is executed from the buildos/ source directory. # # Options: # @@ -19,19 +23,48 @@ # # 1 - bootstrap phase 1 # 2 - bootstrap phase 2 -# 3 - kernel build +# 3 - kernel build (and package patching, if necessary) # 4 - setup # 5 - create footfs # 6 - create kernel image and initrd # +# --arch-variant +# Architecture variant, for example, m1 for aarch64-m1. +# +# --kernel-source +# Pre-configured Linux kernel source .tar.* archive. If unspecified, Debian +# source/configuration will be used. +# +# --firmware +# Firmware .tar.* archive to be unpackged into /lib/firmware/. +# usage="usage: $0" -macaddr="de:ad:be:ef:b8:da" # @@ TMP mac address for testing. +owd="$(pwd)" +trap "{ cd '$owd'; exit 1; }" ERR +set -o errtrace # Trap in functions. 
+ +function info () { echo "$*" 1>&2; } +function error () { info "$*"; exit 1; } id="$(id -un)" btrfs=/btrfs root="$btrfs/$id/buildos" +arch="$(uname -m)" + +case "$arch" in + x86_64) + debian_arch=amd64 + ;; + aarch64) + debian_arch=arm64 + ;; + *) + error "unsupported architecture: $arch" + ;; +esac + # Source distribution and packages. Base packages are installed on stage 1 via # debootstrap. Extra packages are added on stage 4 via apt-get install. The # idea is to be able to add extra packages without upgrading the base system. @@ -41,45 +74,63 @@ root="$btrfs/$id/buildos" # # - some packages (such as CPU microcode updates) are in non-free. # - systemd-container seems to be required by host systemd-nspawn. -# - not installing linux-image-amd64 since building custom below +# - must explicitly select between dbus and dbus-broker +# - not installing linux-image-* since building custom below # release="testing" components="main,contrib,non-free" -mirror="http://deb.debian.org/debian/" -#mirror="https://deb.debian.org/debian/" +mirror="http://http.us.debian.org/debian/" +#mirror="https://http.us.debian.org/debian/" -base_pkgs="locales,klibc-utils,sudo,systemd-container,udev" +base_pkgs="locales,klibc-utils,sudo" +base_pkgs+=",udev,dbus,systemd-timesyncd,systemd-container" base_pkgs+=",kmod,linux-base,firmware-linux-free,irqbalance" -base_pkgs+=",intel-microcode,amd64-microcode" -base_pkgs+=",pciutils,usbutils,dmidecode,cpuid" +base_pkgs+=",pciutils,usbutils,dmidecode" base_pkgs+=",hdparm,btrfs-progs" -base_pkgs+=",lm-sensors,smartmontools" +base_pkgs+=",lm-sensors,smartmontools,linux-cpupower" base_pkgs+=",psmisc" base_pkgs+=",net-tools,iproute2,iptables,isc-dhcp-client" -base_pkgs+=",ifupdown,bridge-utils,dnsmasq,ntp,postfix" +base_pkgs+=",ifupdown,bridge-utils,dnsmasq,postfix" base_pkgs+=",iputils-ping,wget,curl,ca-certificates" base_pkgs+=",openssh-client,openssh-server" base_pkgs+=",tftp-hpa,tftpd-hpa" -base_pkgs+=",bzip2,xz-utils" +base_pkgs+=",zstd,xz-utils" 
base_pkgs+=",less,nano,time" - -base_pkgs+=",qemu-kvm,qemu-utils,socat" - -base_pkgs+=",g++,make,pkg-config" - -extra_pkgs="linux-cpupower" - -owd="$(pwd)" -trap "{ cd '$owd'; exit 1; }" ERR -set -o errtrace # Trap in functions. - -function info () { echo "$*" 1>&2; } -function error () { info "$*"; exit 1; } - -stage="1" -stage_max="6" +base_pkgs+=",g++,make" + +base_pkgs+=",qemu-utils,socat" + +case "$arch" in + x86_64) + base_pkgs+=",cpuid" + base_pkgs+=",intel-microcode" #,amd64-microcode + base_pkgs+=",qemu-system-x86" + ;; + aarch64) + base_pkgs+=",qemu-system-arm" + ;; +esac + +extra_pkgs="ethtool" + +case "$arch" in + x86_64) + #extra_pkgs+="" + ;; + aarch64) + extra_pkgs+=",ipxe-qemu" + ;; +esac + +# Options. +# +stage=1 +stage_max=6 +arch_variant= +kernel_source= +firmware= while [ "$#" -gt 0 ]; do case "$1" in @@ -88,6 +139,21 @@ while [ "$#" -gt 0 ]; do stage="$1" shift ;; + --arch-variant) + shift + arch_variant="$1" + shift + ;; + --kernel-source) + shift + kernel_source="$1" + shift + ;; + --firmware) + shift + firmware="$1" + shift + ;; -*) error "unknown option: $1" ;; @@ -101,6 +167,11 @@ if [ "$stage" -lt "1" -o "$stage" -gt "$stage_max" ]; then error "invalid stage number $stage" fi +arch_with_variant="$arch" +if [ -n "$arch_variant" ]; then + arch_with_variant="$arch_with_variant-$arch_variant" +fi + # Extract version. # version="$(sed -n -re 's/^version: ([0-9]+\.[0-9]+\.[0-9]+).*$/\1/p' ./manifest)" @@ -154,14 +225,28 @@ function nspawn () # <root> <systemd-nspawn-args> local r="$1" shift + # systemd-nspawn appears to carry over the root directory owner into the + # container which then causes other issues (Debian bug#950684). + # + # @@ Looking around didn't reveal any way (e.g., an option) to fix this. + # Perhaps newer systemd-nspawn does the right thing automatically? 
+ # + ug="$(stat --format="%G:%U" "$r")" + sudo chown root:root "$r" + sudo systemd-nspawn --register=no -D "$r" "$@" - # systemd-nspawn may create the /var/lib/machines subvolume which prevents - # the deletion of the containing submodule. So we clean it up. + sudo chown "$ug" "$r" + + # systemd-nspawn may create the /var/lib/{machines,portables} subvolumes + # which prevents the deletion of the containing submodule. So we clean'em + # up. # - if sudo btrfs subvol show "$r/var/lib/machines" 1>/dev/null 2>&1; then - sudo btrfs subvol delete "$r/var/lib/machines" - fi + for s in /var/lib/machines /var/lib/portables; do + if sudo btrfs subvol show "$r$s" 1>/dev/null 2>&1; then + sudo btrfs subvol delete "$r$s" + fi + done } # (Over)write or append to a file in the installation root, for example: @@ -196,7 +281,7 @@ if [ "$stage" -eq "1" ]; then sudo debootstrap \ --foreign \ - --arch=amd64 \ + --arch="$debian_arch" \ --merged-usr \ --variant=minbase \ --components="$components" \ @@ -234,15 +319,24 @@ trap "exit 1" ERR set -x # Hack around systemd bug#79306 (changes /etc/localtime) by removing it now -# and making readonly below. +# and making readonly below. @@ See --timezone systemd-nspawn option? # rm /etc/localtime # Both nspawn and debootstrap try to mount /proc /sys (Debian bug#840372). # -mkdir /tmp/proc /tmp/sys -mount --move /proc /tmp/proc -mount --move /sys /tmp/sys +# @@ TMP this now causes issues with newer systemd. +# +#mkdir /tmp/proc /tmp/sys +#mount --move /proc /tmp/proc +#mount --move /sys /tmp/sys + +# systemd-nspawn "helpfully" creates a /lib64 symlink that then trips +# up is-usr-merged package (Debain bug #1019575). +# +if [ $arch = aarch64 ]; then + rm /lib64 +fi # Run second stage of debootstrap. # @@ -354,47 +448,162 @@ EOF # Create the kernel build snapshot, write the script that does the build # from within the installation and boot it up via systemd-nspawn --boot. # - # Add this line before shutdown if need to debug/check things. 
- # - # setsid /bin/bash -c "exec /bin/bash -i <>/dev/console 1>&0 2>&1" + # Add `bash` before shutdown if need to debug/check things. But note that it + # does not work well with `... | tee bootstrap.log`! # subvol_delete "$root-3-kernel" subvol_snapshot "$root" "$root-3-kernel" + # Copy custom kernel source. + # + if [ -n "$kernel_source" ]; then + sudo cp "$kernel_source" "$root-3-kernel/usr/src/" + fi + + # Copy patches. + # + #sudo cp ./patches/tftp-hpa-partial-upload.patch "$root-3-kernel/bootstrap/" + write <<EOF /bootstrap/setup "$root-3-kernel" #!/bin/bash trap "exit 1" ERR set -x +# Add deb-src to each deb entry in /etc/apt/sources.list. +# +sed -ri -e 's/^deb (.+)/deb \1\ndeb-src \1/' /etc/apt/sources.list + +apt-get update +apt-get install -y build-essential devscripts + +# Build custom/patched packages. +# +cd /usr/src + +if false; then +mkdir tftpd-hpa +cd tftpd-hpa +apt-get install -y libwrap0-dev +apt-get source tftpd-hpa +cd tftp-hpa-*/ +patch -p1 </bootstrap/tftp-hpa-partial-upload.patch +dch -n "Apply patches." +cd ../tftp-hpa-*/ # May get renamed. +dpkg-buildpackage -us -uc +cd ../.. +fi + +# Build kernel. +# # This seems to be the simplest method of building the standard Debian # kernel with adjusted configuration. Taken from the Debian Kernel Handbook. 
# apt-get update -apt-get install -y build-essential linux-source +apt-get install -y bc apt-get install -y bison flex apt-get install -y libelf-dev apt-get install -y libssl-dev +apt-get install -y cpio rsync +apt-get install -y dwarves +apt-get install -y device-tree-compiler + cd /usr/src -tar xf linux-source-* -mv linux-source-*/ linux -xzcat linux-config-*/config.amd64_none_amd64.xz >linux/.config + +if [ -z "$kernel_source" ]; then + apt-get install -y linux-source + tar xf linux-source-* + mv linux-source-*/ linux + xzcat linux-config-*/config.${debian_arch}_none_${debian_arch}.xz >linux/.config +else + kernel_source=\$(basename $kernel_source) + tar xf \$kernel_source + mv \$(sed -re 's/(.+)\.tar\..+/\1/' <<<\$kernel_source) linux +fi + cd linux # Adjust configuration. # -echo 'CONFIG_SYSTEM_TRUSTED_KEYS=""' >>.config -echo 'CONFIG_BUILD_SALT=""' >>.config -echo 'CONFIG_MODULE_SIG=n' >>.config +# Note that we do some of these even for the pre-configured kernel. +# +# Note that SECURITY_LOCKDOWN_LSM forces MODULE_SIG ('select' in Kconfig). +# +# Generally, if you disable an option but it still appears enabled after +# the kernel build, search for 'select XXX' in Kconfig* and also disable +# any found symbols. +# +scripts/config --disable KCSAN +scripts/config --disable SECURITY_LOCKDOWN_LSM +scripts/config --disable MODULE_SIG +scripts/config --set-str BUILD_SALT '' +scripts/config --set-str SYSTEM_TRUSTED_KEYS '' + +scripts/config --enable INIT_STACK_NONE +scripts/config --disable INIT_STACK_ALL_PATTERN +scripts/config --disable INIT_STACK_ALL_ZERO + +scripts/config --enable DEBUG_INFO_NONE +scripts/config --disable DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT + +# aarch64/5.19 additions: +# +scripts/config --disable KCOV +scripts/config --disable SHADOW_CALL_STACK +scripts/config --disable ZERO_CALL_USED_REGS +scripts/config --disable VIDEO_ADV7511 + +if [ -z "$kernel_source" ]; then + + # Disable sound subsystem/drivers. 
+ # + sed -i -re '/^CONFIG_SND_.+/d' .config + sed -i -re '/^CONFIG_SOUND_.+/d' .config + scripts/config --disable CONFIG_SOUND + scripts/config --disable CONFIG_SND + + # Disable GPU subsystem/drivers. + # + # NOTE: this seems to have killed the high-resolution console. + # + sed -i -re '/^CONFIG_DRM_.+/d' .config + scripts/config --disable CONFIG_DRM + + # Disable wireless network drivers. + # + scripts/config --disable CONFIG_WLAN + +fi # Adjust kernel command line size limit. # -sed -i -re 's/^(#define COMMAND_LINE_SIZE ).+\$/\1 4096/' arch/x86/include/asm/setup.h +sed -i -re 's/^(#define COMMAND_LINE_SIZE).+\$/\1 4096/' arch/x86/include/asm/setup.h +sed -i -re 's/^(#define COMMAND_LINE_SIZE).+\$/\1 4096/' arch/arm64/include/uapi/asm/setup.h + +# Change certain network diagnostics from pr_info() to pr_debug() (otherwise +# to drowns out any useful diagnostics). +# +patch net/core/dev.c <<EOP +@@ -0 +0 @@ +- pr_info("device %s %s promiscuous mode\n", ++ pr_debug("device %s %s promiscuous mode\n", +EOP +patch net/bridge/br_stp.c <<EOP +@@ -0 +0 @@ +- br_info(p->br, "port %u(%s) entered %s state\n", ++ br_debug(p->br, "port %u(%s) entered %s state\n", +EOP +patch net/ipv6/addrconf.c <<EOP +@@ -0 +0 @@ +- pr_info("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n", ++ pr_debug("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n", +EOP + +#bash make oldconfig -scripts/config --disable DEBUG_INFO make clean -make deb-pkg LOCALVERSION=-buildos KDEB_PKGVERSION=1 -j 8 +make deb-pkg LOCALVERSION=-buildos KDEB_PKGVERSION=1-1 -j 8 # Clean up and shutdown. # @@ -406,8 +615,9 @@ EOF nspawn "$root-3-kernel" --boot - # Copy the kernel over and install it. + # Copy the kernel and packages over and install them. 
# + #sudo cp "$root-3-kernel/usr/src/tftpd-hpa/tftpd-hpa_"*.deb "$root/usr/src/" sudo cp "$root-3-kernel/usr/src/linux-image-"*.deb "$root/usr/src/" write <<EOF /bootstrap/setup @@ -416,7 +626,9 @@ EOF trap "exit 1" ERR set -x +#dpkg -i /usr/src/tftpd-hpa_*.deb dpkg -i /usr/src/linux-image-*.deb + rm -rf /usr/src/* cd / ln -s boot/vmlinuz-* /vmlinuz @@ -438,6 +650,12 @@ fi # if [ "$stage" -le "4" ]; then + # Copy custom firmware. + # + if [ -n "$firmware" ]; then + sudo cp "$firmware" "$root/usr/src/" + fi + # Write the setup script that will finish the setup (the service is already # there from stage 3). # @@ -454,6 +672,17 @@ for p in \$(sed -e 's/,/ /g' <<<"$extra_pkgs"); do apt-get install -y --no-install-recommends "\$p" done +# Install custom firmware. +# +if [ -n "$firmware" ]; then + firmware=\$(basename $firmware) + tar -C /lib/firmware -xf /usr/src/\$firmware +fi + +# Final /usr/src cleanup. +# +rm -rf /usr/src/* + # We want the utility (smartctl) but not the daemon. # systemctl disable smartd @@ -464,15 +693,21 @@ systemctl disable smartd # consistent across builds. # addgroup --gid 2000 build -adduser --uid 2000 --gid 2000 --home /build --gecos "" --disabled-password build +adduser --uid 2000 --ingroup build --home /build --gecos "" --disabled-password build adduser build kvm echo "build ALL=(ALL) NOPASSWD:ALL" >/etc/sudoers.d/build echo "Defaults:build !syslog" >>/etc/sudoers.d/build chmod 0440 /etc/sudoers.d/build -# Clean up package cache. +# Clean up package cache and fetched repository information. # apt-get clean +rm -rf /var/lib/apt/lists + +# Strip GCC executables (Debian bug #998841). Fixed but let's keep the command +# around in case it pops up again. +# +#strip \$(find /usr/lib/gcc -type f -executable) # Clean up /bootstrap. # @@ -502,15 +737,18 @@ if [ "$stage" -le "5" ]; then # Quite a few files/directories are only accessible by root (e.g., /root) so # we run under sudo. 
# + # Note also that without --check=crc32 the kernel will not be able to + # decompress things. + # root_dirs="build dev etc mnt root usr var" root_links="bin sbin lib lib32 lib64" - info "generating buildos-rootfs.cpio.gz..." + info "generating buildos-rootfs.cpio.xz..." cd "$root" sudo find $root_dirs $root_links -print0 | \ sudo cpio --null -o -H newc | \ - gzip -9 > "$owd/buildos-rootfs.cpio.gz" + xz -9 --check=crc32 >"$owd/buildos-rootfs.cpio.xz" cd "$owd" subvol_snapshot -r "$root" "$root-5" @@ -537,11 +775,16 @@ if [ "$stage" -le "6" ]; then sudo ln -sf "$root/usr/lib/systemd/system/buildos.service" \ "$root/etc/systemd/system/multi-user.target.wants/buildos.service" - info "generating buildos-init.cpio.gz..." + # Patch in the arch variant. + # + sudo sed -i -e "s/^arch_variant=.*/arch_variant=$arch_variant/" \ + "$root/usr/sbin/buildos" + + info "generating buildos-init.cpio.xz..." cd "$root" sudo cpio -o -H newc <<EOF | \ - gzip -9 > "$owd/buildos-init.cpio.gz" + xz -9 --check=crc32 >"$owd/buildos-init.cpio.xz" usr/lib/os-release init usr/sbin/buildos @@ -550,41 +793,13 @@ etc/systemd/system/multi-user.target.wants/buildos.service EOF cd "$owd" - cat buildos-rootfs.cpio.gz buildos-init.cpio.gz >buildos-initrd + cat buildos-rootfs.cpio.xz \ + buildos-init.cpio.xz >"buildos-initrd-$arch_with_variant" # Copy the kernel image next to the initramfs for convenience. # - cp "$root/vmlinuz" buildos-image - echo "$buildid" >buildos-buildid + cp "$root/vmlinuz" "buildos-image-$arch_with_variant" + echo "$buildid" >"buildos-buildid-$arch_with_variant" subvol_snapshot -r "$root" "$root-6" fi - -exit 0 - -# Test. -# -if [ ! -e /tmp/buildos-state ]; then - qemu-img create -f raw /tmp/buildos-state 20M -fi - -if [ ! -e /tmp/buildos-machines ]; then - qemu-img create -f raw /tmp/buildos-machines 100M -fi - -# To test PXE boot, replace -kernel/-initrd/-append with '-boot n'. 
-# -sudo kvm \ - -m 16G \ - -cpu host -smp "sockets=1,cores=4,threads=2" \ - -device "e1000,netdev=net0,mac=$macaddr" \ - -netdev "tap,id=net0,script=./qemu-ifup" \ - -device "virtio-scsi-pci,id=scsi" \ - -device "scsi-hd,drive=disk1" \ - -drive "if=none,id=disk1,file=/tmp/buildos-state,format=raw" \ - -device "scsi-hd,drive=disk2" \ - -drive "if=none,id=disk2,file=/tmp/buildos-machines,format=raw" \ - -boot n - -# -kernel buildos-image -initrd buildos-initrd \ -# -append "buildos.smtp_relay=build2.org buildos.admin_email=admin@build2.org" diff --git a/bootstrap.txt b/bootstrap.txt index 2554d43..d02088a 100644 --- a/bootstrap.txt +++ b/bootstrap.txt @@ -4,14 +4,14 @@ - systemd - qemu-system-x86 - - linux-image-amd64 + - linux-image-amd64 & btrfs-progs - g++ If some of them are very recent (and thus likely to still have issues) in unstable, consider using testing. See Debian Changelog for release dates. - Check the current source (unstable/testing) and update is necessary. + Check the current source (unstable/testing) and update if necessary. * Examining init changes: @@ -21,18 +21,45 @@ diff -urw <old> <new> - We currently only use init plus udev in scripts/init-{top,bottom}. + We currently only use init plus udev in scripts/init-{top,bottom} (but + the latter may use functions from other files). -* Grep for 'bug' in init and bootstrap scripts, see if any bugs have +* Grep for 'bug' and @@ in init and bootstrap scripts, see if any bugs have been fixed and corresponding workarounds can be removed. + For some it may make sense to do a round of cleanups after a successful bootstrap + so as not to conflate failures. + * Upgrade to latest debootstrap and debian-archive-keyring from unstable - (or testing). + (or testing). Other required host packages: + + uuid-runtime (uuidgen) + systemd-container (systemd-nspawn) * Note: as of latest attempt, bootstrap over https was still broken.
* Bootstrap: - ./bootstrap 2>&1 | tee bootstrap.log + ./bootstrap 2>&1 | tee bootstrap-X.Y.log + + NOTE: tee does not play well with bash prompt (used for troubleshooting). + + NOTE: if during bootstrap you need to but are unable to delete an intermediate + btrfs subvolume, then it most likely has nested subvolumes (see nspawn() + in bootstrap). + + NOTE: to kill nspawn container, press Ctrl-]]]. + + Save the log for later comparison (might have to redo a from-stage-1 + bootstrap to get the complete log). + + To bootstrap an architecture variant with a pre-configured kernel: + + ./bootstrap --arch-variant m1 --kernel-source .../linux-asahi-5.19-5-1.tar.xz + +* Compare sizes to previous version for any abnormalities (if a lot larger, + check if GCC executables are stripped or if the kernel is for some reason now + built with debug info). -* Compare sizes to previous version for any abnormalities. +* After deployment, test VM upload/removal scripts (there are often issues + after upgrading to new btrfs-progs). @@ -29,6 +29,16 @@ function error () exit 1 } +# Note: the arch variant is patched in by the bootstrap script. +# +arch="$(uname -m)" +arch_variant= + +arch_with_variant="$arch" +if [ -n "$arch_variant" ]; then + arch_with_variant="$arch_with_variant-$arch_variant" +fi + # Network timeouts: 60 seconds to connect, 10 minutes to complete, 4 retries # (5 attempts total). These are similar to bbot timeouts. Note that the # toolchain archives can be quite sizable. @@ -49,6 +59,11 @@ info "starting build os monitor..." # foo='foo fox' # bar="bar 'box'" # +# Or (as rewritten by GRUB): +# +# 'foo=foo fox' +# "bar=bar 'box'" +# # First we separate quoted variables and arguments with newlines (giving # priority to assignments). Then we replace whitespaces with newline on # lines that don't contain quotes.
Finally, we clean up by removing blank @@ -71,11 +86,24 @@ declare -A toolchains toolchains["default"]="" for v in "${cmdline[@]}"; do - var="$(sed -n -re 's/^buildos\.([^=]+)=.*$/\1/p' <<<"$v")" # Extract name. + + # Rewrite "x=y" as x="y" (as well as the single-quote variant). + # + v1="$(sed -n -re "s/^\"([^= ]+)=(.*)\"\$/\1=\"\2\"/p" <<<"$v")" + if [ -n "$v1" ]; then + v="$v1" + else + v1="$(sed -n -re "s/^'([^= ]+)=(.*)'\$/\1='\2'/p" <<<"$v")" + if [ -n "$v1" ]; then + v="$v1" + fi + fi + + var="$(sed -n -re 's/^buildos\.([^= ]+)=.*$/\1/p' <<<"$v")" # Extract name. if [ -n "$var" ]; then - val="$(sed -re 's/^[^=]+=(.*)$/\1/' <<<"$v")" # Extract value. - val="$(sed -re "s/^('(.*)'|\"(.*)\")$/\2\3/" <<<"$val")" # Strip quoted. + val="$(sed -re 's/^[^= ]+=(.*)$/\1/' <<<"$v")" # Extract value. + val="$(sed -re "s/^('(.*)'|\"(.*)\")\$/\2\3/" <<<"$val")" # Strip quoted. # Recognize some variables as arrays. # @@ -122,14 +150,17 @@ function restart () sudo systemctl reboot } -if [ -z "$buildid_url" ]; then +if [ -n "$buildid_url" ]; then + buildid_url="$buildid_url-$arch_with_variant" +else info "no buildos.buildid_url specified, not monitoring for new os builds" fi # Process toolchains. # -# Return the value of one of the toolchain_* variables for this toolchain. +# Return the value of one of the <variable>.<toolchain> variables for this +# toolchain. # function toolchain_value () # <toolchain-prefix> <variable> { @@ -171,6 +202,12 @@ for tn in "${!toolchains[@]}"; do instances=$(($instances + $(toolchain_value "$tp" instances))) + # Default to non-interactive-only. + # + if [ -z "$(toolchain_value "$tp" interactive)" ]; then + declare "${tp}interactive=false" + fi + # Default to 0 nice value. # if [ -z "$(toolchain_value "$tp" nice)" ]; then @@ -208,27 +245,36 @@ fi # # Note that MemTotal in /proc/meminfo is the available memory, not physical. # And to make it easier to provision memory it's really helpful to base it -# in the physical value. 
+# on the physical value. # -ram_total=0 -for i in $(sudo dmidecode -t 17 | sed -n -re 's/^\s*Size:\s*([0-9]+)\s*MB.*$/\1/p'); do - ram_total=$(($ram_total + $i * 1024)) -done +if [ -z "$ram_total" ]; then + ram_total=0 + for i in $(sudo dmidecode -t 17 | sed -n -re 's/^\s*Size:\s*([0-9]+)\s*GB.*$/\1/p'); do + ram_total=$(($ram_total + $i * 1024 * 1024)) + done -if [ "$ram_total" -eq 0 ]; then - error "unable to determine physical memory size" + if [ "$ram_total" -eq 0 ]; then + error "unable to determine physical memory size, use buildos.ram_total to specify" + fi +else + ram_total=$(($ram_total * 1024 * 1024)) fi cpu_total="$(lscpu | sed -n -re 's/^CPU\(s\): *([0-9]+)$/\1/p')" +# RAM reserved to the host. +# if [ -z "$ram_reserved" ]; then ram_reserved=4 fi ram_reserved=$(($ram_reserved * 1024 * 1024)) -if [ -z "$ram_overcommit" ]; then - ram_overcommit=1 +# RAM reserved for auxiliary machines. +# +if [ -z "$ram_auxiliary" ]; then + ram_auxiliary=0 fi +ram_auxiliary=$(($ram_auxiliary * 1024 * 1024)) if [ -z "$cpu_reserved" ]; then cpu_reserved=0 @@ -238,11 +284,13 @@ if [ -z "$cpu_overcommit" ]; then cpu_overcommit=1 fi -ram_slice=$(($ram_total - $ram_reserved)) +ram_build_slice=$(($ram_total - $ram_reserved - $ram_auxiliary)) +ram_auxil_slice=$ram_auxiliary cpu_slice=$(($cpu_total - $cpu_reserved)) if [ "$instances" -gt 1 ]; then - ram_slice=$(($ram_slice * $ram_overcommit / $instances)) + ram_build_slice=$(($ram_build_slice / $instances)) + ram_auxil_slice=$(($ram_auxil_slice / $instances)) cpu_slice=$(($cpu_slice * $cpu_overcommit / $instances)) if [ "$cpu_slice" -eq 0 ]; then @@ -254,20 +302,24 @@ fi # function print () { - echo "cpu_total: $cpu_total" - echo "cpu_reserved: $cpu_reserved" - echo "cpu_overcommit: $cpu_overcommit" - echo "cpu_slice: $cpu_slice" + echo "cpu_total: $cpu_total" + echo "cpu_reserved: $cpu_reserved" + echo "cpu_overcommit: $cpu_overcommit" + echo "cpu_slice: $cpu_slice" + if [ -n "$cpu_affinity" ]; then + echo "cpu_affinity: 
$cpu_affinity" + fi echo - echo "ram_total: $ram_total KB" - echo "ram_reserved: $ram_reserved KB" - echo "ram_overcommit: $ram_overcommit" - echo "ram_slice: $ram_slice KB" + echo "ram_total: $ram_total KiB" + echo "ram_reserved: $ram_reserved KiB" + echo "ram_auxiliary: $ram_auxiliary KiB" + echo "ram_build_slice: $ram_build_slice KiB" + echo "ram_auxil_slice: $ram_auxil_slice KiB" echo - echo "buildid: $buildid" - echo "buildid_url: $buildid_url" + echo "buildid: $buildid" + echo "buildid_url: $buildid_url" echo local n i tn tp tu tt @@ -276,23 +328,41 @@ function print () tc="$(toolchain_value "$tp" nice)" tb="$(toolchain_value "$tp" bridge)" ti="$(toolchain_value "$tp" instances)" + ta="$(toolchain_value "$tp" interactive)" tu="$(toolchain_value "$tp" toolchain_url)" tt="$(toolchain_value "$tp" toolchain_trust)" - echo "$tn.nice: $tc" - echo "$tn.bridge: $tb" - echo "$tn.instances: $ti" - echo "$tn.toolchain_url: $tu" - echo "$tn.toolchain_trust: $tt" + tbt="$(toolchain_value "$tp" build_timeout)" + tst="$(toolchain_value "$tp" bootstrap_timeout)" + tat="$(toolchain_value "$tp" interactive_timeout)" + + echo "$tn.nice: $tc" + echo "$tn.bridge: $tb" + echo "$tn.instances: $ti" + echo "$tn.interactive: $ta" + echo "$tn.toolchain_url: $tu" + echo "$tn.toolchain_trust: $tt" + + if [ -n "$tbt" ]; then + echo "$tn.build_timeout: $tbt" + fi + + if [ -n "$tst" ]; then + echo "$tn.bootstrap_timeout: $tst" + fi + + if [ -n "$tat" ]; then + echo "$tn.interactive_timeout: $tat" + fi n="${tp}controller_url[@]" for i in "${!n}"; do - echo "$tn.controller_url: $i" + echo "$tn.controller_url: $i" done n="${tp}controller_trust[@]" for i in "${!n}"; do - echo "$tn.controller_trust: $i" + echo "$tn.controller_trust: $i" done echo @@ -801,9 +871,14 @@ function bbot_start () # <toolchain-name> <toolchain-index> local tc="$(toolchain_value "$tp" nice)" local tb="$(toolchain_value "$tp" bridge)" local ti="$(toolchain_value "$tp" instances)" + local ta="$(toolchain_value "$tp" 
interactive)" local tv="$(toolchain_value "$tp" toolchain_fver)" local ts="$(toolchain_value "$tp" toolchain_file_csum)" + local tbt="$(toolchain_value "$tp" build_timeout)" + local tst="$(toolchain_value "$tp" bootstrap_timeout)" + local tat="$(toolchain_value "$tp" interactive_timeout)" + local id="/build/bots/$tn" mkdir -p "$id" @@ -826,7 +901,9 @@ function bbot_start () # <toolchain-name> <toolchain-index> # if [ "$b_word" = "configured" ]; then - for ((i=1; i <= ti; i++)); do + # Note: stop extra instance. + # + for ((i=1; i <= ti + 1; i++)); do if ! sudo systemctl stop "bbot-agent-$tn@$i"; then info "failed to stop bbot-agent-$tn@$i service, assuming not running" continue @@ -867,17 +944,44 @@ function bbot_start () # <toolchain-name> <toolchain-index> sed -i -r \ -e "s#%I#$tn/%I#g" \ -e "s/^(Environment=CPU)=.*/\1=$cpu_slice/" \ - -e "s/^(Environment=RAM)=.*/\1=$ram_slice/" \ + -e "s/^(Environment=RAM_BUILD)=.*/\1=$ram_build_slice/" \ + -e "s/^(Environment=RAM_AUXIL)=.*/\1=$ram_auxil_slice/" \ -e "s/^(Environment=BRIDGE)=.*/\1=$tb/" \ -e "s#^(Environment=AUTH_KEY)=.*#\1=/state/etc/host-key.pem#" \ + -e "s/^(Environment=INTERACTIVE)=.*/\1=$ta/" \ -e "s/^(Environment=TOOLCHAIN_ID)=.*/\1=$ts/" \ -e "s/^(Environment=TOOLCHAIN_NUM)=.*/\1=$tx/" \ -e "s/^(Environment=TOOLCHAIN_VER)=.*/\1=$tv/" \ -e "s/^(Environment=TOOLCHAIN_NAME)=.*/\1=$tn/" \ + -e "s/^(Environment=INSTANCE_MAX)=.*/\1=$ti/" \ -e "s/^(Nice)=.*/\1=$tc/" \ -e "s#^ExecStart=[^ ]+(.*)#ExecStart=$id/bin/bbot-agent\1#" \ "$id/lib/systemd/system/bbot-agent@.service" + # Patch in CPU affinity. + # + if [ -n "$cpu_affinity" ]; then + sed -i -r -e "s/^(CPUAffinity)=.*/\1=$cpu_affinity/" \ + "$id/lib/systemd/system/bbot-agent@.service" + fi + + # Patch in build/bootstrap/interactive timeouts. 
+ # + if [ -n "$tbt" ]; then + sed -i -r -e "s/^(Environment=BUILD_TIMEOUT)=.*/\1=$tbt/" \ + "$id/lib/systemd/system/bbot-agent@.service" + fi + + if [ -n "$tst" ]; then + sed -i -r -e "s/^(Environment=BOOTSTRAP_TIMEOUT)=.*/\1=$tst/" \ + "$id/lib/systemd/system/bbot-agent@.service" + fi + + if [ -n "$tat" ]; then + sed -i -r -e "s/^(Environment=INTERACTIVE_TIMEOUT)=.*/\1=$tat/" \ + "$id/lib/systemd/system/bbot-agent@.service" + fi + # Patch in the controller URLs. These can contain special characters # like `&` so we have to escape them. # @@ -927,8 +1031,10 @@ function bbot_start () # <toolchain-name> <toolchain-index> # the process has forked. Making sure the service has actually started is # done as part of the service monitoring. # + # Note: start extra instance. + # r=0 - for ((i=1; i <= ti; i++)); do + for ((i=1; i <= ti + 1; i++)); do if ! sudo systemctl start "bbot-agent-$tn@$i"; then info "failed to start bbot-agent-$tn@$i service instance" r=1 @@ -953,6 +1059,7 @@ declare -A toolchain_cursors # Latest systemd journal cursor. # Monitoring loop. # +sensors=true count=0 while true; do @@ -1102,7 +1209,9 @@ EOF # For each service instance check if it has failed. # - for ((i=1; i <= ti; i++)); do + # Note: check extra instance. + # + for ((i=1; i <= ti + 1; i++)); do if sudo systemctl is-failed --quiet "bbot-agent-$tn@$i"; then s="bbot-agent-$tn@$i service has failed, stopping" @@ -1198,6 +1307,8 @@ EOF # subject line (note that there can be a mix so we have to # try in the priority order). # + # @@ pipefail + # p=2 s="$("${c[@]}" --output cat --priority 2 | head -n 1)" if [ -z "$s" ]; then @@ -1214,6 +1325,8 @@ EOF s="bbot-agent-$tn@$i: $s" + # @@ pipefail + # info "$s" { echo "$tn.bbot_cmd: ssh build@$hname ${c[@]}"; @@ -1267,7 +1380,13 @@ EOF done fi - sensors -A + if [ "$sensors" ]; then + if ! sensors -A; then + info "unable to query sensors, disabling" + sensors= + fi + fi + info "monitoring..." sleep 60 done @@ -1,6 +1,6 @@ #! 
/usr/bin/env bash -version=0.14.0-a.0.z +version=0.17.0-a.0.z trap 'exit 1' ERR set -o errtrace # Trap in functions. diff --git a/doc/manual.cli b/doc/manual.cli index 5187e87..2852064 100644 --- a/doc/manual.cli +++ b/doc/manual.cli @@ -181,23 +181,47 @@ sudo kvm \ A Build OS instance divides available CPUs and RAM (minus reserved, see below) into \i{slices} that are then \i{committed} to each instance of each -toolchain. If you don't expect your builds to utilize these resources at the -same time, then it may make sense to overcommit them to improve utilization. -The respective overcommit values can be specified as ratios with the -\c{buildos.cpu_overcommit} and \c{buildos.ram_overcommit} kernel command -line parameters. For example, given the following CPU overcommit: +toolchain. In case of CPU it normally makes sense to overcommit this resource +in order to improve utilization. This can be achieved by specifying the +overcommit value as a ratio with \c{buildos.cpu_overcommit}. For example, +given the following CPU overcommit: \ buildos.cpu_overcommit=3/2 \ -A Build OS machine with 8 CPUs (hardware threads) and three toolchains will +A Build OS machine with 8 CPUs (hardware threads) and three instances will assign 4 CPUs (\c{8 * 3/2 / 3}) to each slice. -It is also possible to reserve a number of CPUs and an amount of RAM to -Build OS with the \c{buildos.cpu_reserved} and \c{buildos.ram_reserved} -(in GB) kernel command line parameters. If unspecified, 4GB of RAM is -reserved by default. +It is also possible to specify CPU affinity with \c{buildos.cpu_affinity}. +For example, specifying: + +\ +buildos.cpu_affinity=2-9 +\ + +will restrict the instances to only running on CPUs 2-9. + +It is possible to reserve a number of CPUs and an amount of RAM to Build OS +with the \c{buildos.cpu_reserved} and \c{buildos.ram_reserved} (in GiB) kernel +command line parameters. If unspecified, 4GiB of RAM is reserved by default.
+ +An amount of RAM can be reserved for auxiliary machines with +\c{buildos.ram_auxiliary}. This amount will also be divided into slices and +committed to each instance. + +Finally, if the total available RAM cannot be auto-detected, it can be +specified manually with \c{buildos.ram_total}. Here is a complete example of +specifying all the possible RAM values: + +\ +buildos.ram_total=64 +buildos.ram_reserved=4 +buildos.ram_auxiliary=12 +\ + +Assuming three instances, the configuration will assign 16GiB of build and +4GiB of auxiliary RAM to each instance and keep 4GiB reserved to Build OS. \h#config-storage|Storage| @@ -280,6 +304,12 @@ confirm over-provisioning, format the disk as \c{btrfs}, and label it as # ^D # Exit shell and reboot. \ +To create a single \c{btrfs} disk that spans multiple physical devices: + +\ +# mkfs.btrfs -L buildos.machines -d single -m single /dev/sda /dev/sdb +\ + \h#config-net|Network| Network is configured via DHCP. Initially, all Ethernet interfaces that have diff --git a/doc/style b/doc/style -Subproject 10f31a8bea8e5817fccf01978009c1ecaf3eabf +Subproject b72eb624d13b1628e27e9f6c0b3c80853e8e015 @@ -40,7 +40,15 @@ mount -t proc -o nodev,noexec,nosuid proc /proc info "init starting up..." -mount -t devtmpfs -o noexec,nosuid,mode=0755 udev /dev +mount -t devtmpfs -o nosuid,mode=0755 udev /dev + +# Prepare the /dev directory. +# +ln -s /proc/self/fd /dev/fd +ln -s /proc/self/fd/0 /dev/stdin +ln -s /proc/self/fd/1 /dev/stdout +ln -s /proc/self/fd/2 /dev/stderr + mkdir -p /dev/pts mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true @@ -70,13 +78,23 @@ udevadm trigger --type=subsystems --action=add udevadm trigger --type=devices --action=add udevadm settle || true +# On 6-series kernels we seem to be executed a lot earlier (or a lot faster) +# with many devices (Ethernet, USB storage) not being discovered yet (and +# devices that require firmware generally taking a while). 
So let's wait a +# bit for things to settle down. +# +for s in 5 4 3 2 1; do + info "waiting for devices ${s}s..." + sleep 1 +done + # Detect hardware sensors. # sensors-detect --auto # Initialize KVM. # -#if ! (modprobe kvm_intel || modprobe kvm_amd); then +#if ! (/sbin/modprobe kvm_intel || /sbin/modprobe kvm_amd); then # error "no virtualization support available (is it disabled in BIOS?)" #fi @@ -86,6 +104,11 @@ sensors-detect --auto # foo='foo fox' # bar="bar 'box'" # +# Or (as rewritten by GRUB): +# +# 'foo=foo fox' +# "bar=bar 'box'" +# # First we separete quoted variables and arguments with newlines (giving # priority to assignments). Then we replace whitespaces with newline on # lines that don't contain quotes. Finally, clean up by removing blank @@ -102,11 +125,24 @@ readarray -t cmdline < <(cat /proc/cmdline | \ # info "command line:" for v in "${cmdline[@]}"; do - var="$(sed -n -re 's/^buildos\.([^=]+)=.*$/\1/p' <<<"$v")" # Extract name. + + # Rewrite "x=y" as x="y" (as well as the single-quote variant). + # + v1="$(sed -n -re "s/^\"([^= ]+)=(.*)\"\$/\1=\"\2\"/p" <<<"$v")" + if [ -n "$v1" ]; then + v="$v1" + else + v1="$(sed -n -re "s/^'([^= ]+)=(.*)'\$/\1='\2'/p" <<<"$v")" + if [ -n "$v1" ]; then + v="$v1" + fi + fi + + var="$(sed -n -re 's/^buildos\.([^= ]+)=.*$/\1/p' <<<"$v")" # Extract name. if [ -n "$var" ]; then - val="$(sed -re 's/^[^=]+=(.*)$/\1/' <<<"$v")" # Extract value. - val="$(sed -re "s/^('(.*)'|\"(.*)\")$/\2\3/" <<<"$val")" # Strip quoted. + val="$(sed -re 's/^[^= ]+=(.*)$/\1/' <<<"$v")" # Extract value. + val="$(sed -re "s/^('(.*)'|\"(.*)\")\$/\2\3/" <<<"$val")" # Strip quoted. info " $var=$val" # If the variable contains a dot, then it is a toolchain variable and we @@ -242,10 +278,6 @@ priv_netbase="$(sed -e 's/^\(.*\)\.0\.0$/\1/' <<<"$priv_network")" # joins the bridge. Needless to say, constantly changing MAC will wreck # all kinds of networking havoc. 
# -# While bridge_hw should be (and used to be) enough, something was broken (bug -# #945466) and now we set it manually and also pass it to DHCP (which runs -# before post-up). -# cat <<EOF >/etc/network/interfaces auto lo iface lo inet loopback @@ -259,8 +291,6 @@ iface br0 inet dhcp bridge_maxwait 0 bridge_fd 0 bridge_hw $gmac - hwaddress $gmac - post-up ip link set br0 address $gmac post-up ip link set $eth txqueuelen 4000 post-up ip link set br0 txqueuelen 4000 @@ -275,7 +305,7 @@ iface br1 inet static bridge_maxwait 0 bridge_fd 0 bridge_hw $lmac - post-up ip link set br1 address $lmac + #post-up ip link set br1 address $lmac post-up ip link set br1 txqueuelen 4000 post-up iptables -t nat -A POSTROUTING -o br0 -j MASQUERADE post-up iptables -A FORWARD -i br0 -o br1 -m state --state RELATED,ESTABLISHED -j ACCEPT @@ -297,7 +327,8 @@ echo -n '' >$fstab l= state= -machines= +declare -A machines +machines_mode= while read l || [ -n "$l" ]; do d="$(sed -re 's/.*NAME=\"([^\"]+)\".*/\1/' <<<"$l")" t="$(sed -re 's/.*FSTYPE=\"([^\"]*)\".*/\1/' <<<"$l")" @@ -356,42 +387,29 @@ while read l || [ -n "$l" ]; do if [ "$l" = "machines" ]; then # Single mount. # - if [ -n "$machines" ]; then + if [ "$machines_mode" = "multiple" ]; then error "multiple disks labeled with buildos.machines/machines.*" fi m=/build/machines/default - machines="single" + machines["$m"]="${machines["$m"]} $d" + + machines_mode="single" else # Multiple mounts. # - if [ "$machines" = "single" ]; then + if [ "$machines_mode" = "single" ]; then error "multiple disks labeled with buildos.machines/machines.*" fi n="$(sed -n -re 's/^machines\.([^ ]+)$/\1/p' <<<"$l")" m="/build/machines/$n" - machines="multiple" - fi - - info "mounting $d (buildos.$l) on $m" + machines["$m"]="${machines["$m"]} $d" - # Check it. - # - if ! 
btrfs check -p "$d"; then - info "$d (buildos.$l) has errors; run btrfs check -p --repair $d" - error + machines_mode="multiple" fi - o="defaults,noatime,nodiratime,user_subvol_rm_allowed" - echo "$d $m btrfs $o 0 0" >>$fstab - - # Mount it and change the owner of the filesystem root. - # - mkdir -p "$m" - mount -t btrfs -o "$o" "$d" "$m" - chown build:build "$m" - + info "will be mounting $d (buildos.$l) on $m" continue fi done < <(lsblk --pairs --paths --output NAME,FSTYPE,LABEL) @@ -407,13 +425,47 @@ if [ -z "$state" ]; then error fi -if [ -z "$machines" ]; then +if [ "${#machines[@]}" -eq 0 ]; then info "no disks labaled with buildos.machines* among:" lsblk --paths --output NAME,TYPE,FSTYPE,SIZE,LABEL,UUID info "consider formatting and/or labelling a suitable disk" error fi +for m in "${!machines[@]}"; do + + ds="${machines["$m"]}" # Array-like list of devices. + + info "mounting $ds on $m" + + # Check the devices and collect them in options. + # + fd= + o= + for d in $ds; do + if ! btrfs check -p "$d"; then + info "$d has errors; run btrfs check -p --repair $d" + error + fi + + if [ -z "$fd" ]; then + fd="$d" + o="device=$d" + else + o="$o,device=$d" + fi + done + + o="$o,defaults,noatime,nodiratime,user_subvol_rm_allowed" + echo "$fd $m btrfs $o 0 0" >>$fstab + + # Mount it and change the owner of the filesystem root. + # + mkdir -p "$m" + mount -t btrfs -o "$o" "$fd" "$m" + chown build:build "$m" +done + # Create /build/tftp. We make it a size-limited tmpfs since potentially- # compromized VMs will be able to upload to. # @@ -1,6 +1,6 @@ : 1 name: buildos -version: 0.14.0-a.0.z +version: 0.17.0-a.0.z project: build2 summary: build2 Build OS license: other: TODO ; License is not yet decided, currently all rights reserved. 
@@ -17,5 +17,5 @@ src-url: https://git.build2.org/cgit/buildos/tree/ email: users@build2.org build-warning-email: builds@build2.org -depends: * build2 >= 0.13.0 -depends: * bpkg >= 0.13.0 +depends: * build2 >= 0.16.0- +depends: * bpkg >= 0.16.0- @@ -1,6 +1,8 @@ #! /usr/bin/env bash # Flatten a multi-line PXELINUX config file ignoring comments/blank lines. +# Can also be used for GRUB config files (note that GRUB rewrites x="y" as +# "x=y"). # # The text that appears after the 'append' clause in PXELINUX config files # must all be on a single line. There is no support even for line @@ -8,7 +10,8 @@ # # To help with this situation, this script will take a milti-line file (or # stdin if none is specified) and write flattened, single line output into -# stdout ignoring #-comments and blank lines while at it. +# stdout ignoring #-comments and blank lines while at it. Note that this +# script prints a leading space. # # If you have prepared your PXELINUX config file so that the 'append' clause # is the last line without a trailing newline (and without a trailing space), diff --git a/remove-machine b/remove-machine index 71c1fc1..13ce31f 100755 --- a/remove-machine +++ b/remove-machine @@ -69,7 +69,7 @@ sv=($(ssh "$host" "shopt -s nullglob; echo $machines/$mname/$mname-*-*/")) for d in "${sv[@]}"; do while ssh "$host" test -d "$d"; do echo "waiting for $d to disappear..." - sleep 10 + sleep 1 done done @@ -83,7 +83,9 @@ sv=($(ssh "$host" "shopt -s nullglob; echo $machines/$mname/$mname-*/")) for d in "${sv[@]}"; do set -x - ssh "$host" btrfs property set -ts "$d" ro false + # See upload-machine on the -f (force) flags. + # + ssh "$host" btrfs property set -f -ts "$d" ro false ssh "$host" btrfs subvolume delete "$d" { set +x; } 2>/dev/null done diff --git a/test-machine b/test-machine index 933a52e..cdd6001 100755 --- a/test-machine +++ b/test-machine @@ -6,6 +6,9 @@ # Network adapter to use, for example, virtio-net-pci (default), e1000, # or vmxnet3. 
# +# -t <tap> +# Existing tap interface to use instead of creating a new one (as tap9). +# usage="usage: $0 [-n <nic>] <machine-dir> [<qemu-option>...]" owd="$(pwd)" @@ -15,7 +18,14 @@ set -o errtrace # Trap in functions. function info () { echo "$*" 1>&2; } function error () { info "$*"; exit 1; } +br=br0 +mac="de:ad:be:ef:b8:da" + +arch="$(uname -m)" +kvm=("qemu-system-$arch" -enable-kvm) + nic=virtio-net-pci +etap= while [ "$#" -gt 0 ]; do case "$1" in @@ -24,6 +34,11 @@ while [ "$#" -gt 0 ]; do nic="$1" shift ;; + -t) + shift + etap="$1" + shift + ;; *) break ;; @@ -38,17 +53,16 @@ if [ -z "$dir" ]; then error "missing machine directory" fi -br=br0 -mac="de:ad:be:ef:b8:da" -kvm=(qemu-system-x86_64 -enable-kvm) - -tap=tap0 - -sudo ip tuntap delete "$tap" mode tap || true -sudo ip tuntap add "$tap" mode tap user "$(whoami)" -sudo ip link set "$tap" up -#sleep 0.5s -sudo ip link set "$tap" master "$br" +if [ -z "$etap" ]; then + tap=tap9 + sudo ip tuntap delete "$tap" mode tap || true + sudo ip tuntap add "$tap" mode tap user "$(whoami)" + sudo ip link set "$tap" up + #sleep 0.5s + sudo ip link set "$tap" master "$br" +else + tap="$etap" +fi "${kvm[@]}" \ -m 8G \ @@ -60,6 +74,9 @@ sudo ip link set "$tap" master "$br" -drive "if=none,id=disk0,file=$dir/disk.img,format=raw" \ -device "virtio-blk-pci,scsi=off,drive=disk0" \ \ + -usb \ + -device usb-tablet \ + \ -chardev stdio,id=qmp \ -mon chardev=qmp,mode=control,pretty=on \ \ @@ -84,7 +101,9 @@ sudo ip link set "$tap" master "$br" #-vnc "unix:$dir/vnc" \ #-monitor "unix:$dir/monitor,server,nowait" -sudo ip tuntap delete "$tap" mode tap +if [ -z "$etap" ]; then + sudo ip tuntap delete "$tap" mode tap +fi exit 0 diff --git a/test-machine-aarch64-m1 b/test-machine-aarch64-m1 new file mode 100755 index 0000000..990f5a8 --- /dev/null +++ b/test-machine-aarch64-m1 @@ -0,0 +1,115 @@ +#! /usr/bin/env bash + +# Test a virtual machine with KVM on Apple M1. Notes: +# +# - Need QEMU 7 or later. +# - Login via VNC to :5901. 
+# - Expect to find QEMU_EFI.fd and QEMU_VARS.fd next to disk.img. +# - Installing with -cdrom does not work, have to use scsi-cd: +# +# -device virtio-scsi-pci,id=scsi0 \ +# -drive if=none,id=cd,file=/tmp/debian-....iso \ +# -device scsi-cd,drive=cd +# +# - Replaced -usb (EHCI) with -device qemu-xhci (XHCI). +# - Added virtio-gpu-pci (otherwise no graphical output). +# - Added usb-kbd (otherwise no keyboard). +# - Note: position of netdev dictates enp0sN name, so keep first PCI device. +# - Must run on either P or E cores (https://gitlab.com/qemu-project/qemu/-/issues/1002). +# - On M1 0-3 are E, 4-7 are P (lscpu, lscpu -e). +# - On M1 Max 0-1 are E, 2-9 are P. +# +# -n <nic> +# Network adapter to use, for example, virtio-net-pci (default), e1000, +# or vmxnet3. +# +# -t <tap> +# Existing tap interface to use instead of creating a new one (as tap9). +# +usage="usage: $0 [-n <nic>] <machine-dir> [<qemu-option>...]" + +owd="$(pwd)" +trap "{ cd '$owd'; exit 1; }" ERR +set -o errtrace # Trap in functions. 
+ +function info () { echo "$*" 1>&2; } +function error () { info "$*"; exit 1; } + +br=br0 +mac="de:ad:be:ef:b8:da" + +arch="$(uname -m)" +kvm=(taskset -c 4-7 "qemu-system-$arch" -enable-kvm) + +nic=virtio-net-pci +etap= + +while [ "$#" -gt 0 ]; do + case "$1" in + -n) + shift + nic="$1" + shift + ;; + -t) + shift + etap="$1" + shift + ;; + *) + break + ;; + esac +done + + +dir="${1%/}" +shift + +if [ -z "$dir" ]; then + error "missing machine directory" +fi + +if [ -z "$etap" ]; then + tap=tap9 + sudo ip tuntap delete "$tap" mode tap || true + sudo ip tuntap add "$tap" mode tap user "$(whoami)" + sudo ip link set "$tap" up + #sleep 0.5s + sudo ip link set "$tap" master "$br" +else + tap="$etap" +fi + +"${kvm[@]}" \ + -machine virt \ + \ + -m 4G \ + -cpu host -smp "4,sockets=1,cores=4,threads=1" \ + \ + -drive "if=pflash,format=raw,readonly=on,file=$dir/QEMU_EFI.fd" \ + -drive "if=pflash,format=raw,file=$dir/QEMU_VARS.fd" \ + \ + -netdev "tap,id=net0,ifname=$tap,script=no" \ + -device "$nic,netdev=net0,mac=$mac" \ + \ + -drive "if=none,id=disk0,file=$dir/disk.img,format=raw" \ + -device "virtio-blk-pci,scsi=off,drive=disk0" \ + \ + -device qemu-xhci \ + -device usb-kbd \ + -device usb-tablet \ + \ + -device virtio-gpu-pci \ + -display default,show-cursor=on \ + \ + -vnc :1 \ + \ + -chardev stdio,id=qmp \ + -mon chardev=qmp,mode=control,pretty=on \ + \ + -boot c "$@" + +if [ -z "$etap" ]; then + sudo ip tuntap delete "$tap" mode tap +fi diff --git a/upload-machine b/upload-machine index cffcd7b..b726429 100755 --- a/upload-machine +++ b/upload-machine @@ -14,7 +14,8 @@ usage="usage: $0 [<options>] <host> <new-subvol> [<old-subvol>]" owd="$(pwd)" trap "{ cd '$owd'; exit 1; }" ERR -set -o errtrace # Trap in functions. +set -o errtrace -o pipefail # Trap in functions. + # Fail if any pipe command fails. 
function info () { echo "$*" 1>&2; } function error () { info "$*"; exit 1; } @@ -85,25 +86,24 @@ oldsv_host="$machines/$mname/$oldsv_name" # Make sure subvolumes are read-only. # -function check_ro () # <subvol> +function make_ro () # <subvol> { local r; r="$(btrfs property get -ts "$1" ro)" if [ "$r" != "ro=true" ]; then - info "subvolume '$1' is not read-only; to change, run:" - info " btrfs property set -ts $1 ro true" - exit 1 + info "subvolume '$1' is read-write, changing to read-only" + btrfs property set -ts "$1" ro true fi } -check_ro "$newsv" +make_ro "$newsv" if [ -n "$oldsv" ]; then - check_ro "$oldsv" + make_ro "$oldsv" fi # btrfs send command # -send=(sudo btrfs send) +send=(btrfs send) if [ -n "$oldsv" ]; then send+=(-p "$oldsv") fi @@ -121,7 +121,15 @@ sudo "${send[@]}" | ssh "$host" sudo btrfs receive "$machines/$mname/" # Adjust machine ownership. # -ssh "$host" sudo btrfs property set -ts "$newsv_host" ro false +# Recent btrfs-progs require the force flag (-f) (Debian bug #1019377). +# Turns out btrfs now strips the subvolume uuid if we make it rw, which +# will prevent it from being used as a base for incremental send. +# +# @@ Maybe we should just keep the original as is (for incremental send) +# and make another copy where we change the ownership? Note: will also +# need to update remove-machine. +# +ssh "$host" sudo btrfs property set -f -ts "$newsv_host" ro false ssh "$host" sudo chown "$user:$user" "$newsv_host" ssh "$host" sudo chown "$user:$user" "$newsv_host/*" ssh "$host" btrfs property set -ts "$newsv_host" ro true @@ -5,7 +5,7 @@ # If the tftp server host is not specified, then build@build-cache is # assumed. The images are uploaded to /var/lib/tftpboot/buildos-devel/. # -usage="usage: $0 [<user>@<host>]" +usage="usage: $0 [-a <arch>] [<user>@<host>]" owd="$(pwd)" trap "{ cd '$owd'; exit 1; }" ERR @@ -14,6 +14,26 @@ set -o errtrace # Trap in functions. 
function info () { echo "$*" 1>&2; } function error () { info "$*"; exit 1; } +arch= + +while [ "$#" -gt 0 ]; do + case "$1" in + -a) + shift + arch="$1" + shift + break + ;; + *) + break + ;; + esac +done + +if [ -z "$arch" ]; then + arch="$(uname -m)" +fi + if [ -z "$1" ]; then host="build@build-cache" else @@ -25,5 +45,5 @@ fi # is a bit more disk space used to temporarily hold copies. # rsync -v --progress -lpt -c --copy-unsafe-links --delay-updates \ - buildos-image buildos-initrd buildos-buildid \ + "buildos-image-$arch" "buildos-initrd-$arch" "buildos-buildid-$arch" \ $host:/var/lib/tftpboot/buildos-devel/ |