2 files changed, 92 insertions, 28 deletions
diff --git a/buildos b/buildos
index dcdde65..25029b0 100755
--- a/buildos
+++ b/buildos
@@ -155,6 +155,7 @@ for tn in "${!toolchains[@]}"; do
   declare "${tp}toolchain_csum=$(sed -n -re 's%^.+\.([^.]+)$%\1%p' <<<"$tf")"
   declare "${tp}toolchain_root=/build/tftp/toolchains/$tn"
   declare "${tp}toolchain_ver="
+  declare "${tp}toolchain_fver=" # Full version (with snapshot).
 
   # If buildos.toolchain_trust was not specified, set it to "no" so that
   # we don't prompt if the repository happens to be signed.
@@ -182,12 +183,20 @@ fi
 ram_total="$(sed -n -re 's/^MemTotal: *([0-9]+) *kB$/\1/p' </proc/meminfo)"
 cpu_total="$(lscpu | sed -n -re 's/^CPU\(s\): *([0-9]+)$/\1/p')"
 
+if [ -z "$ram_overcommit" ]; then
+  ram_overcommit=1
+fi
+
+if [ -z "$cpu_overcommit" ]; then
+  cpu_overcommit=1
+fi
+
 ram_slice=$(("$ram_total" - 4 * 1024 * 1024))
 cpu_slice="$cpu_total"
 
 if [ "${#toolchain_names[@]}" -gt 1 ]; then
-  ram_slice=$(("$ram_slice" / "${#toolchain_names[@]}"))
-  cpu_slice=$(("$cpu_slice" / "${#toolchain_names[@]}"))
+  ram_slice=$(("$ram_slice" * "$ram_overcommit" / "${#toolchain_names[@]}"))
+  cpu_slice=$(("$cpu_slice" * "$cpu_overcommit" / "${#toolchain_names[@]}"))
 
   if [ "$cpu_slice" -eq 0 ]; then
     cpu_slice=1
@@ -198,15 +207,18 @@ fi
 #
 function print ()
 {
-  echo "cpu_total: $cpu_total"
-  echo "cpu_slice: $cpu_slice"
+  echo "cpu_total:      $cpu_total"
+  echo "cpu_overcommit: $cpu_overcommit"
+  echo "cpu_slice:      $cpu_slice"
+  echo
 
-  echo "ram_total: $ram_total kB"
-  echo "ram_slice: $ram_slice kB"
+  echo "ram_total:      $ram_total kB"
+  echo "ram_overcommit: $ram_overcommit"
+  echo "ram_slice:      $ram_slice kB"
   echo
 
-  echo "buildid:       $buildid"
-  echo "buildid_url:   $buildid_url"
+  echo "buildid:        $buildid"
+  echo "buildid_url:    $buildid_url"
   echo
 
   local n i tn tp tu tt
@@ -493,6 +505,8 @@ function toolchain_fetch () # <toolchain-prefix> <line>
 
     info "toolchain version $tv"
 
+    declare -g "${tp}toolchain_fver=$tv" # Full version.
+    echo "$tv" >"$tr/version-full"
     l="$(sed -n -re "s/^(.+)-$tv(.*)$/\1\2/p" <<<"$f")" # Use full version.
 
     # Strip snapshot.
@@ -542,6 +556,9 @@ function toolchain_fetch () # <toolchain-prefix> <line>
 
 # Bootstrap the toolchain.
 #
+# Return 0 on success, 1 if the toolchain is disabled, and 2 in case of
+# an error.
+#
 function toolchain_bootstrap () # <toolchain-name>
 {
   local tn="$1"
@@ -556,13 +573,24 @@ function toolchain_bootstrap () # <toolchain-name>
 
   readarray -t ls < <(sed -e '/^\s*#/d;/^\s*$/d' "$tr/$tf")
 
+  if [ "${#ls[@]}" -eq 0 ]; then
+    info "empty $tr/$tf"
+    return 2
+  fi
+
+  # Check if this toolchain is disabled.
+  #
+  if [ "${ls[0]}" = "disabled" ]; then
+    return 1
+  fi
+
   for l in "${ls[@]}"; do
     if ! toolchain_fetch "$tp" "$l"; then
-      return 1 # Diagnostics has already been issued.
+      return 2 # Diagnostics has already been issued.
     fi
   done
 
-  local tv="$(toolchain_value "$tp" toolchain_ver)" # Should be set by fetch().
+  local tv="$(toolchain_value "$tp" toolchain_fver)" # Set by fetch().
   local tt="$(toolchain_value "$tp" toolchain_trust)"
 
   # Save the repository certificate fingerprint into the trust file (used
@@ -578,7 +606,7 @@ function toolchain_bootstrap () # <toolchain-name>
   mkdir -p "$wd"
   mkdir -p "$id"
 
-  local r=1
+  local r=2
 
   cd "$wd"
   while true; do # The "breakout loop".
@@ -669,6 +697,7 @@ function bbot_start () # <toolchain-name> <toolchain-index>
   local ti="$2"
 
   local tp="${toolchains["$tn"]}"
+  local tv="$(toolchain_value "$tp" toolchain_fver)"
   local ts="$(toolchain_value "$tp" toolchain_file_csum)"
 
   local id="/build/bots/$tn"
@@ -727,12 +756,15 @@ function bbot_start () # <toolchain-name> <toolchain-index>
 	-e "s/^(Environment=RAM)=.*/\1=$ram_slice/" \
 	-e "s/^(Environment=TOOLCHAIN_ID)=.*/\1=$ts/" \
 	-e "s/^(Environment=TOOLCHAIN_NUM)=.*/\1=$ti/" \
+	-e "s/^(Environment=TOOLCHAIN_VER)=.*/\1=$tv/" \
 	"$id/lib/systemd/system/bbot-agent@.service"
 
-    # Patch in the controller URLs.
+    # Patch in the controller URLs. These can contain special characters
+    # like & so we have to escape them.
     #
     n="${tp}controller_url[@]"
     for i in "${!n}"; do
+      i="$(sed -e 's/[&/\]/\\&/g' <<<"$i")"
       sed -i -r \
           -e "s#^(Environment=\"CONTROLLER_URL=[^\"]*)\"\$#\1 $i\"#" \
 	  "$id/lib/systemd/system/bbot-agent@.service"
@@ -855,16 +887,26 @@ EOF
 
 	toolchain_bootstrap "$tn" 2>&1 | tee "$tr/toolchain-$count.log" 1>&2
 
-	if [ "${PIPESTATUS[0]}" -eq 0 ]; then
-	  v="$(cat $tr/version)"
-	  declare "${tp}toolchain_ver=$v"
-
-	  s="bootstrapped $tn toolchain $v"
-	  toolchain_boots+=("$tn")
-	else
-	  s="failed to bootstrap $tn toolchain, waiting for new version"
-	  toolchain_boots+=("") # Skip.
-	fi
+	case "${PIPESTATUS[0]}" in
+	  0)
+	    tv="$(cat $tr/version)"
+	    declare "${tp}toolchain_ver=$tv"
+
+	    tv="$(cat $tr/version-full)"
+	    declare "${tp}toolchain_fver=$tv"
+
+	    s="bootstrapped $tn toolchain $tv"
+	    toolchain_boots+=("$tn")
+	    ;;
+	  1)
+	    s="skipping disabled $tn toolchain, waiting for new version"
+	    toolchain_boots+=("") # Skip.
+	    ;;
+	  *)
+	    s="failed to bootstrap $tn toolchain, waiting for new version"
+	    toolchain_boots+=("") # Skip.
+	    ;;
+	esac
 
 	info "$s"
 	email "$s" <<EOF
@@ -893,6 +935,8 @@ EOF
 	continue
       fi
 
+      tp="${toolchains["$tn"]}"
+
       # Or those that have no controllers (maybe it would have been better
       # to build it but not start).
       #
diff --git a/doc/manual.cli b/doc/manual.cli
index 1acf526..0178738 100644
--- a/doc/manual.cli
+++ b/doc/manual.cli
@@ -178,6 +178,24 @@ sudo kvm \
 
 \h1#config|Configuration|
 
+\h#config-cpu-ram|CPU and RAM|
+
+A Build OS instance divides available CPUs and RAM (minus 4G) into \i{slices}
+that are then \i{committed} to each toolchain. If you don't expect your
+toolchains to utilize these resources at the same time, then it may make
+sense to overcommit them to improve utilization. The respective overcommit
+values can be specified as ratios with the \c{buildos.cpu_overcommit}
+and \c{buildos.ram_overcommit} kernel command line parameters. For example,
+given the following CPU overcommit:
+
+\
+buildos.cpu_overcommit=3/2
+\
+
+A Build OS machine with 8 CPUs (hardware threads) and three toolchains will
+assign 4 CPUs (\c{8 * 3/2 / 3}) to each slice.
+
+
 \h#config-storage|Storage|
 
 Build OS configures storage based on the labels assigned to disks and
@@ -324,12 +342,14 @@ for example, \c{buildos.toolchain_url.<name>} (values without the toolchain
 name use the toolchain name \c{default}). The toolchain name may not contain
 \c{-}.
 
-Each line in the checksums file is the output of the \c{shaNNNsum(1)} utility,
-that is, the SHANNN sum following by space, an asterisk (\c{*}) which signals
-the binary mode), and the relative file path. Blank lines and lines that start
-with \c{#} are ignored. The extension of the checksums file should be
-\c{.shaNNN} and the first entry should be for the \c{build2-toolchain} \c{tar}
-archive itself (used to derive the toolchain version). For example:
+In the checksums file, blank lines and lines that start with \c{#} are ignored.
+If the first line is the special \c{disabled} value, then this toolchain is
+ignored. Otherwise, each line in the checksums file is the output of the
+\c{shaNNNsum(1)} utility, that is, the SHANNN sum followed by a space, an
+asterisk (\c{*}, which signals the binary mode), and the relative file path.
+The extension of the checksums file should be \c{.shaNNN} and the first line
+should be for the \c{build2-toolchain} \c{tar} archive itself (used to derive
+the toolchain version). For example:
 
 \
 # toolchain.sha256