Diffstat (limited to 'doc/bash-style.cli')
-rw-r--r-- | doc/bash-style.cli | 461 |
1 file changed, 394 insertions(+), 67 deletions(-)
diff --git a/doc/bash-style.cli b/doc/bash-style.cli
index ef81af2..4edc984 100644
--- a/doc/bash-style.cli
+++ b/doc/bash-style.cli
@@ -15,7 +15,7 @@
 \h1#intro|Introduction|
 
 Bash works best for simple tasks. Needing arrays, arithmetic, and so on, is
-usually a good indication that the task at hand is too complex for Bash.
+usually a good indication that the task at hand may be too complex for Bash.
 
 Most of the below rules can be broken if there is a good reason for it.
 Besides making things consistent, rules free you from having to stop and think
@@ -31,8 +31,11 @@ the former provides a lot more rationale compared to this guide.
 \h1#style|Style|
 
 Don't use any extensions for your scripts. That is, call it just \c{foo}
-rather than \c{foo.sh} or \c{foo.bash}. Use lower-case letters and dash
-to separate words, for example \c{foo-bar}.
+rather than \c{foo.sh} or \c{foo.bash} (though we do use the \c{.bash}
+extension for
+\l{https://build2.org/build2/doc/build2-build-system-manual.xhtml#module-bash
+Bash modules}). Use lower-case letters and dash to separate words, for example
+\c{foo-bar}.
 
 Indentation is two spaces (not tabs). Maximum line length is 79 characters
 (excluding newline). Use blank lines between logical blocks to improve
@@ -45,16 +48,29 @@ For \c{if}/\c{while} and \c{for}/\c{do} the corresponding \c{then} or
 \c{do} is written on the same line after a semicolon, for example:
 
 \
-if [ ... ]; then
+if [[ ... ]]; then
+  ...
 fi
 
 for x in ...; do
+  ...
 done
 \
 
-For \c{if} use \c{[ ]} for basic tests and \c{[[ ]]} only if the previous form
-is not sufficient. Use \c{test} for filesystem tests (presence of files,
-etc). Do use \c{elif}.
+Do use \c{elif} instead of nested \c{else} and \c{if} (and consider if
+\c{case} can be used instead).
+
+For \c{if}/\c{while} use \c{[[ ]]} since it results in cleaner code for
+complex expressions, for example:
+
+\
+if [[ \"$foo\" && (\"$bar\" || \"$baz\") ]]; then
+  ...
+fi
+\
+
+\N|If for some reason you need the semantics of \c{[}, use \c{test} instead to
+make it clear this is intentional.|
 
 \h1#struct|Structure|
 
@@ -73,7 +89,10 @@ usage=\"usage: $0 <OPTIONS>\"
 
 owd=\"$(pwd)\"
 trap \"{ cd '$owd'; exit 1; }\" ERR
-set -o errtrace # Trap in functions.
+set -o errtrace # Trap in functions and subshells.
+set -o pipefail # Fail if any pipeline command fails.
+shopt -s lastpipe # Execute last pipeline command in the current shell.
+shopt -s nullglob # Expand no-match globs to nothing rather than themselves.
 
 function info () { echo \"$*\" 1>&2; }
 function error () { info \"$*\"; exit 1; }
@@ -131,7 +150,7 @@ file=
 
 Parse the command line options/arguments. For example:
 
 \
-while [ \"$#\" -gt 0 ]; do
+while [[ \"$#\" -gt 0 ]]; do
   case \"$1\" in
     -q)
       quiet=\"y\"
@@ -143,7 +162,7 @@ while [ \"$#\" -gt 0 ]; do
       shift
       ;;
     *)
-      if [ -n \"$file\" ]; then
+      if [[ -n \"$file\" ]]; then
        error \"$usage\"
       fi
@@ -155,19 +174,19 @@ done
 \
 
 If the value you are expecting from the command line is a directory path,
-the always strip the trailing slash (as shown above for the \c{-t} option).
+then always strip the trailing slash (as shown above for the \c{-t} option).
 
 \h#struct-opt-arg-valid|OPTIONS-ARGUMENTS-VALIDATION|
 
 Validate option/argument values. For example:
 
 \
-if [ -z \"$file\" ]; then
+if [[ -z \"$file\" ]]; then
   error \"$usage\"
 fi
 
-if [ ! -d \"$file\" ]; then
-  fail \"'$file' does not exist or is not a directory\"
+if [[ ! -d \"$file\" ]]; then
+  error \"'$file' does not exist or is not a directory\"
 fi
 \
 
@@ -182,11 +201,14 @@ functions, then define them just before use.
 
 We quote every variable expansion, no exceptions. For example:
 
 \
-if [ -n \"$foo\" ]; then
+if [[ -n \"$foo\" ]]; then
   ...
 fi
 \
 
+\N|While there is no word splitting in the \c{[[ ]]} context, we still quote
+variable expansions for consistency.|
+
 This also applies to command substitution (which we always write as
 \c{$(foo arg)} rather than \c{`foo arg`}), for example:
 
@@ -201,28 +223,42 @@ list=\"$(basename \"$1\")\"
 \
 
 We also quote values that are \i{strings} as opposed to options/file names,
-paths, or integers. If setting a variable that will contain one of these
-unquoted values, try to give it a name that reflects its type (e.g.,
-\c{foo_file} rather than \c{foo_name}). Prefer single quotes for \c{sed}
+paths, enum-like values, or integers. Prefer single quotes for \c{sed}
 scripts, for example:
 
 \
-proto=\"https\"
-quiet=\"y\"
-verbosity=1
-dir=/etc
-out=/dev/null
-file=manifest
-seds='s%^./%%'
+url=\"https://example.org\" # String.
+quiet=y                     # Enum-like.
+verbosity=1                 # Integer.
+dir=/etc                    # Directory path.
+out=/dev/null               # File path.
+file=manifest               # File name.
+option=--quiet              # Option name.
+seds='s%^./%%'              # sed script.
 \
 
-Note that quoting will inhibit globbing so you may end up with expansions
-along these lines:
+Take care to quote globs that are not meant to be expanded, for example:
+
+\
+unset \"array[0]\"
+\
+
+And since quoting will inhibit globbing, you may end up with expansions along
+these lines:
 
 \
 rm -f \"$dir/$name\".*
 \
 
+Note also that globbing is not performed in the \c{[[ ]]} context, so this is
+ok:
+
+\
+if [[ -v array[0] ]]; then
+  ...
+fi
+\
+
 \N|One exception to this quoting rule is arithmetic expansion (\c{$((\ ))}):
 Bash treats it as if it was double-quoted and, as a result, any inner quoting
 is treated literally. For example:
@@ -243,14 +279,13 @@ typical example of a space-aware argument handling:
 
 \
 files=()
 
-while [ \"$#\" -gt 0 ]; do
+while [[ \"$#\" -gt 0 ]]; do
   case \"$1\" in
     ...
     *)
-      shift
-      files=(\"${files[@]}\" \"$1\")
+      files+=(\"$1\")
       shift
       ;;
   esac
@@ -279,58 +314,87 @@ echo \"files: ${files[@]}\" # $1='files: one', $2='2 two', $3='three'
 echo \"files: ${files[*]}\" # $1='files: one 2 two three'
 \
 
-\h1#trap|Trap|
-Our scripts use the error trap to automatically terminate the script in case
-any command fails. If you need to check the exit status of a command, use
-\c{if}, for example:
+\h1#bool|Boolean|
-\
-if grep \"foo\" /tmp/bar; then
-  info \"found\"
-fi
+For boolean values use empty for false and \c{true} for true. This way you
+can have terse and natural looking conditions, for example:
-if ! grep \"foo\" /tmp/bar; then
-  info \"not found\"
-fi
 \
+first=true
+while ...; do
-Note that the \c{if}-condition can be combined with capturing the output, for
-example:
+  if [[ ! \"$first\" ]]; then
+    ...
+  fi
-\
-if v=\"$(...)\"; then
-  ...
-fi
+  if [[ \"$first\" ]]; then
+    first=
+  fi
+
+done
 \
-If you need to ignore the exit status, you can use \c{|| true}, for example:
+
+\h1#subshell|Subshell|
+
+Bash executes certain constructs in \i{subshells} and some of these constructs
+may not be obvious:
+
+\ul|
+
+\li|Explicit subshell: \c{(...)}|
+
+\li|Pipeline: \c{...|...}|
+
+\li|Command substitution: \c{$(...)}|
+
+\li|Process substitution: \c{<(...)}, \c{>(...)}|
+
+\li|Background: \c{...&}, \c{coproc ...}|
+
+|
+
+Naturally, a subshell cannot modify any state in the parent shell, which
+sometimes leads to counter-intuitive behavior, for example:
 
 \
-foo || true
-\
+lines=()
+... | while read l; do
+  lines+=(\"$l\")
+done
+\
-\h1#bool|Boolean|
+At the end of the loop, \c{lines} will remain empty since the loop body is
+executed in a subshell. One way to resolve this is to use the process
+substitution instead of the pipeline:
-For boolean values use empty for false and \c{true} for true. This way you
-can have terse and natural looking conditions, for example:
+\
+lines=()
+while read l; do
+  lines+=(\"$l\")
+done < <(...)
+\
-first=true
-while ...; do
-  if [ ! \"$first\" ]; then
-    ...
-  fi
+This, however, results in unnatural, backwards-looking (compared to the
+pipeline) code. Instead, we can request the last command of the pipeline to be
+executed in the parent shell with the \c{lastpipe} shell option, for example:
-  if [ \"$first\" ]; then
-    first=
-  fi
+\
+shopt -s lastpipe
+lines=()
+
+... | while read l; do
+  lines+=(\"$l\")
 done
 \
+
+\N|The \c{lastpipe} shell option is inherited by functions and subshells.|
+
 
 \h1#function|Functions|
 
 If a function takes arguments, provide a brief usage after the function
@@ -347,8 +411,8 @@
 For non-trivial/obvious functions also provide a short description of its
 functionality/purpose, for example:
 
 \
-# Prepare a distribution of the specified packages and place it into the
-# specified directory.
+# Prepare a distribution of the specified packages and place it
+# into the specified directory.
 #
 function dist() # <pkg> <dir>
 {
@@ -367,7 +431,7 @@ function dist()
 If the evaluation of the value may fail (e.g., it contains a program
 substitution), then place the assignment on a separate line since \c{local}
-will cause the error to be ignore. For example:
+will cause the error to be ignored. For example:
 
 \
 function dist()
@@ -377,10 +441,273 @@ function dist()
 }
 \
 
+A function can return data in two primary ways: exit code and stdout.
+Normally, exit code 0 means success and exit code 1 means failure, though
+additional codes can be used to distinguish between different kinds of
+failures (for example, \"hard\" and \"soft\" failures), signify special
+conditions, etc.; see \l{#error-handing Error Handling} for details.
+
+A function can also write to stdout with the result available to the caller in
+the same way as from programs (command substitution, pipeline, etc).
+If a function needs to return multiple values, then it can print them
+separated with newlines, with the caller using the \c{readarray} builtin to
+read them into an indexed array, for example:
+
+\
+function func ()
+{
+  echo one
+  echo two
+  echo three
+}
+
+func | readarray -t r
+\
+
+\N|The use of the newline as a separator means that values may not contain
+newlines. While \c{readarray} supports specifying a custom separator with the
+\c{-d} option, including a \c{NUL} separator, this support is only available
+since Bash 4.4.|
+
+This technique can also be extended to return an associative array by first
+returning the values as an indexed array and then converting them to
+an associative array with \c{eval}, for example:
+
+\
+function func ()
+{
+  echo \"[a]=one\"
+  echo \"[b]=two\"
+  echo \"[c]=three\"
+}
+
+func | readarray -t ia
+
+eval declare -A aa=(\"${ia[@]}\")
+\
+
+Note that if a key or a value contains whitespace, then it must be quoted.
+The recommendation is to always quote both, for example:
+
+\
+function func ()
+{
+  echo \"['a']='one ONE'\"
+  echo \"['b']='two'\"
+  echo \"['c']='three'\"
+}
+\
+
+Or, if returning a local array:
+
+\
+function func ()
+{
+  declare -A a=([a]='one ONE' [b]=two [c]=three)
+
+  for k in \"${!a[@]}\"; do
+    echo \"['$k']='${a[$k]}'\"
+  done
+}
+\
+
 For more information on returning data from functions, see
 \l{https://mywiki.wooledge.org/BashFAQ/084 BashFAQ#084}.
 
-For more information on writing reusable functions, see
-\l{https://stackoverflow.com/questions/11369522/bash-utility-script-library
-Bash Utility Script Library}.
+
+\h1#error-handing|Error Handling|
+
+Our scripts use the \c{ERR} trap to automatically terminate the script in case
+any command fails. This behavior is also propagated to functions and subshells
+by specifying the \c{errtrace} shell option and to all the commands of a
+pipeline by specifying the \c{pipefail} option.
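The interaction of the \c{ERR} trap, \c{errtrace}, and \c{pipefail} described above can be exercised end to end. The following is a minimal sketch (the embedded script text, variable names, and messages are illustrative, not from the guide) showing that a failure in the middle of a pipeline trips the trap and terminates the script; the sketch runs in a child \c{bash} so the termination can be observed rather than suffered:

```shell
#!/usr/bin/env bash

# Illustrative preamble in the style described above: errtrace propagates
# the ERR trap into functions and subshells, pipefail makes a pipeline
# fail if any of its commands fail.
script='
set -o errtrace
set -o pipefail
trap "{ echo error 1>&2; exit 1; }" ERR

false | cat # false fails; with pipefail the whole pipeline fails.
echo "never reached"
'

# Run the sketch in a child bash; capture its stdout and exit status.
r=0
out="$(bash -c "$script" 2>/dev/null)" || r="$?"

# The trap fired and exited with status 1, so "never reached" was never
# printed: r is 1 and out is empty.
```

Without \c{pipefail} the pipeline's status would be that of \c{cat} (zero) and the failure of \c{false} would go unnoticed.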
+
+\N|Without \c{pipefail}, a non-zero exit of any command in the pipeline except
+the last is ignored. The \c{pipefail} shell option is inherited by functions
+and subshells.|
+
+\N|While the \c{nounset} option may also seem like a good idea, it has
+subtle, often latent pitfalls that make it more trouble than it's worth (see
+\l{https://mywiki.wooledge.org/BashPitfalls#nounset \c{nounset} pitfalls}).|
+
+The \c{pipefail} semantics is not without pitfalls, which should be kept in
+mind. In particular, if a command in a pipeline exits before reading the
+preceding command's output in its entirety, the preceding command may exit
+with a non-zero exit status (see
+\l{https://mywiki.wooledge.org/BashPitfalls#pipefail \c{pipefail} pitfalls}
+for details).
+
+\N|Note that in such a situation the preceding command may exit with zero
+status not only because it gracefully handled \c{SIGPIPE} but also because all
+of its output happened to fit into the pipe buffer.|
+
+For example, these are the two common pipelines that may exhibit this issue:
+
+\
+prog | head -n 1
+prog | grep -q foo
+\
+
+In these two cases, the simplest (though not the most efficient) way to work
+around this issue is to reimplement \c{head} with \c{sed} and to get rid of
+\c{-q} in \c{grep}, for example:
+
+\
+prog | sed -n -e '1p'
+prog | grep foo >/dev/null
+\
+
+If you need to check the exit status of a command, use \c{if}, for example:
+
+\
+if grep -q \"foo\" /tmp/bar; then
+  info \"found\"
+fi
+
+if ! grep -q \"foo\" /tmp/bar; then
+  info \"not found\"
+fi
+\
+
+Note that the \c{if}-condition can be combined with capturing the output, for
+example:
+
+\
+if v=\"$(...)\"; then
+  ...
+fi
+\
+
+But keep in mind that in Bash a failure is often indistinguishable from a
+true/false result. For example, in the above \c{grep} command, the result will
+be the same whether there is no match or the file does not exist.
+
+Furthermore, in certain contexts, the above-mentioned \c{ERR} trap is ignored.
+Quoting from the Bash manual:
+
+\i{The \c{ERR} trap is not executed if the failed command is part of the
+command list immediately following an \c{until} or \c{while} keyword, part of
+the test following the \c{if} or \c{elif} reserved words, part of a command
+executed in a \c{&&} or \c{||} list except the command following the final
+\c{&&} or \c{||}, any command in a pipeline but the last, or if the command's
+return status is being inverted using \c{!}. These are the same conditions
+obeyed by the \c{errexit} (\c{-e}) option.}
+
+To illustrate the gravity of this point, consider the following example:
+
+\
+function cleanup()
+{
+  cd \"$1\"
+  rm -f *
+}
+
+if ! cleanup /no/such/dir; then
+  ...
+fi
+\
+
+Here, the \c{cleanup()} function will continue executing (and may succeed)
+even if the \c{cd} command has failed.
+
+Note, however, that notwithstanding the above statement from the Bash manual,
+the \c{ERR} trap is executed inside all the subshell commands of a pipeline
+provided the \c{errtrace} option is specified. As a result, the above code can
+be made to work by temporarily disabling \c{pipefail} and reimplementing it as
+a pipeline:
+
+\
+set +o pipefail
+cleanup /no/such/dir | cat
+r=\"${PIPESTATUS[0]}\"
+set -o pipefail
+
+if [[ \"$r\" -ne 0 ]]; then
+  ...
+fi
+\
+
+\N|Here, if \c{cleanup}'s \c{cd} fails, the \c{ERR} trap will be executed in
+the subshell, causing it to exit with an error status, which the parent shell
+then makes available in \c{PIPESTATUS}.|
+
+The recommendation is then to avoid calling functions in contexts where the
+\c{ERR} trap is ignored, resorting to the above pipe trick where that's not
+possible, and to be mindful of the potential ambiguity between the true/false
+result and failure for other commands. The use of the \c{&&} and \c{||}
+command expressions is best left to the interactive shell.
+
+\N|The pipe trick cannot be used if the function needs to modify the global
Such a function, however, might as well return the exit status also as +part of the global state. The pipe trick can also be used to ignore the exit +status of a command.| + +The pipe trick can also be used to distinguish between different exit codes, +for example: + +\ +function func() +{ + bar # If this command fails, the function returns 1. + + if ... ; then + return 2 + fi +} + +set +o pipefail +func | cat +r=\"${PIPESTATUS[0]}\" +set -o pipefail + +case \"$r\" in + 0) + ;; + 1) + exit 1 + ;; + 2) + ... + ;; +esac +\ + +\N|In such functions it makes sense to keep exit code 1 to mean failure so +that the inherited \c{ERR} trap can be re-used.| + +This technique can be further extended to implement functions that both +return multiple exit codes and produce output, for example: + +\ +function func() +{ + bar # If this command fails, the function returns 1. + + if ... ; then + return 2 + fi + + echo result +} + +set +o pipefail +func | readarray -t ro +r=\"${PIPESTATUS[0]}\" +set -o pipefail + +case \"$r\" in + 0) + echo \"${ro[0]}\" + ;; + 1) + exit 1 + ;; + 2) + ... + ;; +esac +\ + +\N|We use \c{readarray} instead of \c{read} since the latter fails if the left +hand side of the pipeline does not produce anything.| + " |