aboutsummaryrefslogtreecommitdiff
path: root/buildos
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2017-05-24 15:51:50 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2017-05-24 15:51:50 +0200
commit45c9fbacc4b21efd54173bad11998ceffd175676 (patch)
treeefb8876ca8ee7554d94140aa1791991b104a4b49 /buildos
parent49cce16baa7ec26af089a50c9bcb6d2eebbcaf2a (diff)
Improve bbot diagnostics email
Diffstat (limited to 'buildos')
-rwxr-xr-xbuildos86
1 files changed, 63 insertions, 23 deletions
diff --git a/buildos b/buildos
index 78c16c2..0845ed1 100755
--- a/buildos
+++ b/buildos
@@ -976,6 +976,9 @@ EOF
# (not even the cursor) if there are no new entries. The new
# versions output the old cursor.
#
+ # Plus, it sometimes changes the cursor even without any errors in
+ # it (journal rewind/truncation maybe?) so we have to detect that.
+ #
c=(sudo journalctl --unit "bbot-agent@$tn")
# Get the last cursor if any.
@@ -985,36 +988,73 @@ EOF
c+=("--after-cursor" "$oc")
fi
- # Get the log range: the first line is the date of the first error
- # and the second line is the end cursor.
+ # Get the "log range": the first line is the date of the first
+ # error, the second line is the date of the last error, and the
+ # third line is the end cursor. It can also be just one line in
+ # which case it is the new cursor (that rewind stuff).
+ #
+ # Here is what's going on in that sed script:
+ #
+ # The first chunk matches the first line. We first put it into
+ # the hold space (in case that's the only line) and then extract
+ # and print the date.
+ #
+ # The second chunk matches the last line. We first handle the hold
+ # space which by now should contain the last error line and then
+ # the cursor.
+ #
+ # The last chunk matches every other line. We simply replace the
+ # hold space with the next line so that at the end we have the
+ # last line there.
#
lr="$("${c[@]}" --no-pager --quiet --output short-full \
---priority 4 --show-cursor | \
-sed -n -re '1s/^... ([^ ]+ [^ ]+) .*$/\1/p;s/^-- cursor: (.+)$/\1/p')"
-
- nc="$(sed -n -e '2p' <<<"$lr")"
+--priority 4 --show-cursor | sed -n -r \
+-e '1{h;s/^[MTWFS].. ([^ ]+ [^ ]+) .*$/\1/p;t}' \
+-e '${x;s/^[MTWFS].. ([^ ]+ [^ ]+) .*$/\1/p;x;s/^-- cursor: (.+)$/\1/p;t}' \
+-e 'h')"
+ lc="$(wc -l <<<"$lr")"
+ nc="$(sed -n -e "${lc}p" <<<"$lr")"
# If we have no new entries, then nothing to do.
#
- if [ -n "$nc" -a "$nc" != "$oc" ]; then
+ if [ "$nc" != "$oc" ]; then
- # Try to get some context for the first error entry. This is
- # unexpectedly hard in systemd. So we are going to get all the
- # entries that start a minute ago.
+ # We may have no actual entries (cursor rewind).
#
- sd="$(sed -n -e '1p' <<<"$lr")"
- sd="$(date '+%Y-%m-%d %H:%M:%S' -d "$sd +00:01")"
-
- s="bbot-agent@$tn service issued new diagnostics"
-
- info "$s"
- {
- echo "$tn.bbot_cmd: ssh build@$hname ${c[@]}";
- echo;
- echo;
- "${c[@]}" --no-pager --quiet --output short-full \
- --since "$sd" --lines 100
- } | email "$s"
+ if [ "$lc" -ne 1 ]; then
+
+ # Try to get some context before the first error and after the
+ # last. This is unexpectedly hard in systemd.
+ #
+ # This can be a lot of output which makes it hard to spot the
+ # error so we are going to print just the error summary first.
+ # Quite a mess, I agree.
+ #
+ sd="$(sed -n -e '1p' <<<"$lr")"
+ sd="$(date '+%s' -d "$sd")" # sec
+ sd="$(date '+%Y-%m-%d %H:%M:%S' -d "@$(($sd - 10))")" # -10sec
+
+ ed="$(sed -n -e '2p' <<<"$lr")"
+ ed="$(date '+%s' -d "$ed")" # sec
+ ed="$(date '+%Y-%m-%d %H:%M:%S' -d "@$(($ed + 10))")" # +10sec
+
+ s="bbot-agent@$tn service issued new diagnostics"
+
+ info "$s"
+ {
+ echo "$tn.bbot_cmd: ssh build@$hname ${c[@]}";
+ echo;
+ echo "summary:";
+ echo;
+ "${c[@]}" --no-pager --quiet --output short-full \
+ --priority 4 | head -n 200;
+ echo;
+ echo "context [$sd -- $ed]:";
+ echo;
+ "${c[@]}" --no-pager --quiet --output short-full \
+ --since "$sd" --until "$ed" | head -n 200
+ } | email "$s"
+ fi
toolchain_cursors["$tn"]="$nc"
fi