From 45c9fbacc4b21efd54173bad11998ceffd175676 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Wed, 24 May 2017 15:51:50 +0200
Subject: Improve bbot diagnostics email

---
 buildos | 86 +++++++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 23 deletions(-)

(limited to 'buildos')

diff --git a/buildos b/buildos
index 78c16c2..0845ed1 100755
--- a/buildos
+++ b/buildos
@@ -976,6 +976,9 @@ EOF
 	    # (not even the cursor) if there are no new entries. The new
 	    # versions output the old cursor.
 	    #
+	    # Plus, it sometimes changes the cursor even without any errors in
+	    # it (journal rewind/truncation maybe?) so we have to detect that.
+	    #
 	    c=(sudo journalctl --unit "bbot-agent@$tn")
 
 	    # Get the last cursor if any.
@@ -985,36 +988,73 @@ EOF
 	      c+=("--after-cursor" "$oc")
 	    fi
 
-	    # Get the log range: the first line is the date of the first error
-	    # and the second line is the end cursor.
+	    # Get the "log range": the first line is the date of the first
+	    # error, the second line is the date of the last error, and the
+	    # third line is the end cursor. It can also be just one line in
+	    # which case it is the new cursor (that rewind stuff).
+	    #
+	    # Here is what's going on in that sed script:
+	    #
+	    # The first chunk matches the first line. We first put it into
+	    # the hold space (in case that's the only line) and then extract
+	    # and print the date.
+	    #
+	    # The second chunk matches the last line. We first handle the hold
+	    # space which by now should contain the last error line and then
+	    # the cursor.
+	    #
+	    # The last chunk matches every other line. We simply replace the
+	    # hold space with the next line so that at the end we have the
+	    # last line there.
 	    #
 	    lr="$("${c[@]}" --no-pager --quiet --output short-full \
---priority 4 --show-cursor | \
-sed -n -re '1s/^... ([^ ]+ [^ ]+) .*$/\1/p;s/^-- cursor: (.+)$/\1/p')"
-
-	    nc="$(sed -n -e '2p' <<<"$lr")"
+--priority 4 --show-cursor | sed -n -r \
+-e '1{h;s/^[MTWFS].. ([^ ]+ [^ ]+) .*$/\1/p;t}' \
+-e '${x;s/^[MTWFS].. ([^ ]+ [^ ]+) .*$/\1/p;x;s/^-- cursor: (.+)$/\1/p;t}' \
+-e 'h')"
+	    lc="$(wc -l <<<"$lr")"
+	    nc="$(sed -n -e "${lc}p" <<<"$lr")"
 
 	    # If we have no new entries, then nothing to do.
 	    #
-	    if [ -n "$nc" -a "$nc" != "$oc" ]; then
+	    if [ "$nc" != "$oc" ]; then
 
-	      # Try to get some context for the first error entry. This is
-	      # unexpectedly hard in systemd. So we are going to get all the
-	      # entries that start a minute ago.
+	      # We may have no actual entries (cursor rewind).
 	      #
-	      sd="$(sed -n -e '1p' <<<"$lr")"
-	      sd="$(date '+%Y-%m-%d %H:%M:%S' -d "$sd +00:01")"
-
-	      s="bbot-agent@$tn service issued new diagnostics"
-
-	      info "$s"
-	      {
-		echo "$tn.bbot_cmd: ssh build@$hname ${c[@]}";
-		echo;
-		echo;
-		"${c[@]}" --no-pager --quiet --output short-full \
-			  --since "$sd" --lines 100
-	      } | email "$s"
+	      if [ "$lc" -ne 1 ]; then
+
+		# Try to get some context before the first error and after the
+		# last. This is unexpectedly hard in systemd.
+		#
+		# This can be a lot of output which makes it hard to spot the
+		# error so we are going to print just the error summary first.
+		# Quite a mess, I agree.
+		#
+		sd="$(sed -n -e '1p' <<<"$lr")"
+		sd="$(date '+%s' -d "$sd")"                           # sec
+		sd="$(date '+%Y-%m-%d %H:%M:%S' -d "@$(($sd - 10))")" # -10sec
+
+		ed="$(sed -n -e '2p' <<<"$lr")"
+		ed="$(date '+%s' -d "$ed")"                           # sec
+		ed="$(date '+%Y-%m-%d %H:%M:%S' -d "@$(($ed + 10))")" # +10sec
+
+		s="bbot-agent@$tn service issued new diagnostics"
+
+		info "$s"
+		{
+		  echo "$tn.bbot_cmd: ssh build@$hname ${c[@]}";
+		  echo;
+		  echo "summary:";
+		  echo;
+		  "${c[@]}" --no-pager --quiet --output short-full \
+			    --priority 4 | head -n 200;
+		  echo;
+		  echo "context [$sd -- $ed]:";
+		  echo;
+		  "${c[@]}" --no-pager --quiet --output short-full \
+			    --since "$sd" --until "$ed" | head -n 200
+		} | email "$s"
+	      fi
 
 	      toolchain_cursors["$tn"]="$nc"
 	    fi
-- 
cgit v1.1