Diffstat (limited to 'mod/mod-ci-github.cxx')
-rw-r--r-- | mod/mod-ci-github.cxx | 3478
1 file changed, 3478 insertions, 0 deletions
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx new file mode 100644 index 0000000..d53c46e --- /dev/null +++ b/mod/mod-ci-github.cxx @@ -0,0 +1,3478 @@ +// file : mod/mod-ci-github.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci-github.hxx> + +#include <libbutl/json/parser.hxx> + +#include <web/xhtml/serialization.hxx> +#include <web/server/mime-url-encoding.hxx> // mime_url_encode() + +#include <mod/jwt.hxx> +#include <mod/hmac.hxx> +#include <mod/build.hxx> // build_log_url() +#include <mod/module-options.hxx> + +#include <mod/mod-ci-github-gq.hxx> +#include <mod/mod-ci-github-post.hxx> +#include <mod/mod-ci-github-service-data.hxx> + +#include <cerrno> +#include <cstdlib> // strtoull() +#include <stdexcept> + +// Resources: +// +// Creating an App: +// https://docs.github.com/en/apps/creating-github-apps/about-creating-github-apps/best-practices-for-creating-a-github-app +// +// Webhooks: +// https://docs.github.com/en/webhooks/using-webhooks/best-practices-for-using-webhooks +// https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries +// +// REST API: +// All docs: https://docs.github.com/en/rest#all-docs +// Best practices: https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api +// +// GraphQL API: +// Reference: https://docs.github.com/en/graphql/reference +// + +using namespace std; +using namespace butl; +using namespace web; +using namespace brep::cli; + +namespace brep +{ + ci_github:: + ci_github (tenant_service_map& tsm) + : tenant_service_map_ (tsm) + { + } + + ci_github:: + ci_github (const ci_github& r, tenant_service_map& tsm) + : database_module (r), + ci_start (r), + options_ (r.initialized_ ? r.options_ : nullptr), + tenant_service_map_ (tsm) + { + } + + void ci_github:: + init (scanner& s) + { + HANDLER_DIAG; + + { + shared_ptr<tenant_service_base> ts ( + dynamic_pointer_cast<tenant_service_base> (shared_from_this ())); + + assert (ts != nullptr); // By definition. + + tenant_service_map_["ci-github"] = move (ts); + } + + options_ = make_shared<options::ci_github> ( + s, unknown_mode::fail, unknown_mode::fail); + + // Prepare for the CI requests handling, if configured. + // + if (options_->ci_github_app_webhook_secret_specified ()) + { + if (!options_->build_config_specified ()) + fail << "package building functionality must be enabled"; + + if (!options_->ci_github_app_id_private_key_specified ()) + fail << "no app id/private key mappings configured"; + + for (const auto& pr: options_->ci_github_app_id_private_key ()) + { + if (pr.second.relative ()) + fail << "ci-github-app-id-private-key path must be absolute"; + } + + // Read the webhook secret from the configured path. + // + { + const path& p (options_->ci_github_app_webhook_secret ()); + + if (p.relative ()) + fail << "ci-github-app-webhook-secret path must be absolute"; + + try + { + ifdstream is (p); + getline (is, webhook_secret_, '\0'); + + // Trim leading/trailing whitespaces (presumably GitHub does the + // same in its web UI). 
+ // + if (trim (webhook_secret_).empty ()) + fail << "empty webhook secret in " << p; + } + catch (const io_error& e) + { + fail << "unable to read webhook secret from " << p << ": " << e; + } + } + + ci_start::init (make_shared<options::ci_start> (*options_)); + + database_module::init (*options_, options_->build_db_retry ()); + } + } + + bool ci_github:: + handle (request& rq, response&) + { + using namespace bpkg; + + HANDLER_DIAG; + + if (build_db_ == nullptr) + throw invalid_request (501, "GitHub CI submission not implemented"); + + // Process headers. + // + string event; // Webhook event. + string hmac; // Received HMAC. + try + { + bool content_type (false); + + for (const name_value& h: rq.headers ()) + { + // HMAC authenticating this request. Note that it won't be present + // unless a webhook secret has been set in the GitHub app's settings. + // + if (icasecmp (h.name, "x-hub-signature-256") == 0) + { + if (!h.value) + throw invalid_request (400, "missing x-hub-signature-256 value"); + + // Parse the x-hub-signature-256 header value. For example: + // + // sha256=5e82258... + // + // Check for the presence of the "sha256=" prefix and then strip it + // to leave only the HMAC value. + // + if (h.value->find ("sha256=", 0, 7) == string::npos) + throw invalid_request (400, "invalid x-hub-signature-256 value"); + + hmac = h.value->substr (7); + } + // This event's UUID. + // + else if (icasecmp (h.name, "x-github-delivery") == 0) + { + // @@ TODO Check that delivery UUID has not been received before + // (replay attack). + } + else if (icasecmp (h.name, "content-type") == 0) + { + if (!h.value) + throw invalid_request (400, "missing content-type value"); + + if (icasecmp (*h.value, "application/json") != 0) + { + throw invalid_request (400, + "invalid content-type value: '" + *h.value + + '\''); + } + + content_type = true; + } + // The webhook event. + // + else if (icasecmp (h.name, "x-github-event") == 0) + { + if (!h.value) + throw invalid_request (400, "missing x-github-event value"); + + event = *h.value; + } + } + + if (!content_type) + throw invalid_request (400, "missing content-type header"); + + if (event.empty ()) + throw invalid_request (400, "missing x-github-event header"); + + if (hmac.empty ()) + throw invalid_request (400, "missing x-hub-signature-256 header"); + } + catch (const invalid_request& e) + { + error << "request header error: " << e.content; + throw; + } + + // Read the entire request body into a buffer because we need to compute + // an HMAC over it and then parse it as JSON. The alternative of reading + // from the stream twice works out to be more complicated (see also a TODO + // item in web/server/module.hxx). + // + string body; + { + // Note that even though we may not need caching right now, we may later + // (e.g., to support cancel) so let's just enable it right away. + // + size_t limit (128 * 1024); + + istream& is (rq.content (limit, limit)); + + try + { + getline (is, body, '\0'); + } + catch (const io_error& e) + { + fail << "unable to read request body: " << e; + } + } + + // Verify the received HMAC. + // + // Compute the HMAC value over the request body using the configured + // webhook secret as key and compare it to the received HMAC. 
+ // + try + { + string h (compute_hmac (*options_, + body.data (), + body.size (), + webhook_secret_.c_str ())); + + if (!icasecmp (h, hmac)) + { + string m ("computed HMAC does not match received HMAC"); + + error << m; + + throw invalid_request (400, move (m)); + } + } + catch (const system_error& e) + { + fail << "unable to compute request HMAC: " << e; + } + + // Process the `app-id` and `warning` webhook request query parameters. + // + uint64_t app_id; + bool warning_success; + { + const name_values& rps (rq.parameters (1024, true /* url_only */)); + + bool ai (false), wa (false); + + auto badreq = [] (const string& m) + { + throw invalid_request (400, m); + }; + + for (const name_value& rp: rps) + { + if (rp.name == "app-id") + { + if (!rp.value) + badreq ("missing 'app-id' webhook query parameter value"); + + ai = true; + + // Parse the app id value. + // + const char* b (rp.value->c_str ()); + char* e (nullptr); + errno = 0; // We must clear it according to POSIX. + app_id = strtoull (b, &e, 10); + if (errno == ERANGE || e == b || *e != '\0') + { + badreq ("invalid 'app-id' webhook query parameter value: '" + + *rp.value + '\''); + } + } + else if (rp.name == "warning") + { + if (!rp.value) + badreq ("missing 'warning' webhook query parameter value"); + + wa = true; + const string& v (*rp.value); + + if (v == "success") warning_success = true; + else if (v == "failure") warning_success = false; + else + badreq ("invalid 'warning' webhook query parameter value: '" + v + + '\''); + } + } + + if (!ai) badreq ("missing 'app-id' webhook query parameter"); + if (!wa) badreq ("missing 'warning' webhook query parameter"); + } + + // There is a webhook event (specified in the x-github-event header) and + // each event contains a bunch of actions (specified in the JSON request + // body). + // + // Note: "GitHub continues to add new event types and new actions to + // existing event types." As a result we ignore known actions that we are + // not interested in and log and ignore unknown actions. The thinking here + // is that we want to be "notified" of new actions at which point we can + // decide whether to ignore them or to handle. + // + if (event == "check_suite") + { + gh_check_suite_event cs; + try + { + json::parser p (body.data (), body.size (), "check_suite event"); + + cs = gh_check_suite_event (p); + } + catch (const json::invalid_json_input& e) + { + string m ("malformed JSON in " + e.name + " request body"); + + error << m << ", line: " << e.line << ", column: " << e.column + << ", byte offset: " << e.position << ", error: " << e; + + throw invalid_request (400, move (m)); + } + + if (cs.check_suite.app_id != app_id) + { + fail << "webhook check_suite app.id " << cs.check_suite.app_id + << " does not match app-id query parameter " << app_id; + } + + if (cs.action == "requested") + { + // Branch pushes are handled in handle_branch_push() so ignore this + // event. + // + return true; + } + else if (cs.action == "rerequested") + { + // Someone manually requested to re-run all the check runs in this + // check suite. Treat as a new request. + // + return handle_check_suite_rerequest (move (cs), warning_success); + } + else if (cs.action == "completed") + { + // GitHub thinks that "all the check runs in this check suite have + // completed and a conclusion is available". Check with our own + // bookkeeping and log an error if there is a mismatch. 
+ // + return handle_check_suite_completed (move (cs), warning_success); + } + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. + // + error << "unknown action '" << cs.action << "' in check_suite event"; + + return true; + } + } + else if (event == "check_run") + { + gh_check_run_event cr; + try + { + json::parser p (body.data (), body.size (), "check_run event"); + + cr = gh_check_run_event (p); + } + catch (const json::invalid_json_input& e) + { + string m ("malformed JSON in " + e.name + " request body"); + + error << m << ", line: " << e.line << ", column: " << e.column + << ", byte offset: " << e.position << ", error: " << e; + + throw invalid_request (400, move (m)); + } + + if (cr.check_run.app_id != app_id) + { + fail << "webhook check_run app.id " << cr.check_run.app_id + << " does not match app-id query parameter " << app_id; + } + + if (cr.action == "rerequested") + { + // Someone manually requested to re-run a specific check run. + // + return handle_check_run_rerequest (move (cr), warning_success); + } +#if 0 + // It looks like we shouldn't be receiving these since we are not + // subscribed to them. + // + else if (cr.action == "created" || + cr.action == "completed" || + cr.action == "requested_action") + { + } +#endif + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. + // + error << "unknown action '" << cr.action << "' in check_run event"; + + return true; + } + } + else if (event == "pull_request") + { + gh_pull_request_event pr; + try + { + json::parser p (body.data (), body.size (), "pull_request event"); + + pr = gh_pull_request_event (p); + } + catch (const json::invalid_json_input& e) + { + string m ("malformed JSON in " + e.name + " request body"); + + error << m << ", line: " << e.line << ", column: " << e.column + << ", byte offset: " << e.position << ", error: " << e; + + throw invalid_request (400, move (m)); + } + + // Store the app-id webhook query parameter in the gh_pull_request_event + // object (see gh_pull_request for an explanation). + // + // When we receive the other webhooks we do check that the app ids in + // the payload and query match but here we have to assume it is valid. + // + pr.pull_request.app_id = app_id; + + if (pr.action == "opened" || + pr.action == "synchronize") + { + // opened + // A pull request was opened. + // + // synchronize + // A pull request's head branch was updated from the base branch or + // new commits were pushed to the head branch. (Note that there is + // no equivalent event for the base branch.) + // + // Note that both cases are handled similarly: we start a new CI + // request which will be reported on the new commit id. + // + return handle_pull_request (move (pr), warning_success); + } + else if (pr.action == "edited") + { + // PR base branch changed (to a different branch) besides other + // irrelevant changes (title, body, etc). + // + // This is in a sense a special case of the base branch moving. In + // that case we don't do anything (due to the head sharing problem) + // relying instead on the branch protection rule. So it makes sense + // to do the same here. + // + return true; + } + else if (pr.action == "closed") + { + // PR has been closed (as merged or not; see merged member). Also + // apparently received if base branch is deleted (and the same + // for head branch). See also the reopened event below. 
+ // + // While it may seem natural to cancel the CI for the closed PR, it + // might actually be useful to have a completed CI record. GitHub + // doesn't prevent us from publishing CI results for the closed PR + // (even if both base and head branches were deleted). And if such a + // PR is reopened, the CI results remain. + // + return true; + } + else if (pr.action == "reopened") + { + // Previously closed PR has been reopened. + // + // Since we don't cancel the CI for a closed PR, there is nothing + // to do if it is reopened. + // + return true; + } + else if (pr.action == "assigned" || + pr.action == "auto_merge_disabled" || + pr.action == "auto_merge_enabled" || + pr.action == "converted_to_draft" || + pr.action == "demilestoned" || + pr.action == "dequeued" || + pr.action == "enqueued" || + pr.action == "labeled" || + pr.action == "locked" || + pr.action == "milestoned" || + pr.action == "ready_for_review" || + pr.action == "review_request_removed" || + pr.action == "review_requested" || + pr.action == "unassigned" || + pr.action == "unlabeled" || + pr.action == "unlocked") + { + // These have no relation to CI. + // + return true; + } + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. + // + error << "unknown action '" << pr.action << "' in pull_request event"; + + return true; + } + } + else if (event == "push") + { + // Push events are triggered by branch pushes, branch creation, and + // branch deletion. + // + gh_push_event ps; + try + { + json::parser p (body.data (), body.size (), "push event"); + + ps = gh_push_event (p); + } + catch (const json::invalid_json_input& e) + { + string m ("malformed JSON in " + e.name + " request body"); + + error << m << ", line: " << e.line << ", column: " << e.column + << ", byte offset: " << e.position << ", error: " << e; + + throw invalid_request (400, move (m)); + } + + // Store the app-id webhook query parameter in the gh_push_event + // object (see gh_push_event for an explanation). + // + // When we receive the other webhooks we do check that the app ids in + // the payload and query match but here we have to assume it is valid. + // + ps.app_id = app_id; + + // Note that the push request event has no action. + // + return handle_branch_push (move (ps), warning_success); + } + // Ignore marketplace_purchase events (sent by the GitHub Marketplace) by + // sending a 200 response with empty body. We offer a free plan only and + // do not support user accounts so there is nothing to be done. + // + else if (event == "marketplace_purchase") + { + return true; + } + // Ignore GitHub App installation events by sending a 200 response with + // empty body. These are triggered when a user installs a GitHub App in a + // repository or organization. + // + else if (event == "installation") + { + return true; + } + // Ignore ping events by sending a 200 response with empty body. This + // event occurs when you create a new webhook. The ping event is a + // confirmation from GitHub that you configured the webhook correctly. One + // of its triggers is listing an App on the GitHub Marketplace. + // + else if (event == "ping") + { + return true; + } + else + { + // Log to investigate. + // + error << "unexpected event '" << event << "'"; + + throw invalid_request (400, "unexpected event: '" + event + "'"); + } + } + + // Let's capitalize the synthetic conclusion check run name to make it + // easier to distinguish from the regular ones. 
+ // + static const string conclusion_check_run_name ("CONCLUSION"); + + // Yellow circle. + // + static const string conclusion_building_title ("\U0001F7E1 IN PROGRESS"); + static const string conclusion_building_summary ( + "Waiting for all the builds to complete."); + + // "Medium white" circle. + // + static const string check_run_queued_title ("\U000026AA QUEUED"); + static const string check_run_queued_summary ( + "Waiting for the build to start."); + + // Yellow circle. + // + static const string check_run_building_title ("\U0001F7E1 BUILDING"); + static const string check_run_building_summary ( + "Waiting for the build to complete."); + + // Return the colored circle corresponding to a result_status. + // + // Note: the rest of the title is produced by to_string(result_status). + // + static string + circle (result_status rs) + { + switch (rs) + { + case result_status::success: return "\U0001F7E2"; // Green circle. + case result_status::warning: return "\U0001F7E0"; // Orange circle. + case result_status::error: + case result_status::abort: + case result_status::abnormal: return "\U0001F534"; // Red circle. + + // Valid values we should never encounter. + // + case result_status::skip: + case result_status::interrupt: + throw invalid_argument ("unexpected result_status value: " + + to_string (rs)); + } + + return ""; // Should never reach. + } + + bool ci_github:: + handle_branch_push (gh_push_event ps, bool warning_success) + { + HANDLER_DIAG; + + l3 ([&]{trace << "push event { " << ps << " }";}); + + // Cancel the CI tenant associated with the overwritten/deleted previous + // head commit if this is a forced push or a branch deletion. + // + if (ps.forced || ps.deleted) + { + // Service id that will uniquely identify the CI tenant. + // + string sid (ps.repository.node_id + ':' + ps.before); + + // Note that it's possible this commit still exists in another branch so + // we do refcount-aware cancel. + // + if (optional<tenant_service> ts = cancel (error, warn, + verb_ ? &trace : nullptr, + *build_db_, retry_max_, + "ci-github", sid, + true /* ref_count */)) + { + l3 ([&]{trace << (ps.forced ? "forced push " + ps.after + " to " + : "deletion of ") + << ps.ref << ": attempted to cancel CI of previous" + << " head commit with tenant_service id " << sid + << " (ref_count: " << ts->ref_count << ')';}); + } + else + { + // It's possible that there was no CI for the previous commit for + // various reasons (e.g., CI was not enabled). + // + l3 ([&]{trace << (ps.forced ? "forced push " + ps.after + " to " + : "deletion of ") + << ps.ref << ": failed to cancel CI of previous" + << " head commit with tenant_service id " << sid;}); + } + } + + if (ps.deleted) + return true; // Do nothing further if this was a branch deletion. + + // While we don't need the installation access token in this request, + // let's obtain it to flush out any permission issues early. Also, it is + // valid for an hour so we will most likely make use of it. 
+ // + optional<string> jwt (generate_jwt (ps.app_id, trace, error)); + if (!jwt) + throw server_error (); + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (ps.installation.id, + move (*jwt), + error)); + if (!iat) + throw server_error (); + + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + // While it would have been nice to cancel CIs of PRs with this branch as + // base not to waste resources, there are complications: Firstly, we can + // only do this for remote PRs (since local PRs will most likely share the + // result with branch push). Secondly, we try to do our best even if the + // branch protection rule for head behind is not enabled. In this case, it + // would be good to complete the CI. So maybe/later. See also the head + // case in handle_pull_request(), where we do cancel remote PRs that are + // not shared. + + // Service id that uniquely identifies the CI tenant. + // + string sid (ps.repository.node_id + ':' + ps.after); + + service_data sd (warning_success, + iat->token, + iat->expires_at, + ps.app_id, + ps.installation.id, + move (ps.repository.node_id), + move (ps.repository.clone_url), + service_data::local, + false /* pre_check */, + false /* re_requested */, + ps.after /* check_sha */, + ps.after /* report_sha */); + + // Create an unloaded CI tenant, doing nothing if one already exists + // (which could've been created by handle_pull_request() or by us as a + // result of a push to another branch). Note that the tenant's reference + // count is incremented in all cases. + // + // Note: use no delay since we need to (re)create the synthetic conclusion + // check run as soon as possible. + // + // Note that we use the create() API instead of start() since duplicate + // management is not available in start(). + // + // After this call we will start getting the build_unloaded() + // notifications until (1) we load the tenant, (2) we cancel it, or (3) + // it gets archived after some timeout. + // + if (!create (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_max_, + tenant_service (sid, "ci-github", sd.json ()), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */, + duplicate_tenant_mode::ignore)) + { + fail << "push " + ps.after + " to " + ps.ref + << ": unable to create unloaded CI tenant"; + } + + return true; + } + + // Miscellaneous pull request facts + // + // - Although some of the GitHub documentation makes it sound like they + // expect check runs to be added to both the PR head commit and the merge + // commit, the PR UI does not react to the merge commit's check runs + // consistently. It actually seems to be quite broken. The only thing it + // does seem to do reliably is blocking the PR merge if the merge commit's + // check runs are not successful (i.e, overriding the PR head commit's + // check runs). But the UI looks quite messed up generally in this state. + // + // - When new commits are added to a PR base branch, pull_request.base.sha + // does not change, but the test merge commit will be updated to include + // the new commits to the base branch. + // + // - When new commits are added to a PR head branch, pull_request.head.sha + // gets updated with the head commit's SHA and check_suite.pull_requests[] + // will contain all PRs with this branch as head. 
+ // + bool ci_github:: + handle_pull_request (gh_pull_request_event pr, bool warning_success) + { + HANDLER_DIAG; + + l3 ([&]{trace << "pull_request event { " << pr << " }";}); + + // While we don't need the installation access token in this request, + // let's obtain it to flush out any permission issues early. Also, it is + // valid for an hour so we will most likely make use of it. + // + optional<string> jwt (generate_jwt (pr.pull_request.app_id, trace, error)); + if (!jwt) + throw server_error (); + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (pr.installation.id, + move (*jwt), + error)); + if (!iat) + throw server_error (); + + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + // Distinguish between local and remote PRs by comparing the head and base + // repositories' paths. + // + service_data::kind_type kind ( + pr.pull_request.head_path == pr.pull_request.base_path + ? service_data::local + : service_data::remote); + + // Note that similar to the branch push case above, while it would have + // been nice to cancel the previous CI job once the PR head moves (the + // "synchronize" event), due to the head sharing problem the previous CI + // job might actually still be relevant (in both local and remote PR + // cases). So we only do it for the remote PRs and only if the head is not + // shared (via tenant reference counting). + // + if (kind == service_data::remote && pr.action == "synchronize") + { + if (pr.before) + { + // Service id that will uniquely identify the CI tenant. + // + string sid (pr.repository.node_id + ':' + *pr.before); + + if (optional<tenant_service> ts = cancel (error, warn, + verb_ ? &trace : nullptr, + *build_db_, retry_max_, + "ci-github", sid, + true /* ref_count */)) + { + l3 ([&]{trace << "pull request " << pr.pull_request.node_id + << ": attempted to cancel CI of previous head commit" + << " (ref_count: " << ts->ref_count << ')';}); + } + else + { + // It's possible that there was no CI for the previous commit for + // various reasons (e.g., CI was not enabled). + // + l3 ([&]{trace << "pull request " << pr.pull_request.node_id + << ": failed to cancel CI of previous head commit " + << "with tenant_service id " << sid;}); + } + } + else + { + error << "pull request " << pr.pull_request.node_id + << ": before commit is missing in synchronize event"; + } + } + + // Note: for remote PRs the check_sha will be set later, in + // build_unloaded_pre_check(), to test merge commit id. + // + string check_sha (kind == service_data::local + ? pr.pull_request.head_sha + : ""); + + // Note that PR rebuilds (re-requested) are handled by + // handle_check_suite_rerequest(). + // + // Note that, in the case of a remote PR, GitHub will copy the PR head + // commit from the head (forked) repository into the base repository. So + // the check runs must always be added to the base repository, whether the + // PR is local or remote. The head commit refs are located at + // refs/pull/<PR-number>/head. 
+ // + service_data sd (warning_success, + move (iat->token), + iat->expires_at, + pr.pull_request.app_id, + pr.installation.id, + move (pr.repository.node_id), + move (pr.repository.clone_url), + kind, true /* pre_check */, false /* re_request */, + move (check_sha), + move (pr.pull_request.head_sha) /* report_sha */, + pr.pull_request.node_id, + pr.pull_request.number); + + // Create an unloaded CI tenant for the pre-check phase (during which we + // wait for the PR's merge commit and behindness to become available). + // + // Create with an empty service id so that the generated tenant id is used + // instead during the pre-check phase (so as not to clash with a proper + // service id for this head commit, potentially created in + // handle_branch_push() or as another PR). + // + tenant_service ts ("", "ci-github", sd.json ()); + + // Note: use no delay since we need to start the actual CI (which in turn + // (re)creates the synthetic conclusion check run) as soon as possible. + // + // After this call we will start getting the build_unloaded() + // notifications -- which will be routed to build_unloaded_pre_check() -- + // until we cancel the tenant or it gets archived after some timeout. + // (Note that we never actually load this request, we always cancel it; + // see build_unloaded_pre_check() for details.) + // + if (!create (error, + warn, + verb_ ? &trace : nullptr, + *build_db_, retry_max_, + move (ts), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */)) + { + fail << "pull request " << pr.pull_request.node_id + << ": unable to create unloaded pre-check tenant"; + } + + return true; + } + + bool ci_github:: + handle_check_suite_rerequest (gh_check_suite_event cs, bool warning_success) + { + HANDLER_DIAG; + + l3 ([&]{trace << "check_suite event { " << cs << " }";}); + + assert (cs.action == "rerequested"); + + // While we don't need the installation access token in this request, + // let's obtain it to flush out any permission issues early. Also, it is + // valid for an hour so we will most likely make use of it. + // + optional<string> jwt (generate_jwt (cs.check_suite.app_id, trace, error)); + if (!jwt) + throw server_error (); + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (cs.installation.id, + move (*jwt), + error)); + if (!iat) + throw server_error (); + + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + // Service id that uniquely identifies the CI tenant. + // + string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha); + + // If the user requests a rebuild of the (entire) PR, then this manifests + // as the check_suite rather than pull_request event. Specifically: + // + // - For a local PR, this event is shared with the branch push and all we + // need to do is restart the CI for the head commit. + // + // - For a remote PR, this event will have no gh_check_suite::head_branch. + // In this case we need to load the existing service data for this head + // commit, extract the test merge commit, and restart the CI for that. + // + // Note that it's possible the base branch has moved in the meantime and + // ideally we would want to re-request the test merge commit, etc. + // However, this will only be necessary if the user does not follow our + // recommendation of enabling the head-behind-base protection. And it + // seems all this extra complexity would not be warranted. 
+ // + string check_sha; + service_data::kind_type kind; + + if (!cs.check_suite.head_branch) + { + // Rebuild of remote PR. + // + kind = service_data::remote; + + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) + { + tenant_service& ts (d->service); + + try + { + service_data sd (*ts.data); + check_sha = move (sd.check_sha); // Test merge commit. + } + catch (const invalid_argument& e) + { + fail << "failed to parse service data: " << e; + } + } + else + { + error << "check suite " << cs.check_suite.node_id + << " for remote pull request:" + << " re-requested but tenant_service with id " << sid + << " did not exist"; + return true; + } + } + else + { + // Rebuild of branch push or local PR. + // + kind = service_data::local; + check_sha = cs.check_suite.head_sha; + } + + service_data sd (warning_success, + iat->token, + iat->expires_at, + cs.check_suite.app_id, + cs.installation.id, + move (cs.repository.node_id), + move (cs.repository.clone_url), + kind, false /* pre_check */, true /* re_requested */, + move (check_sha), + move (cs.check_suite.head_sha) /* report_sha */); + + // Replace the existing CI tenant if it exists. + // + // Note that GitHub UI does not allow re-running the entire check suite + // until all the check runs are completed. + // + + // Create an unloaded CI tenant. + // + // Note: use no delay since we need to (re)create the synthetic conclusion + // check run as soon as possible. + // + // Note that we use the create() API instead of start() since duplicate + // management is not available in start(). + // + // After this call we will start getting the build_unloaded() + // notifications until (1) we load the tenant, (2) we cancel it, or (3) + // it gets archived after some timeout. + // + auto pr (create (error, + warn, + verb_ ? &trace : nullptr, + *build_db_, retry_max_, + tenant_service (sid, "ci-github", sd.json ()), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */, + duplicate_tenant_mode::replace)); + + if (!pr) + { + fail << "check suite " << cs.check_suite.node_id + << ": unable to create unloaded CI tenant"; + } + + if (pr->second == duplicate_tenant_result::created) + { + error << "check suite " << cs.check_suite.node_id + << ": re-requested but tenant_service with id " << sid + << " did not exist"; + return true; + } + + return true; + } + + bool ci_github:: + handle_check_suite_completed (gh_check_suite_event cs, bool warning_success) + { + // The plans is as follows: + // + // 1. Load the service data. + // + // 2. Verify it is completed. + // + // 3. Verify the check run counts match. + // + // 4. Verify (like in build_built()) that all the check runs are + // completed. + // + // 5. Verify the result matches what GitHub thinks it is. + + HANDLER_DIAG; + + l3 ([&]{trace << "check_suite event { " << cs << " }";}); + + // Service id that uniquely identifies the CI tenant. + // + string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha); + + // The common log entry subject. + // + string sub ("check suite " + cs.check_suite.node_id + '/' + sid); + + // Load the service data. + // + service_data sd; + + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) + { + try + { + sd = service_data (*d->service.data); + } + catch (const invalid_argument& e) + { + fail << "failed to parse service data: " << e; + } + } + else + { + error << sub << ": tenant_service does not exist"; + return true; + } + + // Verify the completed flag and the number of check runs. 
+ // + if (!sd.completed) + { + error << sub << " service data complete flag is false"; + return true; + } + + // Received count will be one higher because we don't store the conclusion + // check run. + // + size_t check_runs_count (sd.check_runs.size () + 1); + + if (check_runs_count == 1) + { + error << sub << ": no check runs in service data"; + return true; + } + + if (cs.check_suite.check_runs_count != check_runs_count) + { + error << sub << ": check runs count " << cs.check_suite.check_runs_count + << " does not match service data count " << check_runs_count; + return true; + } + + // Verify that all the check runs are built and compute the summary + // conclusion. + // + result_status conclusion (result_status::success); + + for (const check_run& cr: sd.check_runs) + { + if (cr.state == build_state::built) + { + assert (cr.status.has_value ()); + conclusion |= *cr.status; + } + else + { + error << sub << ": unbuilt check run in service data"; + return true; + } + } + + // Verify the conclusion. + // + if (!cs.check_suite.conclusion) + { + error << sub << ": absent conclusion in completed check suite"; + return true; + } + + // Note that the case mismatch is due to GraphQL (gh_conclusion()) + // requiring uppercase conclusion values while the received webhook values + // are lower case. + // + string gh_conclusion (gh_to_conclusion (conclusion, warning_success)); + + if (icasecmp (*cs.check_suite.conclusion, gh_conclusion) != 0) + { + error << sub << ": conclusion " << *cs.check_suite.conclusion + << " does not match service data conclusion " << gh_conclusion; + return true; + } + + return true; + } + + // Make a check run summary from a CI start_result. + // + static string + to_check_run_summary (const optional<ci_start::start_result>& r) + { + string s; + + s = "```\n"; + if (r) s += r->message; + else s += "Internal service error"; + s += "\n```"; + + return s; + } + + // Create a gq_built_result. + // + // Throw invalid_argument in case of invalid result_status. + // + static gq_built_result + make_built_result (result_status rs, bool warning_success, string message) + { + string title (circle (rs == result_status::warning && !warning_success + ? result_status::error + : rs)); + title += ' '; + title += ucase (to_string (rs)); + + return {gh_to_conclusion (rs, warning_success), + move (title), + move (message)}; + } + + // Parse a check run details URL into a build_id. + // + // Return nullopt if the URL is invalid. + // + static optional<build_id> + parse_details_url (const string& details_url); + + // Note that GitHub always posts a message to their GUI saying "You have + // successfully requested <check_run_name> be rerun", regardless of what + // HTTP status code we respond with. However we do return error status codes + // when there is no better option (like failing the conclusion) in case they + // start handling them someday. + // + bool ci_github:: + handle_check_run_rerequest (const gh_check_run_event& cr, + bool warning_success) + { + HANDLER_DIAG; + + l3 ([&]{trace << "check_run event { " << cr << " }";}); + + // The overall plan is as follows: + // + // 1. Load service data. + // + // 2. If the tenant is archived, then fail (re-create) both the check run + // and the conclusion with appropriate diagnostics. + // + // 3. If the check run is in the queued state, then do nothing. + // + // 4. Re-create the check run in the queued state and the conclusion in + // the building state. 
Note: do in a single request to make sure we + // either "win" or "loose" the potential race for both (important + // for #7). + // + // 5. Call the rebuild() function to attempt to schedule a rebuild. Pass + // the update function that does the following (if called): + // + // a. Save new node ids. + // + // b. Update the check run state (may also not exist). + // + // c. Clear the completed flag if true. + // + // 6. If the result of rebuild() indicates the tenant is archived, then + // fail (update) both the check run and conclusion with appropriate + // diagnostics. + // + // 7. If original state is queued (no rebuild was scheduled), then fail + // (update) both the check run and the conclusion. + // + // Note that while conceptually we are updating existing check runs, in + // practice we have to re-create as new check runs in order to replace the + // existing ones because GitHub does not allow transitioning out of the + // built state. + + // Get a new installation access token. + // + auto get_iat = [this, &trace, &error, &cr] () + -> optional<gh_installation_access_token> + { + optional<string> jwt (generate_jwt (cr.check_run.app_id, trace, error)); + if (!jwt) + return nullopt; + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (cr.installation.id, + move (*jwt), + error)); + + if (iat) + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + return iat; + }; + + const string& repo_node_id (cr.repository.node_id); + const string& head_sha (cr.check_run.check_suite.head_sha); + + // Prepare the build and conclusion check runs. They are sent to GitHub in + // a single request (unless something goes wrong) so store them together + // from the outset. + // + brep::check_runs check_runs (2); + check_run& bcr (check_runs[0]); // Build check run + check_run& ccr (check_runs[1]); // Conclusion check run + + ccr.name = conclusion_check_run_name; + + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + // Load the service data, failing the check runs if the tenant has been + // archived. + // + service_data sd; + string tenant_id; + { + // Service id that uniquely identifies the CI tenant. + // + string sid (repo_node_id + ':' + head_sha); + + optional<tenant_data> d (find (*build_db_, "ci-github", sid)); + if (!d) + { + // No such tenant. + // + fail << "check run " << cr.check_run.node_id + << " re-requested but tenant_service with id " << sid + << " does not exist"; + } + + tenant_service& ts (d->service); + + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + fail << "failed to parse service data: " << e; + } + + if (!sd.conclusion_node_id) + fail << "no conclusion node id for check run " << cr.check_run.node_id; + + tenant_id = d->tenant_id; + + // Get a new IAT if the one from the service data has expired. + // + if (system_clock::now () > sd.installation_access.expires_at) + { + if ((new_iat = get_iat ())) + iat = &*new_iat; + else + throw server_error (); + } + else + iat = &sd.installation_access; + + if (d->archived) // Tenant is archived + { + // Fail (update) the check runs. + // + gq_built_result br ( + make_built_result ( + result_status::error, warning_success, + "Unable to rebuild individual configuration: build has " + "been archived")); + + // Try to update the conclusion check run even if the first update + // fails. + // + bool f (false); // Failed. 
+ + if (gq_update_check_run (error, bcr, iat->token, + repo_node_id, cr.check_run.node_id, + br)) + { + l3 ([&]{trace << "updated check_run { " << bcr << " }";}); + } + else + { + error << "check_run " << cr.check_run.node_id + << ": unable to update check run"; + f = true; + } + + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *sd.conclusion_node_id, + move (br))) + { + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); + } + else + { + error << "check_run " << cr.check_run.node_id + << ": unable to update conclusion check run"; + f = true; + } + + // Fail the handler if either of the check runs could not be + // updated. + // + if (f) + throw server_error (); + + return true; + } + } + + // Fail if it's the conclusion check run that is being re-requested. + // + // Expect that if the user selects re-run all failed checks we will + // receive multiple check runs, one of which will be the conclusion. And + // if we fail it while it happens to arrive last, then we will end up in + // the wrong overall state (real check run is building while conclusion is + // failed). It seems the best we can do is to ignore it: if the user did + // request a rebuild of the conclusion check run explicitly, there will be + // no change, which is not ideal but is still an indication that this + // operation is not supported. + // + if (cr.check_run.name == conclusion_check_run_name) + { + l3 ([&]{trace << "re-requested conclusion check_run";}); + +#if 0 + if (!sd.conclusion_node_id) + fail << "no conclusion node id for check run " << cr.check_run.node_id; + + gq_built_result br ( + make_built_result (result_status::error, warning_success, + "Conclusion check run cannot be rebuilt")); + + // Fail (update) the conclusion check run. + // + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *sd.conclusion_node_id, + move (br))) + { + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); + } + else + { + fail << "check run " << cr.check_run.node_id + << ": unable to update conclusion check run " + << *sd.conclusion_node_id; + } +#endif + + return true; + } + + // Parse the check_run's details_url to extract build id. + // + // While this is a bit hackish, there doesn't seem to be a better way + // (like associating custom data with a check run). Note that the GitHub + // UI only allows rebuilding completed check runs, so the details URL + // should be there. + // + optional<build_id> bid (parse_details_url (cr.check_run.details_url)); + if (!bid) + { + fail << "check run " << cr.check_run.node_id + << ": failed to extract build id from details_url"; + } + + // Initialize the check run (`bcr`) with state from the service data. + // + { + // Search for the check run in the service data. + // + // Note that we look by name in case node id got replaced by a racing + // re-request (in which case we ignore this request). + // + auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (), + [&cr] (const check_run& scr) + { + return scr.name == cr.check_run.name; + })); + + if (i == sd.check_runs.end ()) + fail << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "re-requested but does not exist in service data"; + + // Do nothing if node ids don't match. 
+ // + if (i->node_id && *i->node_id != cr.check_run.node_id) + { + l3 ([&]{trace << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "node id has changed in service data";}); + return true; + } + + // Do nothing if the build is already queued. + // + if (i->state == build_state::queued) + { + l3 ([&]{trace << "ignoring already-queued check run";}); + return true; + } + + bcr.name = i->name; + bcr.build_id = i->build_id; + bcr.state = i->state; + } + + // Transition the build and conclusion check runs out of the built state + // (or any other state) by re-creating them. + // + bcr.state = build_state::queued; + bcr.state_synced = false; + bcr.details_url = cr.check_run.details_url; + bcr.description = {check_run_queued_title, check_run_queued_summary}; + + ccr.state = build_state::building; + ccr.state_synced = false; + ccr.details_url = details_url (tenant_id); + ccr.description = {conclusion_building_title, + conclusion_building_summary}; + + if (gq_create_check_runs (error, check_runs, iat->token, + cr.check_run.app_id, repo_node_id, head_sha, + options_->build_queued_batch ())) + { + assert (bcr.state == build_state::queued); + assert (ccr.state == build_state::building); + + l3 ([&]{trace << "created check_run { " << bcr << " }";}); + l3 ([&]{trace << "created conclusion check_run { " << ccr << " }";}); + } + else + { + fail << "check run " << cr.check_run.node_id + << ": unable to re-create build and conclusion check runs"; + } + + // Request the rebuild and update service data. + // + bool race (false); + + // Callback function called by rebuild() to update the service data (but + // only if the build is actually restarted). + // + auto update_sd = [&error, &new_iat, &race, + tenant_id = move (tenant_id), + &cr, &bcr, &ccr] (const string& ti, + const tenant_service& ts, + build_state) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + race = false; // Reset. + + if (tenant_id != ti) + { + // The tenant got replaced since we loaded it but we managed to + // trigger a rebuild in the new tenant. Who knows whose check runs are + // visible, so let's fail ours similar to the cases below. + // + race = true; + return nullopt; + } + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullopt; + } + + // Note that we again look by name in case node id got replaced by a + // racing re-request. In this case, however, it's impossible to decide + // who won that race, so let's fail the check suite to be on the safe + // side (in a sense, similar to the rebuild() returning queued below). + // + auto i (find_if ( + sd.check_runs.begin (), sd.check_runs.end (), + [&cr] (const check_run& scr) + { + return scr.name == cr.check_run.name; + })); + + if (i == sd.check_runs.end ()) + { + error << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "re-requested but does not exist in service data"; + return nullopt; + } + + if (i->node_id && *i->node_id != cr.check_run.node_id) + { + // Keep the old conclusion node id to make sure any further state + // transitions are ignored. A bit of a hack. + // + race = true; + return nullopt; + } + + *i = bcr; // Update with new node_id, state, state_synced. + + sd.conclusion_node_id = ccr.node_id; + sd.completed = false; + + // Save the IAT if we created a new one. 
+ // + if (new_iat) + sd.installation_access = *new_iat; + + return sd.json (); + }; + + optional<build_state> bs ( + rebuild (*build_db_, retry_max_, *bid, update_sd)); + + // If the build has been archived or re-enqueued since we loaded the + // service data, fail (by updating) both the build check run and the + // conclusion check run. Otherwise the build has been successfully + // re-enqueued so do nothing further. + // + if (!race && bs && *bs != build_state::queued) + return true; + + gq_built_result br; // Built result for both check runs. + + if (race || bs) // Race or re-enqueued. + { + // The re-enqueued case: this build has been re-enqueued since we first + // loaded the service data. This could happen if the user clicked + // "re-run" multiple times and another handler won the rebuild() race. + // + // However the winner of the check runs race cannot be determined. + // + // Best case the other handler won the check runs race as well and + // thus everything will proceed normally. Our check runs will be + // invisible and disregarded. + // + // Worst case we won the check runs race and the other handler's check + // runs -- the ones that will be updated by the build_*() notifications + // -- are no longer visible, leaving things quite broken. + // + // Either way, we fail our check runs. In the best case scenario it + // will have no effect; in the worst case scenario it lets the user + // know something has gone wrong. + // + br = make_built_result (result_status::error, warning_success, + "Unable to rebuild, try again"); + } + else // Archived. + { + // The build has expired since we loaded the service data. Most likely + // the tenant has been archived. + // + br = make_built_result ( + result_status::error, warning_success, + "Unable to rebuild individual configuration: build has been archived"); + } + + // Try to update the conclusion check run even if the first update fails. + // + bool f (false); // Failed. + + // Fail the build check run. + // + if (gq_update_check_run (error, bcr, iat->token, + repo_node_id, *bcr.node_id, + br)) + { + l3 ([&]{trace << "updated check_run { " << bcr << " }";}); + } + else + { + error << "check run " << cr.check_run.node_id + << ": unable to update (replacement) check run " + << *bcr.node_id; + f = true; + } + + // Fail the conclusion check run. + // + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *ccr.node_id, + move (br))) + { + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); + } + else + { + error << "check run " << cr.check_run.node_id + << ": unable to update conclusion check run " << *ccr.node_id; + f = true; + } + + // Fail the handler if either of the check runs could not be updated. + // + if (f) + throw server_error (); + + return true; + } + + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded (const string& ti, + tenant_service&& ts, + const diag_epilogue& log_writer) const noexcept + { + // NOTE: this function is noexcept and should not throw. + + NOTIFICATION_DIAG (log_writer); + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullptr; + } + + return sd.pre_check + ? 
build_unloaded_pre_check (move (ts), move (sd), log_writer) + : build_unloaded_load (ti, move (ts), move (sd), log_writer); + } + + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded_pre_check (tenant_service&& ts, + service_data&& sd, + const diag_epilogue& log_writer) const noexcept + try + { + // NOTE: this function is noexcept and should not throw. + // + // In a few places where invalid_argument is unlikely to be thrown and/or + // would indicate that things are seriously broken we let it propagate to + // the function catch block where the pre-check tenant will be canceled + // (otherwise we could end up in an infinite loop, e.g., because the + // problematic arguments won't change). + + NOTIFICATION_DIAG (log_writer); + + // We get here for PRs only (but both local and remote). The overall + // plan is as follows: + // + // 1. Ask for the mergeability/behind status/test merge commit. + // + // 2. If not ready, get called again. + // + // 3. If not mergeable, behind, or different head (head changed while + // waiting for merge commit and thus differs from what's in the + // service_data), cancel the pre-check tenant and do nothing. + // + // 4. Otherwise, create an unloaded CI tenant and cancel ourselves. Note + // that all re-requested cases are handled elsewhere. + // + // Note that in case of a mixed local/remote case, whether we CI the head + // commit or test merge commit will be racy and there is nothing we can do + // about (the purely local case can get "upgraded" to mixed after we have + // started the CI job). + // + + // Request PR pre-check info (triggering the generation of the test merge + // commit on the GitHub's side). + // + // Let unlikely invalid_argument propagate (see above). + // + optional<gq_pr_pre_check_info> pc ( + gq_fetch_pull_request_pre_check_info (error, + sd.installation_access.token, + *sd.pr_node_id)); + + if (!pc) + { + // Test merge commit not available yet: get called again to retry. + // + return nullptr; + } + + // Create the CI tenant if nothing is wrong, otherwise issue diagnostics. + // + if (pc->behind) + { + l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id + << ": head is behind base";}); + } + else if (!pc->merge_commit_sha) + { + l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id + << ": not auto-mergeable";}); + } + else if (pc->head_sha != sd.report_sha) + { + l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id + << ": head commit has changed";}); + } + else + { + // Create the CI tenant by reusing the pre-check service data. + // + sd.pre_check = false; + + // Set the service data's check_sha if this is a remote PR. The test + // merge commit refs are located at refs/pull/<PR-number>/merge. + // + if (sd.kind == service_data::remote) + sd.check_sha = *pc->merge_commit_sha; + + // Service id that will uniquely identify the CI tenant. + // + string sid (sd.repository_node_id + ':' + sd.report_sha); + + // Create an unloaded CI tenant, doing nothing if one already exists + // (which could've been created by a head branch push or another PR + // sharing the same head commit). Note that the tenant's reference count + // is incremented in all cases. + // + // Note: use no delay since we need to (re)create the synthetic + // conclusion check run as soon as possible. + // + // Note that we use the create() API instead of start() since duplicate + // management is not available in start(). 
+ // + // After this call we will start getting the build_unloaded() + // notifications until (1) we load the tenant, (2) we cancel it, or (3) + // it gets archived after some timeout. + // + try + { + if (auto pr = create (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_max_, + tenant_service (sid, "ci-github", sd.json ()), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */, + duplicate_tenant_mode::ignore)) + { + if (pr->second == duplicate_tenant_result::ignored) + { + // This PR is sharing a head commit with something else. + // + // If this is a local PR then it's probably the branch push, which + // is expected, so do nothing. + // + // If this is a remote PR then it could be anything (branch push, + // local PR, or another remote PR) which in turn means the CI + // result may end up being for head, not merge commit. There is + // nothing we can do about it on our side (the user can enable the + // head-behind-base protection on their side). + // + if (sd.kind == service_data::remote) + { + l3 ([&]{trace << "remote pull request " << *sd.pr_node_id + << ": CI tenant already exists for " << sid;}); + } + } + } + else + { + error << "pull request " << *sd.pr_node_id + << ": failed to create unloaded CI tenant " + << "with tenant_service id " << sid; + + // Fall through to cancel. + } + } + catch (const runtime_error& e) // Database retries exhausted. + { + error << "pull request " << *sd.pr_node_id + << ": failed to create unloaded CI tenant " + << "with tenant_service id " << sid + << ": " << e.what (); + + // Fall through to cancel. + } + } + + // Cancel the pre-check tenant. + // + try + { + if (!cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_max_, + ts.type, + ts.id)) + { + // Should never happen (no such tenant). + // + error << "pull request " << *sd.pr_node_id + << ": failed to cancel pre-check tenant with tenant_service id " + << ts.id; + } + } + catch (const runtime_error& e) // Database retries exhausted. + { + error << "pull request " << *sd.pr_node_id + << ": failed to cancel pre-check tenant with tenant_service id " + << ts.id << ": " << e.what (); + } + + return nullptr; + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + error << "pull request " << *sd.pr_node_id + << ": unhandled exception: " << e.what (); + + // Cancel the pre-check tenant otherwise we could end up in an infinite + // loop (see top of function). + // + try + { + if (cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_max_, + ts.type, + ts.id)) + l3 ([&]{trace << "canceled pre-check tenant " << ts.id;}); + } + catch (const runtime_error& e) // Database retries exhausted. + { + l3 ([&]{trace << "failed to cancel pre-check tenant " << ts.id << ": " + << e.what ();}); + } + + return nullptr; + } + + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded_load (const string& tenant_id, + tenant_service&& ts, + service_data&& sd, + const diag_epilogue& log_writer) const noexcept + try + { + // NOTE: this function is noexcept and should not throw. + // + // In a few places where invalid_argument is unlikely to be thrown and/or + // would indicate that things are seriously broken we let it propagate to + // the function catch block where the tenant will be canceled (otherwise + // we could end up in an infinite loop, e.g., because the problematic + // arguments won't change). 
+ + NOTIFICATION_DIAG (log_writer); + + // Load the tenant, which is essentially the same for both branch push and + // PR. The overall plan is as follows: + // + // - Create synthetic conclusion check run with the in-progress state. If + // unable to, get called again to re-try. + // + // - Load the tenant. If unable to, fail the conclusion check run. + // + // - Update service data. + // + + // Get a new installation access token if the current one has expired. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } + } + else + iat = &sd.installation_access; + + if (iat == nullptr) + return nullptr; // Try again on the next call. + + // Create a synthetic check run with an in-progress state. Return the + // check run on success or nullopt on failure. + // + auto create_synthetic_cr = [&tenant_id, + iat, + &sd, + &error, + this] (string name, + const string& title, + const string& summary) + -> optional<check_run> + { + check_run cr; + cr.name = move (name); + + // Let unlikely invalid_argument propagate (see above). + // + if (gq_create_check_run (error, + cr, + iat->token, + sd.app_id, + sd.repository_node_id, + sd.report_sha, + details_url (tenant_id), + build_state::building, + title, summary)) + { + return cr; + } + else + return nullopt; + }; + + // Update a synthetic check run with success or failure. Return the check + // run on success or nullopt on failure. + // + auto update_synthetic_cr = [iat, + &sd, + &error] (const string& node_id, + const string& name, + result_status rs, + string summary) -> optional<check_run> + { + assert (!node_id.empty ()); + + // Let unlikely invalid_argument propagate (see above). + // + gq_built_result br ( + make_built_result (rs, sd.warning_success, move (summary))); + + check_run cr; + cr.name = name; // For display purposes only. + + // Let unlikely invalid_argument propagate (see above). + // + if (gq_update_check_run (error, + cr, + iat->token, + sd.repository_node_id, + node_id, + move (br))) + { + assert (cr.state == build_state::built); + return cr; + } + else + return nullopt; + }; + + // (Re)create the synthetic conclusion check run first in order to convert + // a potentially completed check suite to building as early as possible. + // + // Note that there is a window between receipt of a check_suite or + // pull_request event and the first bot/worker asking for a task, which + // could be substantial. We could probably (also) try to (re)create the + // conclusion checkrun in the webhook handler. @@ Maybe/later. + // + string conclusion_node_id; // Conclusion check run node ID. + + if (!sd.conclusion_node_id) + { + if (auto cr = create_synthetic_cr (conclusion_check_run_name, + conclusion_building_title, + conclusion_building_summary)) + { + l3 ([&]{trace << "created check_run { " << *cr << " }";}); + + conclusion_node_id = move (*cr->node_id); + } + } + + const string& effective_conclusion_node_id ( + sd.conclusion_node_id + ? *sd.conclusion_node_id + : conclusion_node_id); + + // Load the CI tenant if the conclusion check run was created. + // + if (!effective_conclusion_node_id.empty ()) + { + string ru; // Repository URL. 
+ + // CI the test merge commit for remote PRs and the head commit for + // everything else (branch push or local PRs). + // + if (sd.kind == service_data::remote) + { + // E.g. #pull/28/merge@1b6c9a361086ed93e6f1e67189e82d52de91c49b + // + ru = sd.repository_clone_url + "#pull/" + to_string (*sd.pr_number) + + "/merge@" + sd.check_sha; + } + else + ru = sd.repository_clone_url + '#' + sd.check_sha; + + // Let unlikely invalid_argument propagate (see above). + // + repository_location rl (move (ru), repository_type::git); + + try + { + optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_max_, + move (ts), + move (rl))); + + if (!r || r->status != 200) + { + // Let unlikely invalid_argument propagate (see above). + // + if (auto cr = update_synthetic_cr (effective_conclusion_node_id, + conclusion_check_run_name, + result_status::error, + to_check_run_summary (r))) + { + l3 ([&]{trace << "updated check_run { " << *cr << " }";}); + } + else + { + // Nothing really we can do in this case since we will not receive + // any further notifications. Log the error as a last resort. + + error << "failed to load CI tenant " << ts.id + << " and unable to update conclusion"; + } + + return nullptr; // No need to update service data in this case. + } + } + catch (const runtime_error& e) // Database retries exhausted. + { + error << "failed to load CI tenant " << ts.id << ": " << e.what (); + + // Fall through to retry on next call. + } + } + + if (!new_iat && conclusion_node_id.empty ()) + return nullptr; // Nothing to save (but potentially retry on next call). + + return [&error, + tenant_id, + iat = move (new_iat), + cni = move (conclusion_node_id)] + (const string& ti, + const tenant_service& ts) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to + // transaction being aborted) and so should not move out of its + // captures. + + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullopt; + } + + if (iat) + sd.installation_access = *iat; + + if (!cni.empty ()) + sd.conclusion_node_id = cni; + + return sd.json (); + }; + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + error << "CI tenant " << ts.id << ": unhandled exception: " << e.what (); + + // Cancel the tenant otherwise we could end up in an infinite loop (see + // top of function). + // + try + { + if (cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_max_, ts.type, ts.id)) + l3 ([&]{trace << "canceled CI tenant " << ts.id;}); + } + catch (const runtime_error& e) // Database retries exhausted. + { + l3 ([&]{trace << "failed to cancel CI tenant " << ts.id + << ": " << e.what ();}); + } + + return nullptr; + } + + // Build state change notifications (see tenant-services.hxx for + // background). Mapping our state transitions to GitHub pose multiple + // problems: + // + // 1. In our model we have the building->queued (interrupted) and + // built->queued (rebuild) transitions. We are going to ignore both of + // them when notifying GitHub. The first is not important (we expect the + // state to go back to building shortly). The second should normally not + // happen and would mean that a completed check suite may go back on its + // conclusion (which would be pretty confusing for the user). 
Note that + // the ->queued state transition of a check run rebuild triggered by + // us is handled directly in handle_check_run_rerequest(). + // + // So, for GitHub notifications, we only have the following linear + // transition sequence: + // + // -> queued -> building -> built + // + // Note, however, that because we ignore certain transitions, we can now + // observe "degenerate" state changes that we need to ignore: + // + // building -> [queued] -> building + // built -> [queued] -> ... + // + // 2. As mentioned in tenant-services.hxx, we may observe the notifications + // as arriving in the wrong order. Unfortunately, GitHub provides no + // mechanisms to help with that. In fact, GitHub does not even prevent + // the creation of multiple check runs with the same name (it will always + // use the last created instance, regardless of the status, timestamps, + // etc). As a result, we cannot, for example, rely on the failure to + // create a new check run in response to the queued notification as an + // indication of a subsequent notification (e.g., building) having + // already occurred. + // + // The only aid in this area that GitHub provides is that it prevents + // updating a check run in the built state to a former state (queued or + // building). But one can still create a new check run with the same name + // and a former state. + // + // (Note that we should also be careful if trying to take advantage of + // this "check run override" semantics: each created check run gets a new + // URL and while the GitHub UI will always point to the last created when + // showing the list of check runs, if the user is already on the previous + // check run's URL, nothing will automatically cause them to be + // redirected to the new URL. And so the user may sit on the abandoned + // check run waiting forever for it to be completed.) + // + // As a result, we will deal with the out of order problem differently + // depending on the notification: + // + // queued Skip if there is already a check run in service data, + // otherwise create new. + // + // building Skip if there is no check run in service data or it's + // not in the queued state, otherwise update. + // + // built Update if there is check run in service data unless its + // state is built, otherwise create new. + // + // The rationale for this semantics is as follows: the building + // notification is a "nice to have" and can be skipped if things are not + // going normally. In contrast, the built notification cannot be skipped + // and we must either update the existing check run or create a new one + // (hopefully overriding the one created previously, if any). Note that + // the likelihood of the built notification being performed at the same + // time as queued/building is quite low (unlike queued and building). + // + // Note also that with this semantics it's unlikely but possible that we + // attempt to update the service data in the wrong order. Specifically, it + // feels like this should not be possible in the ->building transition + // since we skip the building notification unless the check run in the + // service data is already in the queued state. But it is theoretically + // possible in the ->built transition. For example, we may be updating + // the service data for the queued notification after it has already been + // updated by the built notification. In such cases we should not be + // overriding the latter state (built) with the former (queued). + // + // 3. 
We may not be able to "conclusively" notify GitHub, for example, due + // to a transient network error. The "conclusively" part means that the + // notification may or may not have gone through (though it feels the + // common case will be the inability to send the request rather than + // receive the reply). + // + // In such cases, we record in the service data that the notification was + // not synchronized and in subsequent notifications we do the best we can: + // if we have node_id, then we update, otherwise, we create (potentially + // overriding the check run created previously). + // + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_queued (const string& tenant_id, + const tenant_service& ts, + const vector<build>& builds, + optional<build_state> istate, + const build_queued_hints& hs, + const diag_epilogue& log_writer) const noexcept + try + { + // NOTE: this function is noexcept and should not throw. + + NOTIFICATION_DIAG (log_writer); + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullptr; + } + + // Ignore attempts to add new builds to a completed check suite. This can + // happen, for example, if a new build configuration is added before + // the tenant is archived. + // + if (sd.completed) + return nullptr; + + // The builds for which we will be creating check runs. + // + vector<reference_wrapper<const build>> bs; + brep::check_runs crs; // Parallel to bs. + + // Exclude the builds for which we won't be creating check runs. + // + for (const build& b: builds) + { + string bid (gh_check_run_name (b)); // Full build id. + + if (const check_run* scr = sd.find_check_run (bid)) + { + // Another notification has already stored this check run. + // + if (!istate) + { + // Out of order queued notification. + // + warn << "check run " << bid << ": out of order queued " + << "notification; existing state: " << scr->state_string (); + } + else if (*istate == build_state::built) + { + // Unexpected built->queued transition (rebuild). + // + // Note that handle_check_run_rerequest() may trigger an "expected" + // rebuild, in which case our state should be set to queued. + // + if (scr->state != build_state::queued || !scr->state_synced) + warn << "check run " << bid << ": unexpected rebuild"; + } + else + { + // Ignore interrupted. + // + assert (*istate == build_state::building); + } + } + else + { + // No stored check run for this build so prepare to create one. + // + bs.push_back (b); + + crs.push_back ( + check_run {move (bid), + gh_check_run_name (b, &hs), + nullopt, /* node_id */ + build_state::queued, + false /* state_synced */, + nullopt /* status */, + details_url (b), + check_run::description_type {check_run_queued_title, + check_run_queued_summary}}); + } + } + + if (bs.empty ()) // Nothing to do. + return nullptr; + + // Get a new installation access token if the current one has expired. 
+ // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } + } + else + iat = &sd.installation_access; + + // Note: we treat the failure to obtain the installation access token the + // same as the failure to notify GitHub (state is updated by not marked + // synced). + // + if (iat != nullptr) + { + // Create a check_run for each build as a single request. + // + // Let unlikely invalid_argument propagate. + // + if (gq_create_check_runs (error, + crs, + iat->token, + sd.app_id, + sd.repository_node_id, + sd.report_sha, + options_->build_queued_batch ())) + { + for (const check_run& cr: crs) + { + // We can only create a check run in the queued state. + // + assert (cr.state == build_state::queued); + l3 ([&]{trace << "created check_run { " << cr << " }";}); + } + } + } + + return [tenant_id, + bs = move (bs), + iat = move (new_iat), + crs = move (crs), + error = move (error), + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullopt; + } + + if (iat) + sd.installation_access = *iat; + + for (size_t i (0); i != bs.size (); ++i) + { + const check_run& cr (crs[i]); + + // Note that this service data may not be the same as what we observed + // in the build_queued() function above. For example, some check runs + // that we have queued may have already transitioned to built. So we + // skip any check runs that are already present. + // + if (const check_run* scr = sd.find_check_run (cr.build_id)) + { + // Doesn't looks like printing new/existing check run node_id will + // be of any help. + // + warn << "check run " << cr.build_id << ": out of order queued " + << "notification service data update; existing state: " + << scr->state_string (); + } + else + sd.check_runs.push_back (cr); + } + + return sd.json (); + }; + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + error << "CI tenant " << ts.id << ": unhandled exception: " << e.what (); + + return nullptr; + } + + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_building (const string& tenant_id, + const tenant_service& ts, + const build& b, + const diag_epilogue& log_writer) const noexcept + try + { + // NOTE: this function is noexcept and should not throw. + + NOTIFICATION_DIAG (log_writer); + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullptr; + } + + // Similar to build_queued(), ignore attempts to add new builds to a + // completed check suite. + // + if (sd.completed) + return nullptr; + + optional<check_run> cr; // Updated check run. + string bid (gh_check_run_name (b)); // Full build id. + + if (check_run* scr = sd.find_check_run (bid)) // Stored check run. 
+ { + // Update the check run if it exists on GitHub and the queued + // notification updated the service data, otherwise do nothing. + // + if (scr->state == build_state::queued) + { + if (scr->node_id) + { + cr = move (*scr); + cr->state_synced = false; + } + else + { + // Network error during queued notification (state unsynchronized), + // ignore. + // + l3 ([&]{trace << "unsynchronized check run " << bid;}); + } + } + else + { + // Ignore interrupted (building -> queued -> building transition). + // + if (scr->state != build_state::building) + { + warn << "check run " << bid << ": out of order building " + << "notification; existing state: " << scr->state_string (); + } + } + } + else + warn << "check run " << bid << ": out of order building " + << "notification; no check run state in service data"; + + if (!cr) + return nullptr; + + // Get a new installation access token if the current one has expired. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } + } + else + iat = &sd.installation_access; + + // Note: we treat the failure to obtain the installation access token the + // same as the failure to notify GitHub (state is updated but not marked + // synced). + // + if (iat != nullptr) + { + // Let unlikely invalid_argument propagate. + // + if (gq_update_check_run (error, + *cr, + iat->token, + sd.repository_node_id, + *cr->node_id, + build_state::building, + check_run_building_title, + check_run_building_summary)) + { + // Do nothing further if the state was already built on GitHub (note + // that this is based on the above-mentioned special GitHub semantics + // of preventing changes to the built status). + // + if (cr->state == build_state::built) + { + warn << "check run " << bid << ": already in built state on GitHub"; + return nullptr; + } + + assert (cr->state == build_state::building); + l3 ([&]{trace << "updated check_run { " << *cr << " }";}); + } + } + + return [tenant_id, + iat = move (new_iat), + cr = move (*cr), + error = move (error), + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullopt; + } + + if (iat) + sd.installation_access = *iat; + + // Update the check run only if it is in the queued state. + // + if (check_run* scr = sd.find_check_run (cr.build_id)) + { + if (scr->state == build_state::queued) + *scr = cr; + else + { + warn << "check run " << cr.build_id << ": out of order building " + << "notification service data update; existing state: " + << scr->state_string (); + } + } + else + warn << "check run " << cr.build_id << ": service data state has " + << "disappeared"; + + return sd.json (); + }; + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + string bid (gh_check_run_name (b)); // Full build id. 
+ + error << "check run " << bid << ": unhandled exception: " << e.what(); + + return nullptr; + } + + function<pair<optional<string>, bool> (const string&, + const tenant_service&)> ci_github:: + build_built (const string& tenant_id, + const tenant_service& ts, + const build& b, + const diag_epilogue& log_writer) const noexcept + try + { + // NOTE: this function is noexcept and should not throw. + + NOTIFICATION_DIAG (log_writer); + + // @@ TODO Include ts.id in diagnostics? Check run build ids alone seem + // kind of meaningless. Log lines get pretty long this way however. + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullptr; + } + + // Similar to build_queued(), ignore attempts to add new builds to a + // completed check suite. + // + if (sd.completed) + return nullptr; + + // If we don't have the accurate list of check runs in the service data + // (for example, because we ran out of transaction retries trying to + // update it), then things are going to fall apart: we will add this check + // run and then immediately conclude that the check suite is complete + // (while GitHub will likely continue showing a bunch of queued check + // runs. If this checks run is successful, then we will conclude the + // check suite is successful and update the conclusion check run, all + // based on one build. + // + if (sd.check_runs.empty ()) + { + error << "no queued check runs in service data for tenant " << tenant_id; + return nullptr; + } + + // Here we only update the state of this check run. If there are no more + // unbuilt ones, then the synthetic conclusion check run will be updated + // in build_completed(). Note that determining whether we have no more + // unbuilt would be racy here so instead we do it in the service data + // update function that we return. + + check_run cr; // Updated check run. + { + string bid (gh_check_run_name (b)); // Full build id. + + if (check_run* scr = sd.find_check_run (bid)) + { + if (scr->state != build_state::building) + { + warn << "check run " << bid << ": out of order built notification; " + << "existing state: " << scr->state_string (); + } + + // Do nothing if already built (e.g., rebuild). + // + if (scr->state == build_state::built) + return nullptr; + + cr = move (*scr); + } + else + { + warn << "check run " << bid << ": out of order built notification; " + << "no check run state in service data"; + + // Note that we have no hints here and so have to use the full build + // id for name. + // + cr.build_id = move (bid); + cr.name = cr.build_id; + } + + cr.state_synced = false; + } + + // Get a new installation access token if the current one has expired. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } + } + else + iat = &sd.installation_access; + + // Note: we treat the failure to obtain the installation access token the + // same as the failure to notify GitHub (state is updated but not marked + // synced). + // + if (iat != nullptr) + { + // Prepare the check run's summary field (the build information in an + // XHTML table). + // + string sm; // Summary. 
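+
+      // For example, the serialized table renders roughly as follows (the
+      // values are hypothetical):
+      //
+      //   result          <circle> success (log)
+      //   package         hello
+      //   version         1.2.3
+      //   toolchain       public-0.17.0
+      //   target          x86_64-linux-gnu
+      //   target config   linux_debian_12-gcc_13
+      //   package config  default
+      //   ...             (one row per operation result, e.g., test)
+      //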
+ { + using namespace web::xhtml; + + // Note: let all serialization exceptions propagate. The XML + // serialization code can throw bad_alloc or xml::serialization in + // case of I/O failures, but we're serializing to a string stream so + // both exceptions are unlikely. + // + ostringstream os; + xml::serializer s (os, "check_run_summary"); + + // This hack is required to disable XML element name prefixes (which + // GitHub does not like). Note that this adds an xmlns declaration for + // the XHTML namespace which for now GitHub appears to ignore. If that + // ever becomes a problem, then we should redo this with raw XML + // serializer calls. + // + struct table: element + { + table (): element ("table") {} + + void + start (xml::serializer& s) const override + { + s.start_element (xmlns, name); + s.namespace_decl (xmlns, ""); + } + } TABLE; + + // Serialize a result row (colored circle, result text, log URL) for + // an operation and result_status. + // + auto tr_result = [this, &b] (xml::serializer& s, + const string& op, + result_status rs) + { + // The log URL. + // + string lu (build_log_url (options_->host (), + options_->root (), + b, + op != "result" ? &op : nullptr)); + + s << TR + << TD << EM << op << ~EM << ~TD + << TD + << circle (rs) << ' ' + << CODE << to_string (rs) << ~CODE + << " (" << A << HREF << lu << ~HREF << "log" << ~A << ')' + << ~TD + << ~TR; + }; + + // Serialize the summary to an XHTML table. + // + s << TABLE + << TBODY; + + tr_result (s, "result", *b.status); + + s << TR + << TD << EM << "package" << ~EM << ~TD + << TD << CODE << b.package_name << ~CODE << ~TD + << ~TR + << TR + << TD << EM << "version" << ~EM << ~TD + << TD << CODE << b.package_version << ~CODE << ~TD + << ~TR + << TR + << TD << EM << "toolchain" << ~EM << ~TD + << TD + << CODE + << b.toolchain_name << '-' << b.toolchain_version.string () + << ~CODE + << ~TD + << ~TR + << TR + << TD << EM << "target" << ~EM << ~TD + << TD << CODE << b.target.string () << ~CODE << ~TD + << ~TR + << TR + << TD << EM << "target config" << ~EM << ~TD + << TD << CODE << b.target_config_name << ~CODE << ~TD + << ~TR + << TR + << TD << EM << "package config" << ~EM << ~TD + << TD << CODE << b.package_config_name << ~CODE << ~TD + << ~TR; + + for (const operation_result& r: b.results) + tr_result (s, r.operation, r.status); + + s << ~TBODY + << ~TABLE; + + sm = os.str (); + } + + gq_built_result br ( + make_built_result (*b.status, sd.warning_success, move (sm))); + + if (cr.node_id) + { + // Update existing check run to built. Let unlikely invalid_argument + // propagate. + // + if (gq_update_check_run (error, + cr, + iat->token, + sd.repository_node_id, + *cr.node_id, + move (br))) + { + assert (cr.state == build_state::built); + l3 ([&]{trace << "updated check_run { " << cr << " }";}); + } + } + else + { + // Create new check run. Let unlikely invalid_argument propagate. + // + // Note that we don't have build hints so will be creating this check + // run with the full build id as name. In the unlikely event that an + // out of order build_queued() were to run before we've saved this + // check run to the service data it will create another check run with + // the shortened name which will never get to the built state. 
+        //
+        if (gq_create_check_run (error,
+                                 cr,
+                                 iat->token,
+                                 sd.app_id,
+                                 sd.repository_node_id,
+                                 sd.report_sha,
+                                 details_url (b),
+                                 move (br)))
+        {
+          assert (cr.state == build_state::built);
+          l3 ([&]{trace << "created check_run { " << cr << " }";});
+        }
+      }
+
+      if (cr.state_synced)
+      {
+        // Check run was created/updated successfully to built (with the
+        // status we specified).
+        //
+        cr.status = b.status;
+      }
+    }
+
+    return [tenant_id,
+            iat = move (new_iat),
+            cr = move (cr),
+            error = move (error),
+            warn = move (warn)] (const string& ti,
+                                 const tenant_service& ts)
+    {
+      // NOTE: this lambda may be called repeatedly (e.g., due to transaction
+      // being aborted) and so should not move out of its captures.
+
+      // Do nothing if the tenant has been replaced.
+      //
+      if (tenant_id != ti)
+        return make_pair (optional<string> (), false);
+
+      service_data sd;
+      try
+      {
+        sd = service_data (*ts.data);
+      }
+      catch (const invalid_argument& e)
+      {
+        error << "failed to parse service data: " << e;
+        return make_pair (optional<string> (), false);
+      }
+
+      // Feels like this could potentially happen in case of an out of order
+      // notification (see above).
+      //
+      if (sd.completed)
+      {
+        // @@ Perhaps this should be a warning but let's try error for now
+        //    (we essentially missed a build, which could have failed).
+        //
+        error << "built notification for completed check suite";
+        return make_pair (optional<string> (), false);
+      }
+
+      if (iat)
+        sd.installation_access = *iat;
+
+      // Only update the check_run state in service data if it matches the
+      // state (specifically, status) on GitHub.
+      //
+      if (cr.state_synced)
+      {
+        if (check_run* scr = sd.find_check_run (cr.build_id))
+        {
+          // This will most commonly generate a duplicate warning (see
+          // above). We could save the old state and only warn if it differs
+          // but let's not complicate things for now.
+          //
+#if 0
+          if (scr->state != build_state::building)
+          {
+            warn << "check run " << cr.build_id << ": out of order built "
+                 << "notification service data update; existing state: "
+                 << scr->state_string ();
+          }
+#endif
+          *scr = cr; // Note: also updates node id if created.
+        }
+        else
+          sd.check_runs.push_back (cr);
+
+        // Determine whether this check suite is completed.
+        //
+        sd.completed = find_if (sd.check_runs.begin (), sd.check_runs.end (),
+                                [] (const check_run& scr)
+                                {
+                                  return scr.state != build_state::built;
+                                }) == sd.check_runs.end ();
+      }
+
+      return make_pair (optional<string> (sd.json ()), sd.completed);
+    };
+  }
+  catch (const std::exception& e)
+  {
+    NOTIFICATION_DIAG (log_writer);
+
+    string bid (gh_check_run_name (b)); // Full build id.
+
+    error << "check run " << bid << ": unhandled exception: " << e.what ();
+
+    return nullptr;
+  }
+
+  void ci_github::
+  build_completed (const string& /* tenant_id */,
+                   const tenant_service& ts,
+                   const diag_epilogue& log_writer) const noexcept
+  try
+  {
+    // NOTE: this function is noexcept and should not throw.
+
+    NOTIFICATION_DIAG (log_writer);
+
+    service_data sd;
+    try
+    {
+      sd = service_data (*ts.data);
+    }
+    catch (const invalid_argument& e)
+    {
+      error << "failed to parse service data: " << e;
+      return;
+    }
+
+    // This could have been reset by handle_check_run_rerequest().
+    //
+    if (!sd.completed)
+      return;
+
+    assert (!sd.check_runs.empty ());
+
+    // Here we need to update the state of the synthetic conclusion check
+    // run.
+    //
+    result_status result (result_status::success);
+
+    // Conclusion check run summary. Will include the success/warning/failure
+    // count breakdown.
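+    //
+    // For example (hypothetical counts; warnings count towards failures
+    // when warning_success is false):
+    //
+    //   "2 failed (1 due to warnings), 7 succeeded, 9 total"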
+ // + string summary; + { + // The success/warning/failure counts. + // + // Note that the warning count will be included in the success or + // failure count (depending on the value of sd.warning_success). + // + size_t succ_count (0), warn_count (0), fail_count (0); + + // Count a result_status under the appropriate category. + // + auto count = [&succ_count, + &warn_count, + &fail_count, + ws = sd.warning_success] (result_status rs) + { + switch (rs) + { + case result_status::success: ++succ_count; break; + + case result_status::error: + case result_status::abort: + case result_status::abnormal: ++fail_count; break; + + case result_status::warning: + { + ++warn_count; + + if (ws) + ++succ_count; + else + ++fail_count; + + break; + } + + case result_status::skip: + case result_status::interrupt: + { + assert (false); + } + } + }; + + for (const check_run& cr: sd.check_runs) + { + assert (cr.state == build_state::built && cr.status); + + result |= *cr.status; + count (*cr.status); + } + + // Construct the conclusion check run summary. + // + ostringstream os; + + // Note: the warning count has already been included in the success or + // failure count. + // + os << fail_count << " failed"; + if (!sd.warning_success && warn_count != 0) + os << " (" << warn_count << " due to warnings)"; + + os << ", " << succ_count << " succeeded"; + if (sd.warning_success && warn_count != 0) + os << " (" << warn_count << " with warnings)"; + + os << ", " << (succ_count + fail_count) << " total"; + + summary = os.str (); + } + + // Get a new installation access token if the current one has expired + // (unlikely since we just returned from build_built()). Note also that we + // are not saving the new token in the service data. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } + } + else + iat = &sd.installation_access; + + // Note: we treat the failure to obtain the installation access token the + // same as the failure to notify GitHub. + // + if (iat != nullptr) + { + // Update the conclusion check run if all check runs are now built. + // + assert (sd.conclusion_node_id); + + gq_built_result br ( + make_built_result (result, sd.warning_success, move (summary))); + + check_run cr; + + // Set some fields for display purposes. + // + cr.node_id = *sd.conclusion_node_id; + cr.name = conclusion_check_run_name; + + // Let unlikely invalid_argument propagate. + // + if (gq_update_check_run (error, + cr, + iat->token, + sd.repository_node_id, + *sd.conclusion_node_id, + move (br))) + { + assert (cr.state == build_state::built); + l3 ([&]{trace << "updated conclusion check_run { " << cr << " }";}); + } + else + { + // Nothing we can do here except log the error. + // + error << "tenant_service id " << ts.id + << ": unable to update conclusion check run " + << *sd.conclusion_node_id; + } + } + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + error << "unhandled exception: " << e.what (); + } + + string ci_github:: + details_url (const build& b) const + { + // This code is based on build_force_url() in mod/build.cxx. 
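+    //
+    // The resulting URL looks along these lines (hypothetical host and
+    // values):
+    //
+    //   https://ci.example.org/@d2586f57-21dc-40b7-beb2-6517ad7917dd
+    //     ?builds=hello&pv=1.2.3&tg=x86_64-linux-gnu
+    //     &tc=linux_debian_12&pc=default&th=public-0.17.0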
+ // + return + options_->host () + + tenant_dir (options_->root (), b.tenant).string () + + "?builds=" + mime_url_encode (b.package_name.string ()) + + "&pv=" + mime_url_encode (b.package_version.string ()) + + "&tg=" + mime_url_encode (b.target.string ()) + + "&tc=" + mime_url_encode (b.target_config_name) + + "&pc=" + mime_url_encode (b.package_config_name) + + "&th=" + mime_url_encode (b.toolchain_name) + '-' + + b.toolchain_version.string (); + } + + string ci_github:: + details_url (const string& t) const + { + return + options_->host () + + tenant_dir (options_->root (), t).string () + + "?builds"; + } + + static optional<build_id> + parse_details_url (const string& details_url) + try + { + // See details_url() above for an idea of what the URL looks like. + + url u (details_url); + + build_id r; + + // Extract the tenant from the URL path. + // + // Example paths: + // + // @d2586f57-21dc-40b7-beb2-6517ad7917dd (37 characters) + // <brep-root>/@d2586f57-21dc-40b7-beb2-6517ad7917dd + // + if (!u.path) + return nullopt; + + { + size_t p (u.path->find ('@')); + if (p == string::npos || u.path->size () - p != 37) + return nullopt; // Tenant not found or too short. + + r.package.tenant = u.path->substr (p + 1); + } + + // Extract the rest of the build_id members from the URL query. + // + if (!u.query) + return nullopt; + + bool pn (false), pv (false), tg (false), tc (false), pc (false), + th (false); + + // This URL query parsing code is based on + // web::apache::request::parse_url_parameters(). + // + for (const char* qp (u.query->c_str ()); qp != nullptr; ) + { + const char* vp (strchr (qp, '=')); + const char* ep (strchr (qp, '&')); + + if (vp == nullptr || (ep != nullptr && ep < vp)) + return nullopt; // Missing value. + + string n (mime_url_decode (qp, vp)); // Name. + + ++vp; // Skip '=' + + const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end. + + // Get the value as-is or URL-decode it. + // + auto rawval = [vp, ve] () { return string (vp, ve); }; + auto decval = [vp, ve] () { return mime_url_decode (vp, ve); }; + + auto make_version = [] (string&& v) + { + return canonical_version (brep::version (move (v))); + }; + + auto c = [&n] (bool& b, const char* s) + { + return n == s ? (b = true) : false; + }; + + if (c (pn, "builds")) r.package.name = package_name (decval ()); + else if (c (pv, "pv")) r.package.version = make_version (decval ()); + else if (c (tg, "tg")) r.target = target_triplet (decval ()); + else if (c (tc, "tc")) r.target_config_name = decval (); + else if (c (pc, "pc")) r.package_config_name = decval (); + else if (c (th, "th")) + { + // Toolchain name and version. E.g. "public-0.17.0" + + string v (rawval ()); + + // Note: parsing code based on mod/mod-builds.cxx. + // + size_t p (v.find ('-')); + if (p == string::npos || p >= v.size () - 1) + return nullopt; // Invalid format. + + r.toolchain_name = v.substr (0, p); + r.toolchain_version = make_version (v.substr (p + 1)); + } + + qp = ep != nullptr ? ep + 1 : nullptr; + } + + if (!pn || !pv || !tg || !tc || !pc || !th) + return nullopt; // Fail if any query parameters are absent. + + return r; + } + catch (const invalid_argument&) // Invalid url, brep::version, etc. + { + return nullopt; + } + + optional<string> ci_github:: + generate_jwt (uint64_t app_id, + const basic_mark& trace, + const basic_mark& error) const + { + string jwt; + try + { + // Look up the private key path for the app id and fail if not found. 
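+    //
+    // For example (hypothetical mapping), app id 12345 could be mapped to
+    // the private key at /etc/brep/github-app-12345.pem via the
+    // ci-github-app-id-private-key configuration option.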
+      //
+      const map<uint64_t, dir_path>& pks (
+        options_->ci_github_app_id_private_key ());
+
+      auto pk (pks.find (app_id));
+      if (pk == pks.end ())
+      {
+        error << "unable to generate JWT: "
+              << "no private key configured for app id " << app_id;
+        return nullopt;
+      }
+
+      // Set token's "issued at" time 60 seconds in the past to combat clock
+      // drift (as recommended by GitHub).
+      //
+      jwt = brep::generate_jwt (
+        *options_,
+        pk->second, to_string (app_id),
+        chrono::seconds (options_->ci_github_jwt_validity_period ()),
+        chrono::seconds (60));
+
+      l3 ([&]{trace << "JWT: " << jwt;});
+    }
+    catch (const system_error& e)
+    {
+      error << "unable to generate JWT (errno=" << e.code () << "): " << e;
+      return nullopt;
+    }
+
+    return jwt;
+  }
+
+  // There are three types of GitHub API authentication:
+  //
+  // 1) Authenticating as an app. Used to access parts of the API concerning
+  //    the app itself such as getting the list of installations. (Need to
+  //    authenticate as an app as part of authenticating as an app
+  //    installation.)
+  //
+  // 2) Authenticating as an app installation (on a user or organisation
+  //    account). Used to access resources belonging to the user/repository
+  //    or organisation the app is installed in.
+  //
+  // 3) Authenticating as a user. Used to perform actions as the user.
+  //
+  // We need to authenticate as an app installation (2).
+  //
+  // How to authenticate as an app installation
+  //
+  // Reference:
+  // https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app/authenticating-as-a-github-app-installation
+  //
+  // The final authentication token we need is an installation access token
+  // (IAT), valid for one hour, which we will pass in the `Authorization`
+  // header of our GitHub API requests:
+  //
+  //   Authorization: Bearer <INSTALLATION_ACCESS_TOKEN>
+  //
+  // To generate an IAT:
+  //
+  // - Generate a JSON Web Token (JWT).
+  //
+  // - Get the installation ID. In our case this is included in the webhook
+  //   request.
+  //
+  // - Send a POST to /app/installations/<INSTALLATION_ID>/access_tokens
+  //   which includes the JWT (`Authorization: Bearer <JWT>`). The response
+  //   will include the IAT. The name of the repository included in the
+  //   webhook request can be passed to restrict access; otherwise we get
+  //   access to all the repositories covered by the installation (if
+  //   installed on an organisation, for example).
+  //
+  optional<gh_installation_access_token> ci_github::
+  obtain_installation_access_token (const string& iid,
+                                    string jwt,
+                                    const basic_mark& error) const
+  {
+    gh_installation_access_token iat;
+    try
+    {
+      // API endpoint.
+      //
+      string ep ("app/installations/" + iid + "/access_tokens");
+
+      uint16_t sc (
+        github_post (iat, ep, strings {"Authorization: Bearer " + jwt}));
+
+      // Possible response status codes from the access_tokens endpoint:
+      //
+      // 201 Created
+      // 401 Requires authentication
+      // 403 Forbidden
+      // 404 Resource not found
+      // 422 Validation failed, or the endpoint has been spammed.
+      //
+      // Note that the payloads of non-201 status codes are undocumented.
+      //
+      if (sc != 201)
+      {
+        error << "unable to get installation access token: error HTTP "
+              << "response status " << sc;
+        return nullopt;
+      }
+
+      // Create a clock drift safety window.
+      //
+      iat.expires_at -= chrono::minutes (5);
+    }
+    // gh_installation_access_token (via github_post())
+    //
+    catch (const json::invalid_json_input& e)
+    {
+      // Note: e.name is the GitHub API endpoint.
+      //
+      error << "malformed JSON in response from " << e.name << ", line: "
+            << e.line << ", column: " << e.column << ", byte offset: "
+            << e.position << ", error: " << e;
+      return nullopt;
+    }
+    catch (const invalid_argument& e) // github_post()
+    {
+      error << "malformed header(s) in response: " << e;
+      return nullopt;
+    }
+    catch (const system_error& e) // github_post()
+    {
+      error << "unable to get installation access token (errno=" << e.code ()
+            << "): " << e.what ();
+      return nullopt;
+    }
+
+    return iat;
+  }
+}