7 files changed, 348 insertions, 558 deletions
diff --git a/mod/mod-ci-github-gh.hxx b/mod/mod-ci-github-gh.hxx
index 58714be..281d765 100644
--- a/mod/mod-ci-github-gh.hxx
+++ b/mod/mod-ci-github-gh.hxx
@@ -80,6 +80,8 @@ namespace brep
     // the merge commit. If false then `merge_commit_sha` is either empty or
     // no longer valid.
     //
+    //   @@ TODO These appear to be unused.
+    //
     optional<bool> mergeable;
     string merge_commit_sha;
 
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx
index 1e9af74..17096eb 100644
--- a/mod/mod-ci-github-gq.cxx
+++ b/mod/mod-ci-github-gq.cxx
@@ -578,7 +578,10 @@ namespace brep
     os << "query {"                                              << '\n'
        << "  node(id:" << gq_str (nid) << ") {"                  << '\n'
        << "    ... on PullRequest {"                             << '\n'
-       << "      mergeable potentialMergeCommit { oid }"         << '\n'
+       << "      headRefOid"                                     << '\n'
+       << "      mergeStateStatus"                               << '\n'
+       << "      mergeable"                                      << '\n'
+       << "      potentialMergeCommit { oid }"                   << '\n'
        << "    }"                                                << '\n'
        << "  }"                                                  << '\n'
        << "}"                                                    << '\n';
@@ -586,10 +589,10 @@ namespace brep
     return os.str ();
   }
 
-  optional<string>
-  gq_pull_request_mergeable (const basic_mark& error,
-                             const string& iat,
-                             const string& nid)
+  optional<gq_pr_pre_check_info>
+  gq_fetch_pull_request_pre_check_info (const basic_mark& error,
+                                        const string& iat,
+                                        const string& nid)
   {
     string rq (gq_serialize_request (gq_query_pr_mergeability (nid)));
 
@@ -610,7 +613,7 @@ namespace brep
         // The response value. Absent if the merge commit is still being
         // generated.
         //
-        optional<string> value;
+        optional<gq_pr_pre_check_info> r;
 
         resp (json::parser& p)
         {
@@ -624,32 +627,42 @@ namespace brep
             {
               found = true;
 
+              string hs (p.next_expect_member_string ("headRefOid"));
+              string ms (p.next_expect_member_string ("mergeStateStatus"));
               string ma (p.next_expect_member_string ("mergeable"));
 
-              if (ma == "MERGEABLE")
+              if (ms == "BEHIND")
+              {
+                // The PR head branch is not up to date with the PR base
+                // branch.
+                //
+                // Note that we can only get here if the head-not-behind
+                // protection rule is active on the PR base branch.
+                //
+                r = {move (hs), true, nullopt};
+              }
+              else if (ma == "MERGEABLE")
               {
                 p.next_expect_member_object ("potentialMergeCommit");
                 string oid (p.next_expect_member_string ("oid"));
                 p.next_expect (event::end_object);
 
-                value = move (oid);
+                r = {move (hs), false, move (oid)};
               }
               else
               {
                 if (ma == "CONFLICTING")
-                  value = "";
+                  r = {move (hs), false, nullopt};
                 if (ma == "UNKNOWN")
-                  ; // Still being generated; leave value absent.
+                  ; // Still being generated; leave r absent.
                 else
-                {
-                  // @@ Let's throw invalid_json.
-                  //
-                  parse_error = "unexpected mergeable value '" + ma + '\'';
-
-                  // Carry on as if it were UNKNOWN.
-                }
+                  throw_json (p, "unexpected mergeable value '" + ma + '\'');
+              }
 
-                // Skip the merge commit ID (which should be null).
+              if (!r || !r->merge_commit_sha)
+              {
+                // Skip the merge commit ID if it has not yet been extracted
+                // (in which case it should be null).
                 //
                 p.next_expect_name ("potentialMergeCommit");
                 p.next_expect_value_skip ();
@@ -677,7 +690,7 @@ namespace brep
         else if (!rs.parse_error.empty ())
           error << rs.parse_error;
 
-        return rs.value;
+        return rs.r;
       }
       else
         error << "failed to fetch pull request: error HTTP response status "
@@ -711,215 +724,6 @@ namespace brep
     return nullopt;
   }
 
-  // Serialize a GraphQL query that fetches the last 100 (the maximum per
-  // page) open pull requests with the specified base branch from the
-  // repository with the specified node ID.
-  //
-  // @@ TMP Should we support more/less than 100?
-  //
-  //    Doing more (or even 100) could waste a lot of CI resources on
-  //    re-testing stale PRs. Maybe we should create a failed synthetic
-  //    conclusion check run asking the user to re-run the CI manually if/when
-  //    needed.
-  //
-  //    Note that we cannot request more than 100 at a time (will need to
-  //    do multiple requests with paging, etc).
-  //
-  //    Also, maybe we should limit the result to "fresh" PRs, e.g., those
-  //    that have been "touched" in the last week.
-  //
-  // Example query:
-  //
-  //   query {
-  //     node(id:"R_kgDOLc8CoA")
-  //     {
-  //       ... on Repository {
-  //         pullRequests (last:100 states:OPEN baseRefName:"master") {
-  //           edges {
-  //             node {
-  //               id
-  //               number
-  //               headRefOid
-  //             }
-  //           }
-  //         }
-  //       }
-  //     }
-  //   }
-  //
-  //
-  // pullRequests (last:50 states:[OPEN]
-  //               orderBy:{field:UPDATED_AT direction:ASC}
-  //               baseRefName:"master") {
-  //
-  // updatedAt field changes on PR close, conversion to draft PR, merge
-  // conflict resolution, etc. So seems like it changes with any kind of PR
-  // update.
-  //
-  static string
-  gq_query_fetch_open_pull_requests (const string& rid, const string& br)
-  {
-    ostringstream os;
-
-    os << "query {"                                              << '\n'
-       << "  node(id:" << gq_str (rid) << ") {"                  << '\n'
-       << "    ... on Repository {"                              << '\n'
-       << "      pullRequests (last:100"                         << '\n'
-       << "                    states:" << gq_enum ("OPEN")      << '\n'
-       << "                    baseRefName:" << gq_str (br)      << '\n'
-       << "                   ) {"                               << '\n'
-       << "        totalCount"                                   << '\n'
-       << "        edges { node { id number headRefOid } }"      << '\n'
-       << "      }"                                              << '\n'
-       << "    }"                                                << '\n'
-       << "  }"                                                  << '\n'
-       << "}"                                                    << '\n';
-
-    return os.str ();
-  }
-
-  optional<vector<gh_pull_request>>
-  gq_fetch_open_pull_requests (const basic_mark& error,
-                               const string& iat,
-                               const string& nid,
-                               const string& br)
-  {
-    string rq (
-      gq_serialize_request (gq_query_fetch_open_pull_requests (nid, br)));
-
-    try
-    {
-      // Response parser.
-      //
-      // Example response (only the part we need to parse here):
-      //
-      //   {
-      //     "node": {
-      //       "pullRequests": {
-      //         "totalCount": 2,
-      //         "edges": [
-      //           {
-      //             "node": {
-      //               "id": "PR_kwDOLc8CoM5vRS0y",
-      //               "number": 7,
-      //               "headRefOid": "cf72888be9484d6946a1340264e7abf18d31cc92"
-      //             }
-      //           },
-      //           {
-      //             "node": {
-      //               "id": "PR_kwDOLc8CoM5vRzHs",
-      //               "number": 8,
-      //               "headRefOid": "626d25b318aad27bc0005277afefe3e8d6b2d434"
-      //             }
-      //           }
-      //         ]
-      //       }
-      //     }
-      //   }
-      //
-      struct resp
-      {
-        bool found = false;
-
-        vector<gh_pull_request> pull_requests;
-
-        resp (json::parser& p)
-        {
-          using event = json::event;
-
-          auto parse_data = [this] (json::parser& p)
-          {
-            p.next_expect (event::begin_object);
-
-            if (p.next_expect_member_object_null ("node"))
-            {
-              found = true;
-
-              p.next_expect_member_object ("pullRequests");
-
-              uint16_t n (p.next_expect_member_number<uint16_t> ("totalCount"));
-
-              p.next_expect_member_array ("edges");
-              for (size_t i (0); i != n; ++i)
-              {
-                p.next_expect (event::begin_object); // edges[i]
-
-                p.next_expect_member_object ("node");
-                {
-                  gh_pull_request pr;
-                  pr.node_id = p.next_expect_member_string ("id");
-                  pr.number = p.next_expect_member_number<unsigned int> ("number");
-                  pr.head_sha = p.next_expect_member_string ("headRefOid");
-                  pull_requests.push_back (move (pr));
-                }
-                p.next_expect (event::end_object); // node
-
-                p.next_expect (event::end_object); // edges[i]
-              }
-              p.next_expect (event::end_array); // edges
-
-              p.next_expect (event::end_object); // pullRequests
-              p.next_expect (event::end_object); // node
-            }
-
-            p.next_expect (event::end_object);
-          };
-
-          gq_parse_response (p, move (parse_data));
-        }
-
-        resp () = default;
-      } rs;
-
-      uint16_t sc (github_post (rs,
-                                "graphql", // API Endpoint.
-                                strings {"Authorization: Bearer " + iat},
-                                move (rq)));
-
-      if (sc == 200)
-      {
-        if (!rs.found)
-        {
-          error << "repository '" << nid << "' not found";
-
-          return nullopt;
-        }
-
-        return rs.pull_requests;
-      }
-      else
-        error << "failed to fetch repository pull requests: "
-              << "error HTTP response status " << sc;
-    }
-    catch (const json::invalid_json_input& e)
-    {
-      // Note: e.name is the GitHub API endpoint.
-      //
-      error << "malformed JSON in response from " << e.name << ", line: "
-            << e.line << ", column: " << e.column << ", byte offset: "
-            << e.position << ", error: " << e;
-    }
-    catch (const invalid_argument& e)
-    {
-      error << "malformed header(s) in response: " << e;
-    }
-    catch (const system_error& e)
-    {
-      error << "unable to fetch repository pull requests (errno=" << e.code ()
-            << "): " << e.what ();
-    }
-    catch (const runtime_error& e) // From response type's parsing constructor.
-    {
-      // GitHub response contained error(s) (could be ours or theirs at this
-      // point).
-      //
-      error << "unable to fetch repository pull requests: " << e;
-    }
-
-    return nullopt;
-  }
-
-
   // GraphQL serialization functions.
   //
   // The GraphQL spec:
diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx
index ad9797a..72283ee 100644
--- a/mod/mod-ci-github-gq.hxx
+++ b/mod/mod-ci-github-gq.hxx
@@ -83,10 +83,15 @@ namespace brep
                        build_state,
                        optional<gq_built_result> = nullopt);
 
-  // Fetch a pull request's mergeability from GitHub. Return absent value if
-  // the merge commit is still being generated. Return empty string if the
-  // pull request is not auto-mergeable. Otherwise return the test merge
-  // commit id.
+  // Fetch pre-check information for a pull request from GitHub. This
+  // information is used to decide whether or not to CI the PR and is
+  // comprised of the PR's head commit SHA, whether its head branch is behind
+  // its base branch, and its test merge commit SHA.
+  //
+  // Return absent value if the merge commit is still being generated (which
+  // means PR head branch behindness is not yet known either). See the
+  // gq_pr_pre_check struct's member comments for non-absent return value
+  // semantics.
   //
   // Issue diagnostics and return absent if the request failed (which means it
   // will be treated by the caller as still being generated).
@@ -95,22 +100,27 @@ namespace brep
   // merge commit. (For details see
   // https://docs.github.com/rest/guides/getting-started-with-the-git-database-api#checking-mergeability-of-pull-requests.)
   //
-  optional<string>
-  gq_pull_request_mergeable (const basic_mark& error,
-                             const string& installation_access_token,
-                             const string& node_id);
+  struct gq_pr_pre_check_info
+  {
+    // The PR head commit id.
+    //
+    string head_sha;
 
-  // Fetch the last 100 open pull requests with the specified base branch from
-  // the repository with the specified node ID.
-  //
-  // Issue diagnostics and return nullopt if the repository was not found or
-  // an error occurred.
-  //
-  optional<vector<gh_pull_request>>
-  gq_fetch_open_pull_requests (const basic_mark& error,
-                               const string& installation_access_token,
-                               const string& repository_node_id,
-                               const string& base_branch);
+    // True if the PR's head branch is behind its base branch.
+    //
+    bool behind;
+
+    // The commit id of the test merge commit. Absent if behind or the PR is
+    // not auto-mergeable.
+    //
+    optional<string> merge_commit_sha;
+  };
+
+  optional<gq_pr_pre_check_info>
+  gq_fetch_pull_request_pre_check_info (
+    const basic_mark& error,
+    const string& installation_access_token,
+    const string& node_id);
 }
 
 #endif // MOD_MOD_CI_GITHUB_GQ_HXX
diff --git a/mod/mod-ci-github-service-data.cxx b/mod/mod-ci-github-service-data.cxx
index f93bc05..2a538a5 100644
--- a/mod/mod-ci-github-service-data.cxx
+++ b/mod/mod-ci-github-service-data.cxx
@@ -26,6 +26,22 @@ namespace brep
                               to_string (version));
     }
 
+    {
+      string v (p.next_expect_member_string ("kind"));
+
+      if      (v == "local")  kind = local;
+      else if (v == "remote") kind = remote;
+      else
+      {
+        throw json::invalid_json_input (
+          p.input_name, p.line (), p.column (), p.position (),
+          "invalid service data kind: '" + v + '\'');
+      }
+    }
+
+    pre_check = p.next_expect_member_boolean<bool> ("pre_check");
+    re_request = p.next_expect_member_boolean<bool> ("re_request");
+
     warning_success = p.next_expect_member_boolean<bool> ("warning_success");
 
     // Installation access token.
@@ -37,21 +53,17 @@ namespace brep
         p.next_expect_member_number<uint64_t> ("installation_id");
 
     repository_node_id = p.next_expect_member_string ("repository_node_id");
+    repository_clone_url = p.next_expect_member_string ("repository_clone_url");
 
     {
-      string* s (p.next_expect_member_string_null ("repository_clone_url"));
+      string* s (p.next_expect_member_string_null ("pr_node_id"));
       if (s != nullptr)
-        repository_clone_url = *s;
+        pr_node_id = *s;
     }
 
     pr_number = p.next_expect_member_number_null<uint32_t> ("pr_number");
 
-    {
-      string* s (p.next_expect_member_string_null ("merge_node_id"));
-      if (s != nullptr)
-        merge_node_id = *s;
-    }
-
+    check_sha = p.next_expect_member_string ("check_sha");
     report_sha = p.next_expect_member_string ("report_sha");
 
     p.next_expect_member_array ("check_runs");
@@ -102,6 +114,7 @@ namespace brep
                 timestamp iat_ea,
                 uint64_t iid,
                 string rid,
+                string rcu,
                 kind_type k,
                 bool rr,
                 bool pc,
@@ -112,6 +125,7 @@ namespace brep
         installation_access (move (iat_tok), iat_ea),
         installation_id (iid),
         repository_node_id (move (rid)),
+        repository_clone_url (move (rcu)),
         check_sha (move (cs)),
         report_sha (move (rs))
   {
@@ -125,12 +139,13 @@ namespace brep
                 timestamp iat_ea,
                 uint64_t iid,
                 string rid,
+                string rcu,
                 kind_type k,
                 bool rr,
                 bool pc,
                 string cs,
                 string rs,
-                string rcu,
+                string pid,
                 uint32_t prn)
       : kind (k), pre_check (pc), re_request (rr),
         warning_success (ws),
@@ -138,6 +153,7 @@ namespace brep
         installation_id (iid),
         repository_node_id (move (rid)),
         repository_clone_url (move (rcu)),
+        pr_node_id (move (pid)),
         pr_number (prn),
         check_sha (move (cs)),
         report_sha (move (rs))
@@ -154,6 +170,16 @@ namespace brep
 
     s.member ("version", 1);
 
+    s.member_name ("kind");
+    switch (kind)
+    {
+    case local:  s.value ("local"); break;
+    case remote: s.value ("remote"); break;
+    }
+
+    s.member ("pre_check", pre_check);
+    s.member ("re_request", re_request);
+
     s.member ("warning_success", warning_success);
 
     // Installation access token.
@@ -165,10 +191,11 @@ namespace brep
 
     s.member ("installation_id", installation_id);
     s.member ("repository_node_id", repository_node_id);
+    s.member ("repository_clone_url", repository_clone_url);
 
-    s.member_name ("repository_clone_url");
-    if (repository_clone_url)
-      s.value (*repository_clone_url);
+    s.member_name ("pr_node_id");
+    if (pr_node_id)
+      s.value (*pr_node_id);
     else
       s.value (nullptr);
 
@@ -178,12 +205,7 @@ namespace brep
     else
       s.value (nullptr);
 
-    s.member_name ("merge_node_id");
-    if (merge_node_id)
-      s.value (*merge_node_id);
-    else
-      s.value (nullptr);
-
+    s.member ("check_sha", check_sha);
     s.member ("report_sha", report_sha);
 
     s.member_begin_array ("check_runs");
diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx
index ae1506d..eb81ae4 100644
--- a/mod/mod-ci-github-service-data.hxx
+++ b/mod/mod-ci-github-service-data.hxx
@@ -74,8 +74,6 @@ namespace brep
 
     // Kind and phase.
     //
-    // @@ TODO Serialize these fields.
-    //
     enum kind_type {local, remote /*, queue */} kind;
     bool pre_check;
     bool re_request; // Re-requested (rebuild).
@@ -92,20 +90,15 @@ namespace brep
 
     string repository_node_id; // GitHub-internal opaque repository id.
 
+    string repository_clone_url;
+
     // The following two are only used for pull requests.
     //
     // @@ TODO/LATER: maybe put them in a struct?
     //
-    optional<string> repository_clone_url;
+    optional<string>   pr_node_id;
     optional<uint32_t> pr_number;
 
-    // The GitHub ID of the synthetic PR merge check run or absent if it
-    // hasn't been created yet.
-    //
-    // @@ TODO Remove once merge check run code has been removed.
-    //
-    optional<string> merge_node_id;
-
     // The commit ID the branch push or pull request (and its check runs) are
     // building. This will be the head commit for the branch push as well as
     // local pull requests and the test merge commit for remote pull requests.
@@ -122,7 +115,7 @@ namespace brep
     vector<check_run> check_runs;
 
     // The GitHub ID of the synthetic conclusion check run or absent if it
-    // hasn't been created yet. See also merge_node_id above.
+    // hasn't been created yet.
     //
     optional<string> conclusion_node_id;
 
@@ -149,6 +142,7 @@ namespace brep
                   timestamp iat_expires_at,
                   uint64_t installation_id,
                   string repository_node_id,
+                  string repository_clone_url,
                   kind_type kind,
                   bool pre_check,
                   bool re_request,
@@ -162,12 +156,13 @@ namespace brep
                   timestamp iat_expires_at,
                   uint64_t installation_id,
                   string repository_node_id,
+                  string repository_clone_url,
                   kind_type kind,
                   bool pre_check,
                   bool re_request,
                   string check_sha,
                   string report_sha,
-                  string repository_clone_url,
+                  string pr_node_id,
                   uint32_t pr_number);
 
     service_data () = default;
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx
index f7480b6..60a6bae 100644
--- a/mod/mod-ci-github.cxx
+++ b/mod/mod-ci-github.cxx
@@ -389,10 +389,9 @@ namespace brep
     }
   }
 
-  // Let's capitalize the synthetic check run names to make them easier to
-  // distinguish from the regular ones.
+  // Let's capitalize the synthetic conclusion check run name to make it
+  // easier to distinguish from the regular ones.
   //
-  static string merge_check_run_name ("MERGE-COMMIT");
   static string conclusion_check_run_name ("CONCLUSION");
 
   // Return the colored circle corresponding to a result_status.
@@ -469,7 +468,7 @@ namespace brep
     // protection rule for head behind is not enabled. In this case, it would
     // be good to complete the CI. So maybe/later.
 
-    // Service id that uniquely identifies the CI request.
+    // Service id that uniquely identifies the CI tenant.
     //
     string sid (cs.repository.node_id + ":" + cs.check_suite.head_sha);
 
@@ -483,6 +482,12 @@ namespace brep
     //   In this case we need to load the existing service data for this head
     //   commit, extract the test merge commit, and restart the CI for that.
     //
+    //   Note that it's possible the base branch has moved in the meantime and
+    //   ideally we would want to re-request the test merge commit, etc.
+    //   However, this will only be necessary if the user does not follow our
+    //   recommendation of enabling the head-behind-base protection. And it
+    //   seems all this extra complexity would not be warranted.
+    //
     string check_sha;
     service_data::kind_type kind;
 
@@ -505,9 +510,9 @@ namespace brep
       }
       else
       {
-        error << "check suite for remote pull request "
-              << cs.check_suite.node_id
-              << ": re-requested but tenant_service with id " << sid
+        error << "check suite " << cs.check_suite.node_id
+              << " for remote pull request:"
+              << " re-requested but tenant_service with id " << sid
               << " did not exist";
         return true;
       }
@@ -525,12 +530,13 @@ namespace brep
                      iat->expires_at,
                      cs.installation.id,
                      move (cs.repository.node_id),
+                     move (cs.repository.clone_url),
                      kind, false /* pre_check */, re_requested,
                      move (check_sha),
                      move (cs.check_suite.head_sha) /* report_sha */);
 
-    // If this check suite is being re-run, replace the existing CI request if
-    // it exists; otherwise create a new one, doing nothing if a request
+    // If this check suite is being re-run, replace the existing CI tenant if
+    // it exists; otherwise create a new one, doing nothing if a tenant
     // already exists (which could've been created by handle_pull_request()).
     //
     // Note that GitHub UI does not allow re-running the entire check suite
@@ -540,7 +546,7 @@ namespace brep
                                ? duplicate_tenant_mode::replace
                                : duplicate_tenant_mode::ignore);
 
-    // Create an unloaded CI request.
+    // Create an unloaded CI tenant.
     //
     // Note: use no delay since we need to (re)create the synthetic conclusion
     // check run as soon as possible.
@@ -549,7 +555,7 @@ namespace brep
     // management is not available in start().
     //
     // After this call we will start getting the build_unloaded()
-    // notifications until (1) we load the request, (2) we cancel it, or (3)
+    // notifications until (1) we load the tenant, (2) we cancel it, or (3)
     // it gets archived after some timeout.
     //
     auto pr (create (error,
@@ -746,9 +752,6 @@ namespace brep
     // job might actually still be relevant (in both local and remote PR
     // cases).
 
-    // @@ TODO Serialize the new service_data fields!
-    //
-
     // Distinguish between local and remote PRs by comparing the head and base
     // repositories' paths.
     //
@@ -771,13 +774,14 @@ namespace brep
                      iat->expires_at,
                      pr.installation.id,
                      move (pr.repository.node_id),
+                     move (pr.repository.clone_url),
                      kind, true /* pre_check */, false /* re_request */,
                      move (check_sha),
                      move (pr.pull_request.head_sha) /* report_sha */,
-                     move (pr.repository.clone_url),
+                     pr.pull_request.node_id,
                      pr.pull_request.number);
 
-    // Create an unloaded CI request for the pre-check phase (during which we
+    // Create an unloaded CI tenant for the pre-check phase (during which we
     // wait for the PR's merge commit and behindness to become available).
     //
     // Create with an empty service id so that the generated tenant id is used
@@ -792,13 +796,13 @@ namespace brep
     //
     // After this call we will start getting the build_unloaded()
     // notifications -- which will be routed to build_unloaded_pre_check() --
-    // until we cancel the request or it gets archived after some
-    // timeout. (Note that we never actually load this request, we always
-    // cancel it; see build_unloaded_pre_check() for details.)
+    // until we cancel the tenant or it gets archived after some timeout.
+    // (Note that we never actually load this request, we always cancel it;
+    // see build_unloaded_pre_check() for details.)
     //
     if (!create (error,
                  warn,
-                 &trace,
+                 verb_ ? &trace : nullptr,
                  *build_db_,
                  move (ts),
                  chrono::seconds (30) /* interval */,
@@ -834,28 +838,140 @@ namespace brep
   }
 
   function<optional<string> (const tenant_service&)> ci_github::
-  build_unloaded_pre_check (tenant_service&&,
-                            service_data&&,
+  build_unloaded_pre_check (tenant_service&& ts,
+                            service_data&& sd,
                             const diag_epilogue& log_writer) const noexcept
   {
     NOTIFICATION_DIAG (log_writer);
 
-    // Note: PR only (but both local and remove).
+    // We get here for PRs only (but both local and remote). The overall
+    // plan is as follows:
+    //
+    // 1. Ask for the mergeability/behind status/test merge commit.
+    //
+    // 2. If not ready, get called again.
+    //
+    // 3. If not mergeable, behind, or different head (head changed while
+    //    waiting for merge commit and thus differs from what's in the
+    //    service_data), cancel the pre-check tenant and do nothing.
+    //
+    // 4. Otherwise, create an unloaded CI tenant and cancel ourselves. Note
+    //    that all re-requested cases are handled elsewhere.
     //
-    // - Ask for test merge commit.
-    // - If not ready, get called again.
-    // - If not mergeable, behind, or different head (head changed while
-    //   waiting for merge commit and thus differs from what's in the
-    //   service_data), cancel itself and ignore.
-    // - Otherwise, create unloaded CI tenant (with proper duplicate mode
-    //   based on re_request) and cancel itself.
-
     // Note that in case of a mixed local/remote case, whether we CI the head
     // commit or test merge commit will be racy and there is nothing we can do
     // about (the purely local case can get "upgraded" to mixed after we have
     // started the CI job).
     //
 
+    // Request PR pre-check info (triggering the generation of the test merge
+    // commit on the GitHub's side).
+    //
+    optional<gq_pr_pre_check_info> pc (
+      gq_fetch_pull_request_pre_check_info (error,
+                                            sd.installation_access.token,
+                                            *sd.pr_node_id));
+
+    if (!pc)
+    {
+      // Test merge commit not available yet: get called again to retry.
+      //
+      return nullptr;
+    }
+
+    // Create the CI tenant if nothing is wrong, otherwise issue diagnostics.
+    //
+    if (pc->behind)
+    {
+      l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+                    << ": head is behind base";});
+    }
+    else if (!pc->merge_commit_sha)
+    {
+      l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+                    << ": not auto-mergeable";});
+    }
+    else if (pc->head_sha != sd.report_sha)
+    {
+      l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+                    << ": head commit has changed";});
+    }
+    else
+    {
+      // Create the CI tenant.
+
+      // Update service data's check_sha if this is a remote PR.
+      //
+      if (sd.kind == service_data::remote)
+        sd.check_sha = *pc->merge_commit_sha;
+
+      // Service id that will uniquely identify the CI tenant.
+      //
+      string sid (sd.repository_node_id + ":" + sd.report_sha);
+
+      // Create an unloaded CI tenant, doing nothing if one already exists
+      // (which could've been created by a head branch push or another PR
+      // sharing the same head commit).
+      //
+      // Note: use no delay since we need to (re)create the synthetic
+      // conclusion check run as soon as possible.
+      //
+      // Note that we use the create() API instead of start() since duplicate
+      // management is not available in start().
+      //
+      // After this call we will start getting the build_unloaded()
+      // notifications until (1) we load the tenant, (2) we cancel it, or (3)
+      // it gets archived after some timeout.
+      //
+      if (auto pr = create (error, warn, verb_ ? &trace : nullptr,
+                            *build_db_,
+                            tenant_service (sid, "ci-github", sd.json ()),
+                            chrono::seconds (30) /* interval */,
+                            chrono::seconds (0) /* delay */,
+                            duplicate_tenant_mode::ignore))
+      {
+        if (pr->second == duplicate_tenant_result::ignored)
+        {
+          // This PR is sharing a head commit with something else.
+          //
+          // If this is a local PR then it's probably the branch push, which
+          // is expected, so do nothing.
+          //
+          // If this is a remote PR then it could be anything (branch push,
+          // local PR, or another remote PR) which in turn means the CI result
+          // may end up being for head, not merge commit. There is nothing we
+          // can do about it on our side (the user can enable the head-behind-
+          // base protection on their side).
+          //
+          if (sd.kind == service_data::remote)
+          {
+            l3 ([&]{trace << "remote pull request " << *sd.pr_node_id
+                          << ": CI tenant already exists for " << sid;});
+          }
+        }
+      }
+      else
+      {
+        error << "pull request " << *sd.pr_node_id
+              << ": unable to create unloaded CI tenant "
+              << "with tenant_service id " << sid;
+
+        // Fall through to cancel.
+      }
+    }
+
+    // Cancel the pre-check tenant.
+    //
+    if (!cancel (error, warn, verb_ ? &trace : nullptr,
+                 *build_db_, ts.type, ts.id))
+    {
+      // Should never happen (no such tenant).
+      //
+      error << "pull request " << *sd.pr_node_id
+            << ": failed to cancel pre-check tenant with tenant_service id "
+            << ts.id;
+    }
+
     return nullptr;
   }
 
@@ -866,8 +982,15 @@ namespace brep
   {
     NOTIFICATION_DIAG (log_writer);
 
-    // @@@ TODO: load the tenant: should be the same for both branch push and
-    // PR.
+    // Load the tenant, which is essentially the same for both branch push and
+    // PR. The overall plan is as follows:
+    //
+    // - Create synthetic conclusion check run with the in-progress state. If
+    //   unable to, get called again to re-try.
+    //
+    // - Load the tenant. If unable to, fail the conclusion check run.
+    //
+    // - Update service data.
     //
 
     // Get a new installation access token if the current one has expired.
@@ -892,40 +1015,6 @@ namespace brep
     if (iat == nullptr)
       return nullptr; // Try again on the next call.
 
-    auto make_iat_updater = [&new_iat, &error] ()
-    {
-      function<optional<string> (const tenant_service&)> r;
-
-      if (new_iat)
-      {
-        r = [&error,
-             iat = move (new_iat)] (const tenant_service& ts)
-          -> optional<string>
-        {
-          // NOTE: this lambda may be called repeatedly (e.g., due to
-          // transaction being aborted) and so should not move out of its
-          // captures.
-
-          service_data sd;
-          try
-          {
-            sd = service_data (*ts.data);
-          }
-          catch (const invalid_argument& e)
-          {
-            error << "failed to parse service data: " << e;
-            return nullopt;
-          }
-
-          sd.installation_access = *iat;
-
-          return sd.json ();
-        };
-      }
-
-      return r;
-    };
-
     // Create a synthetic check run with an in-progress state. Return the
     // check run on success or nullopt on failure.
     //
@@ -980,221 +1069,82 @@ namespace brep
                                move (br)))
       {
         assert (cr.state == build_state::built);
-
         return cr;
       }
       else
         return nullopt;
     };
 
-    // Synthetic merge check run node ID. Empty until created on the first
-    // call or retrieved from service data on subsequent calls.
-    //
-    string merge_node_id;
+    // Note that there is a window between receipt of a check_suite or
+    // pull_request event and the first bot/worker asking for a task, which
+    // could be substantial. We could probably (also) try to (re)create the
+    // conclusion checkrun in the webhook handler. @@ Maybe/later.
 
-    // True if this is the first call (or the merge commit couldn't be created
-    // on the first call, in which case we just re-try by treating it as a
-    // first call).
+    // (Re)create the synthetic conclusion check run first in order to convert
+    // a potentially completed check suite to building as early as possible.
     //
-    bool first (!sd.merge_node_id);
+    string conclusion_node_id; // Conclusion check run node ID.
 
-    // If this is the first call, (re)create the synthetic merge check run as
-    // soon as possible to make sure the previous check suite, if any, is no
-    // longer completed.
-    //
-    // Note that there is still a window between receipt of the pull_request
-    // event and the first bot/worker asking for a task, which could be
-    // substantial. We could probably (also) try to (re)create the merge
-    // checkrun in the webhook. @@ Maybe/later.
-    //
-    if (first)
+    if (auto cr = create_synthetic_cr (conclusion_check_run_name))
     {
-      if (auto cr = create_synthetic_cr (merge_check_run_name))
-      {
-        l3 ([&]{trace << "created check_run { " << *cr << " }";});
+      l3 ([&]{trace << "created check_run { " << *cr << " }";});
 
-        merge_node_id = move (*cr->node_id);
-      }
-      else
-        return make_iat_updater (); // Try again on the next call.
+      conclusion_node_id = move (*cr->node_id);
     }
-    else
-      merge_node_id = *sd.merge_node_id;
 
-    // Start/check PR mergeability.
+    // Load the CI tenant if the conclusion check run was created.
     //
-    optional<string> mc (
-      gq_pull_request_mergeable (error, iat->token, ts.id)); // Merge commit.
-
-    if (!mc || mc->empty ())
+    if (!conclusion_node_id.empty ())
     {
-      if (!mc) // No merge commit yet.
-      {
-        // If this is a subsequent notification and there is no merge commit,
-        // then there is nothing to do.
-        //
-        if (!first)
-          return make_iat_updater ();
+      string ru; // Repository URL.
 
-        // Fall through to update service data.
-      }
-      else // Not mergeable.
+      // CI the merge test commit for remote PRs and the head commit for
+      // everything else (branch push or local PRs).
+      //
+      if (sd.kind == service_data::remote)
       {
-        // If the commit is not mergeable, cancel the CI request and fail the
-        // merge check run.
+        // E.g. #pull/28/merge@1b6c9a361086ed93e6f1e67189e82d52de91c49b
         //
-        // Note that it feels like in this case we don't really need to create a
-        // failed synthetic conclusion check run since the PR cannot be merged
-        // anyway.
-
-        if (auto cr = update_synthetic_cr (
-              merge_node_id,
-              merge_check_run_name,
-              result_status::error,
-              "GitHub is unable to create test merge commit"))
-        {
-          l3 ([&]{trace << "updated check_run { " << *cr << " }";});
-
-          // Cancel the CI request.
-          //
-          // Ignore failure because this CI request may have been cancelled
-          // elsewhere due to an update to the PR base or head branches.
-          //
-          if (!cancel (error, warn, &trace, *build_db_, ts.type, ts.id))
-            l3 ([&]{trace << "CI for PR " << ts.id << " already cancelled";});
-
-          return nullptr; // No need to update service data in this case.
-        }
-        else
-        {
-          // Don't cancel the CI request if the merge check run update failed
-          // so that we can try again on the next call.
+        ru = sd.repository_clone_url + "#pull/" + to_string (*sd.pr_number) +
+             "/merge@" + sd.check_sha;
+      }
+      else
+        ru = sd.repository_clone_url + '#' + sd.check_sha;
 
-          if (!first)
-            return make_iat_updater ();
+      repository_location rl (move (ru), repository_type::git);
 
-          // Fall through to update service data.
-        }
-      }
+      optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr,
+                                      *build_db_,
+                                      move (ts),
+                                      move (rl)));
 
-      // This is a first notification, so record the merge check run in the
-      // service data.
-      //
-      return [&error,
-              iat = move (new_iat),
-              mni = move (merge_node_id)] (const tenant_service& ts)
-        -> optional<string>
+      if (!r || r->status != 200)
       {
-        // NOTE: this lambda may be called repeatedly (e.g., due to
-        // transaction being aborted) and so should not move out of its
-        // captures.
-
-        service_data sd;
-        try
-        {
-          sd = service_data (*ts.data);
-        }
-        catch (const invalid_argument& e)
+        if (auto cr = update_synthetic_cr (conclusion_node_id,
+                                           conclusion_check_run_name,
+                                           result_status::error,
+                                           to_check_run_summary (r)))
         {
-          error << "failed to parse service data: " << e;
-          return nullopt;
+          l3 ([&]{trace << "updated check_run { " << *cr << " }";});
         }
-
-        if (iat)
-          sd.installation_access = *iat;
-
-        sd.merge_node_id = mni;
-
-        return sd.json ();
-      };
-    }
-
-    // If we are here, then it means we have a merge commit that we can load.
-    //
-    // Note that this can still be the first call (first=true).
-    //
-
-    // As a first step, (re)create the synthetic conclusion check run and then
-    // change the merge check run state to success. Do it in this order so
-    // that the check suite does not become completed.
-
-    // Synthetic conclusion check run node ID. Empty until created on the
-    // "second" call or retrieved from service data on subsequent calls.
-    //
-    string conclusion_node_id;
-
-    // True if this is the first call after the merge commit became available,
-    // which we will call the "second" call (or we couldn't create the
-    // conclusion check run on the first such call, in which case we just
-    // re-try by treating it as a "second" call).
-    //
-    bool second (!sd.conclusion_node_id);
-
-    if (second)
-    {
-      if (auto cr = create_synthetic_cr (conclusion_check_run_name))
-      {
-        l3 ([&]{trace << "created check_run { " << *cr << " }";});
-
-        conclusion_node_id = move (*cr->node_id);
-      }
-    }
-    else
-      conclusion_node_id = *sd.conclusion_node_id;
-
-    if (!conclusion_node_id.empty ()) // Conclusion check run was created.
-    {
-      // Update merge check run to successful.
-      //
-      if (auto cr = update_synthetic_cr (merge_node_id,
-                                         merge_check_run_name,
-                                         result_status::success,
-                                         "GitHub created test merge commit"))
-      {
-        l3 ([&]{trace << "updated check_run { " << *cr << " }";});
-
-        // Load the CI request.
-        //
-        // Example repository URL fragment:
-        //
-        //   #pull/28/merge@1b6c9a361086ed93e6f1e67189e82d52de91c49b
-        //
-        repository_location rl (*sd.repository_clone_url + "#pull/" +
-                                to_string (*sd.pr_number) + "/merge@" + *mc,
-                                repository_type::git);
-
-        optional<start_result> r (
-          load (error, warn, &trace, *build_db_, move (ts), rl));
-
-        if (!r || r->status != 200)
+        else
         {
-          if (auto cr = update_synthetic_cr (conclusion_node_id,
-                                             conclusion_check_run_name,
-                                             result_status::error,
-                                             to_check_run_summary (r)))
-          {
-            l3 ([&]{trace << "updated check_run { " << *cr << " }";});
-          }
-          else
-          {
-            // Nothing really we can do in this case since we will not receive
-            // any further notifications.
-          }
+          // Nothing really we can do in this case since we will not receive
+          // any further notifications. Log the error as a last resort.
 
-          return nullptr; // No need to update service data in this case.
+          error << "failed to load CI tenant " << ts.id
+                << " and unable to update conclusion";
         }
-      }
-      else
-      {
-        // Don't load the CI request if the merge check run update failed so
-        // that we can try again on the next call.
+
+        return nullptr; // No need to update service data in this case.
       }
     }
+    else if (!new_iat)
+      return nullptr; // Nothing to save (but retry on next call).
 
     return [&error,
             iat = move (new_iat),
-            mni = (first ? move (merge_node_id) : string ()),
-            cni = (second ? move (conclusion_node_id) : string ())]
+            cni = move (conclusion_node_id)]
       (const tenant_service& ts) -> optional<string>
     {
       // NOTE: this lambda may be called repeatedly (e.g., due to
@@ -1215,9 +1165,6 @@ namespace brep
       if (iat)
         sd.installation_access = *iat;
 
-      if (!mni.empty ())
-        sd.merge_node_id = mni;
-
       if (!cni.empty ())
         sd.conclusion_node_id = cni;
 
@@ -1234,7 +1181,9 @@ namespace brep
   //    them when notifying GitHub. The first is not important (we expect the
   //    state to go back to building shortly). The second should normally not
   //    happen and would mean that a completed check suite may go back on its
-  //    conclusion (which would be pretty confusing for the user).
+  //    conclusion (which would be pretty confusing for the user). @@@ This
+  //    can/will happen on check run rebuild. Distinguish between internal
+  //    and external rebuilds?
   //
   //    So, for GitHub notifications, we only have the following linear
   //    transition sequence:
@@ -1340,7 +1289,7 @@ namespace brep
     //
     for (const build& b: builds)
     {
-      string bid (gh_check_run_name (b)); // Full build ID.
+      string bid (gh_check_run_name (b)); // Full build id.
 
       if (const check_run* scr = sd.find_check_run (bid))
       {
@@ -1362,6 +1311,8 @@ namespace brep
         else
         {
           // Ignore interrupted.
+          //
+          assert (*istate == build_state::building);
         }
       }
       else
@@ -1406,7 +1357,7 @@ namespace brep
     //
     if (iat != nullptr)
     {
-      // Create a check_run for each build.
+      // Create a check_run for each build as a single request.
       //
       if (gq_create_check_runs (error,
                                 crs,
@@ -1416,6 +1367,8 @@ namespace brep
       {
         for (const check_run& cr: crs)
         {
+          // We can only create a check run in the queued state.
+          //
           assert (cr.state == build_state::queued);
           l3 ([&]{trace << "created check_run { " << cr << " }";});
         }
@@ -1490,13 +1443,12 @@ namespace brep
     }
 
     optional<check_run> cr; // Updated check run.
-    string bid (gh_check_run_name (b)); // Full Build ID.
+    string bid (gh_check_run_name (b)); // Full build id.
 
     if (check_run* scr = sd.find_check_run (bid)) // Stored check run.
     {
       // Update the check run if it exists on GitHub and the queued
-      // notification succeeded and updated the service data, otherwise do
-      // nothing.
+      // notification updated the service data, otherwise do nothing.
       //
       if (scr->state == build_state::queued)
       {
@@ -1561,12 +1513,10 @@ namespace brep
         if (cr->state == build_state::built)
         {
           warn << "check run " << bid << ": already in built state on GitHub";
-
           return nullptr;
         }
 
         assert (cr->state == build_state::building);
-
         l3 ([&]{trace << "updated check_run { " << *cr << " }";});
       }
     }
@@ -1619,6 +1569,10 @@ namespace brep
                const build& b,
                const diag_epilogue& log_writer) const noexcept
   {
+    // @@ TODO Include service_data::event_node_id and perhaps ts.id in
+    //    diagnostics? E.g. when failing to update check runs we print the
+    //    build ID only.
+    //
     NOTIFICATION_DIAG (log_writer);
 
     service_data sd;
@@ -1632,13 +1586,16 @@ namespace brep
       return nullptr;
     }
 
-    // Absent if have any unbuilt check runs.
+    // Here we need to update the state of this check run and, if there are no
+    // more unbuilt ones, update the synthetic conclusion check run.
+    //
+    // Absent means we still have unbuilt check runs.
     //
     optional<result_status> conclusion (*b.status);
 
     check_run cr; // Updated check run.
     {
-      string bid (gh_check_run_name (b)); // Full Build ID.
+      string bid (gh_check_run_name (b)); // Full build id.
 
       optional<check_run> scr;
       for (check_run& cr: sd.check_runs)
@@ -1652,11 +1609,10 @@ namespace brep
         {
           if (cr.state == build_state::built)
           {
+            assert (cr.status);
+
             if (conclusion)
-            {
-              assert (cr.status);
               *conclusion |= *cr.status;
-            }
           }
           else
             conclusion = nullopt;
@@ -1686,6 +1642,9 @@ namespace brep
         warn << "check run " << bid << ": out of order built notification; "
              << "no check run state in service data";
 
+        // Note that we have no hints here and so have to use the full build
+        // id for name.
+        //
         cr.build_id = move (bid);
         cr.name = cr.build_id;
       }
@@ -1815,10 +1774,10 @@ namespace brep
         sm = os.str ();
       }
 
-      gq_built_result br (gh_to_conclusion (*b.status, sd.warning_success),
-                          circle (*b.status) + ' ' +
-                            ucase (to_string (*b.status)),
-                          move (sm));
+      gq_built_result br (
+        gh_to_conclusion (*b.status, sd.warning_success),
+        circle (*b.status) + ' ' + ucase (to_string (*b.status)),
+        move (sm));
 
       if (cr.node_id)
       {
@@ -1834,7 +1793,6 @@ namespace brep
                                  move (br)))
         {
           assert (cr.state == build_state::built);
-
           l3 ([&]{trace << "updated check_run { " << cr << " }";});
         }
       }
@@ -1843,7 +1801,7 @@ namespace brep
         // Create new check run.
         //
         // Note that we don't have build hints so will be creating this check
-        // run with the full build ID as name. In the unlikely event that an
+        // run with the full build id as name. In the unlikely event that an
         // out of order build_queued() were to run before we've saved this
         // check run to the service data it will create another check run with
         // the shortened name which will never get to the built state.
@@ -1858,7 +1816,6 @@ namespace brep
                                  move (br)))
         {
           assert (cr.state == build_state::built);
-
           l3 ([&]{trace << "created check_run { " << cr << " }";});
         }
       }
@@ -1902,8 +1859,6 @@ namespace brep
         {
           assert (sd.conclusion_node_id);
 
-          // Update the conclusion check run with success.
-          //
           result_status rs (*conclusion);
 
           optional<gq_built_result> br (
@@ -1928,14 +1883,13 @@ namespace brep
                                    move (br)))
           {
             assert (cr.state == build_state::built);
-
-            l3 ([&]{trace << "updated check_run { " << cr << " }";});
+            l3 ([&]{trace << "updated conclusion check_run { " << cr << " }";});
           }
           else
           {
             // Nothing we can do here except log the error.
             //
-            error << "check suite " << ts.id
+            error << "tenant_service id " << ts.id
                   << ": unable to update conclusion check run "
                   << *sd.conclusion_node_id;
           }
diff --git a/mod/tenant-service.hxx b/mod/tenant-service.hxx
index 6a982ea..8ba199a 100644
--- a/mod/tenant-service.hxx
+++ b/mod/tenant-service.hxx
@@ -50,7 +50,9 @@ namespace brep
   // sense for the implementation to protect against overwriting later states
   // with earlier. For example, if it's possible to place a condition on a
   // notification, it makes sense to only set the state to queued if none of
-  // the later states (e.g., building) are already in effect.
+  // the later states (e.g., building) are already in effect. See also
+  // ci_start::rebuild() for additional details on the build->queued
+  // transition.
   //
   // Note also that it's possible for the build to get deleted at any stage
   // without any further notifications. This can happen, for example, due to
@@ -131,8 +133,9 @@ namespace brep
   // returned callback should be NULL).
   //
   // Note: make sure the implementation of this notification does not take
-  // too long (currently 40 seconds) to avoid nested notifications. Note
-  // also that the first notification is delayed (currently 10 seconds).
+  // longer than the notification_interval argument of ci_start::create() to
+  // avoid nested notifications. The first notification can be delayed with
+  // the notify_delay argument.
   //
   class tenant_service_build_unloaded: public virtual tenant_service_base
   {