Diffstat (limited to 'mod')
-rw-r--r--  mod/ci-common.cxx                  |  64
-rw-r--r--  mod/ci-common.hxx                  |  12
-rw-r--r--  mod/mod-ci-github-gh.cxx           |   4
-rw-r--r--  mod/mod-ci-github-gh.hxx           |   6
-rw-r--r--  mod/mod-ci-github-gq.cxx           |   6
-rw-r--r--  mod/mod-ci-github-gq.hxx           |  11
-rw-r--r--  mod/mod-ci-github-service-data.hxx |   4
-rw-r--r--  mod/mod-ci-github.cxx              | 912
8 files changed, 439 insertions, 580 deletions
diff --git a/mod/ci-common.cxx b/mod/ci-common.cxx index 4b9f9f9..cba421b 100644 --- a/mod/ci-common.cxx +++ b/mod/ci-common.cxx @@ -553,7 +553,11 @@ namespace brep assert (!transaction::has_current ()); build_tenant t; + + // Set the reference count to 1 for the `created` result. + // duplicate_tenant_result r (duplicate_tenant_result::created); + service.ref_count = 1; for (string request_id;;) { @@ -584,14 +588,31 @@ namespace brep : duplicate_tenant_mode::ignore); } + // Shouldn't be here otherwise. + // + assert (t->service); + // Bail out in the ignore mode and cancel the tenant in the // replace mode. // if (mode == duplicate_tenant_mode::ignore) + { + // Increment the reference count for the `ignored` result. + // + ++(t->service->ref_count); + + db.update (t); + tr.commit (); + return make_pair (move (t->id), duplicate_tenant_result::ignored); + } assert (mode == duplicate_tenant_mode::replace); + // Preserve the current reference count for the `replaced` result. + // + service.ref_count = t->service->ref_count; + if (t->unloaded_timestamp) { db.erase (t); @@ -678,6 +699,7 @@ namespace brep // request_id = move (t.id); service = move (*t.service); + service.ref_count = 1; r = duplicate_tenant_result::created; } } @@ -788,7 +810,8 @@ namespace brep odb::core::database& db, size_t retry, const string& type, - const string& id) const + const string& id, + bool ref_count) const { using namespace odb::core; @@ -810,25 +833,44 @@ namespace brep if (t == nullptr) return nullopt; - r = move (t->service); + // Shouldn't be here otherwise. + // + assert (t->service && t->service->ref_count != 0); - if (t->unloaded_timestamp) + bool cancel (!ref_count || --(t->service->ref_count) == 0); + + if (cancel) { - db.erase (t); + // Move out the service state before it is dropped from the tenant. + // + r = move (t->service); + + if (t->unloaded_timestamp) + { + db.erase (t); + } + else + { + t->service = nullopt; + t->archived = true; + db.update (t); + } + + if (trace != nullptr) + *trace << "CI request " << t->id << " for service " << id << ' ' + << type << " is canceled"; } else { - t->service = nullopt; - t->archived = true; - db.update (t); + db.update (t); // Update the service reference count. + + // Move out the service state after the tenant is updated. + // + r = move (t->service); } tr.commit (); - if (trace != nullptr) - *trace << "CI request " << t->id << " for service " << id << ' ' - << type << " is canceled"; - // Bail out if we have successfully updated or erased the tenant // object. // diff --git a/mod/ci-common.hxx b/mod/ci-common.hxx index 36d5f0e..b32d397 100644 --- a/mod/ci-common.hxx +++ b/mod/ci-common.hxx @@ -103,6 +103,10 @@ namespace brep // Finally note that only duplicate_tenant_mode::fail can be used if the // service id is empty. // + // The tenant reference count is set to 1 if the result is `created`, + // incremented if the result is `ignored`, and preserved if the result is + // `replaced`. + // // Repeat the attempts on the recoverable database failures (deadlocks, // etc) and throw runtime_error if no more retries left. // @@ -150,6 +154,11 @@ namespace brep // dropped. Note that the latter allow using unloaded tenants as a // relatively cheap asynchronous execution mechanism. // + // If ref_count is true, then decrement the tenant reference count and + // only cancel the CI request if it becomes 0. In this mode the caller can + // determine if the request was actually canceled by checking if the + // reference count in the returned service state is 0. 
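The reference-count rules above (set to 1 for `created`, increment for `ignored`, preserve for `replaced`, decrement on cancel) are easy to get subtly wrong, so here is a minimal standalone sketch of both sides. The types are simplified stand-ins for brep's build_tenant and tenant_service, not the real definitions, and the database/retry machinery is omitted entirely.

#include <cassert>
#include <cstddef>
#include <optional>
#include <utility>

// Simplified stand-ins for brep's tenant types.
//
struct tenant_service
{
  std::size_t ref_count = 0;
};

struct build_tenant
{
  std::optional<tenant_service> service;
  bool archived = false;
};

enum class duplicate_tenant_result {created, ignored, replaced};

// Creation side: `created` starts the count at 1, `ignored` increments the
// existing tenant's count, and `replaced` inherits it.
//
duplicate_tenant_result
create (tenant_service& s, build_tenant* existing, bool replace)
{
  if (existing == nullptr)
  {
    s.ref_count = 1;
    return duplicate_tenant_result::created;
  }

  assert (existing->service);

  if (!replace)
  {
    ++existing->service->ref_count;
    return duplicate_tenant_result::ignored;
  }

  s.ref_count = existing->service->ref_count;
  return duplicate_tenant_result::replaced;
}

// Cancelation side: with ref_count=true only cancel once the count drops to
// 0. The caller checks ref_count in the returned state to see whether the
// request was actually canceled.
//
std::optional<tenant_service>
cancel (build_tenant& t, bool ref_count)
{
  if (!t.service)
    return std::nullopt; // No such tenant.

  assert (t.service->ref_count != 0);

  if (!ref_count || --t.service->ref_count == 0)
  {
    std::optional<tenant_service> r (std::move (t.service));
    t.service = std::nullopt;
    t.archived = true;
    return r;
  }

  return t.service; // Still referenced; only the count was updated.
}

int main ()
{
  build_tenant t;
  tenant_service s;

  create (s, nullptr, false); // created, ref_count == 1
  t.service = s;
  create (s, &t, false);      // ignored, tenant's ref_count == 2

  auto r1 (cancel (t, true)); // r1->ref_count == 1, not canceled
  auto r2 (cancel (t, true)); // r2->ref_count == 0, canceled

  assert (r1->ref_count == 1 && r2->ref_count == 0 && t.archived);
}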
+ // // Repeat the attempts on the recoverable database failures (deadlocks, // etc) and throw runtime_error if no more retries left. // @@ -162,7 +171,8 @@ namespace brep odb::core::database&, size_t retry, const string& type, - const string& id) const; + const string& id, + bool ref_count = false) const; // Cancel previously created or started CI request. Return false if there // is no tenant for the specified tenant id. Note that the reason argument diff --git a/mod/mod-ci-github-gh.cxx b/mod/mod-ci-github-gh.cxx index 6372ef0..6ab93d7 100644 --- a/mod/mod-ci-github-gh.cxx +++ b/mod/mod-ci-github-gh.cxx @@ -513,7 +513,7 @@ namespace brep { p.next_expect (event::begin_object); - bool ac (false), pr (false), rp (false), in (false); + bool ac (false), pr (false), bf (false), rp (false), in (false); // Skip unknown/uninteresting members. // @@ -526,6 +526,7 @@ namespace brep if (c (ac, "action")) action = p.next_expect_string (); else if (c (pr, "pull_request")) pull_request = gh_pull_request (p); + else if (c (bf, "before")) before = p.next_expect_string (); else if (c (rp, "repository")) repository = gh_repository (p); else if (c (in, "installation")) installation = gh_installation (p); else p.next_expect_value_skip (); @@ -542,6 +543,7 @@ namespace brep { os << "action: " << pr.action; os << ", pull_request { " << pr.pull_request << " }"; + os << ", before: " << (pr.before ? *pr.before : "null"); os << ", repository { " << pr.repository << " }"; os << ", installation { " << pr.installation << " }"; diff --git a/mod/mod-ci-github-gh.hxx b/mod/mod-ci-github-gh.hxx index b29904b..6ede697 100644 --- a/mod/mod-ci-github-gh.hxx +++ b/mod/mod-ci-github-gh.hxx @@ -183,6 +183,12 @@ namespace brep string action; gh_pull_request pull_request; + + // The SHA of the previous commit on the head branch before the current + // one. Only present if action is "synchronize". + // + optional<string> before; + gh_repository repository; gh_installation installation; diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx index 3805445..fa701a8 100644 --- a/mod/mod-ci-github-gq.cxx +++ b/mod/mod-ci-github-gq.cxx @@ -436,7 +436,7 @@ namespace brep ostringstream os; - os << "mutation {" << '\n'; + os << "mutation {" << '\n'; // Serialize a `createCheckRun` for the build. // @@ -472,7 +472,7 @@ namespace brep << " }" << '\n' << "}" << '\n'; - os << "}" << '\n'; + os << "}" << '\n'; return os.str (); } @@ -553,8 +553,6 @@ namespace brep assert (cr.state != build_state::built); #endif - // Empty details URL because it's not available until building. - // string rq ( gq_serialize_request (gq_mutation_create_check_runs (rid, hs, crs))); diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx index 9c23cd4..7c564d7 100644 --- a/mod/mod-ci-github-gq.hxx +++ b/mod/mod-ci-github-gq.hxx @@ -19,13 +19,10 @@ namespace brep // GraphQL functions (all start with gq_). // - // Create a new check run on GitHub for each build with the build state - // taken from each check_run object. Update `check_runs` with the new data - // (node id and state_synced). Return false and issue diagnostics if the - // request failed. - // - // Note: no details_url yet since there will be no entry in the build result - // search page until the task starts building. + // Create a new check run on GitHub for each build with the build state, + // name, and details_url taken from each check_run object. Update + // `check_runs` with the new data (node id and state_synced). Return false + // and issue diagnostics if the request failed. 
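Since the mutation serialization above is shown only in fragments, here is a compact sketch of how a batched createCheckRun request can be put together, in the spirit of gq_mutation_create_check_runs(). The input fields follow the GitHub GraphQL schema, but the selection set is illustrative, the identifiers in main() are made up, and real code must also escape the interpolated strings.

#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Serialize one createCheckRun mutation per check run, using aliases
// (cr0, cr1, ...) so that they can all go into a single request.
//
static std::string
create_check_runs_mutation (const std::string& repo_id,
                            const std::string& head_sha,
                            const std::vector<std::string>& names)
{
  std::ostringstream os;

  os << "mutation {" << '\n';

  for (std::size_t i (0); i != names.size (); ++i)
  {
    os << "cr" << i << ":createCheckRun(input: {"
       << "repositoryId: \"" << repo_id << "\", "
       << "headSha: \"" << head_sha << "\", "
       << "name: \"" << names[i] << "\", "
       << "status: QUEUED}) "
       << "{ checkRun { id name status } }" << '\n';
  }

  os << "}" << '\n';

  return os.str ();
}

int main ()
{
  std::cout << create_check_runs_mutation (
    "R_kgDOabc123",
    "4ea23c1",
    {"linux_debian_12-gcc_13", "windows-msvc_17"});
}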
// // Note that creating a check_run named `foo` will effectively replace any // existing check_runs with that name. They will still exist on the GitHub diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx index 93a8895..cabd19a 100644 --- a/mod/mod-ci-github-service-data.hxx +++ b/mod/mod-ci-github-service-data.hxx @@ -35,8 +35,8 @@ namespace brep optional<result_status> status; // Only if state is built & synced. - // Never serialized. Only used by some of the GraphQL functions in - // mod-ci-github-gq.hxx. + // Note: never serialized (only used to pass information to the GraphQL + // functions). // optional<string> details_url; diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx index 022abac..3bfee41 100644 --- a/mod/mod-ci-github.cxx +++ b/mod/mod-ci-github.cxx @@ -398,7 +398,8 @@ namespace brep throw invalid_request (400, move (m)); } - if (pr.action == "opened" || pr.action == "synchronize") + if (pr.action == "opened" || + pr.action == "synchronize") { // opened // A pull request was opened. @@ -406,23 +407,78 @@ namespace brep // synchronize // A pull request's head branch was updated from the base branch or // new commits were pushed to the head branch. (Note that there is - // no equivalent event for the base branch. That case gets handled - // in handle_check_suite_request() instead. @@ Not anymore.) + // no equivalent event for the base branch.) // - // Note that both cases are handled the same: we start a new CI + // Note that both cases are handled similarly: we start a new CI // request which will be reported on the new commit id. // return handle_pull_request (move (pr), warning_success); } - else + else if (pr.action == "edited") { - // Ignore the remaining actions by sending a 200 response with empty - // body. + // PR base branch changed (to a different branch) besides other + // irrelevant changes (title, body, etc). // - // @@ Ignore known but log unknown, as in check_suite above? + // This is in a sense a special case of the base branch moving. In + // that case we don't do anything (due to the head sharing problem) + // relying instead on the branch protection rule. So it makes sense + // to do the same here. // return true; } + else if (pr.action == "closed") + { + // PR has been closed (as merged or not; see merged member). Also + // apparently received if base branch is deleted (and the same + // for head branch). See also the reopened event below. + // + // While it may seem natural to cancel the CI for the closed PR, it + // might actually be useful to have a completed CI record. GitHub + // doesn't prevent us from publishing CI results for the closed PR + // (even if both base and head branches were deleted). And if such a + // PR is reopened, the CI results remain. + // + return true; + } + else if (pr.action == "reopened") + { + // Previously closed PR has been reopened. + // + // Since we don't cancel the CI for a closed PR, there is nothing + // to do if it is reopened. 
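The action handling that begins above (and continues below with the list of CI-irrelevant actions) reduces to a three-way split; a condensed sketch, with the known-action list abbreviated:

#include <iostream>
#include <string>

// Start a CI for actions that affect the code under test, deliberately do
// nothing for the other known actions, and ignore-but-log unknown ones
// (returning true corresponds to the 200 response with empty body).
//
bool
handle_pull_request_action (const std::string& a)
{
  if (a == "opened" || a == "synchronize")
    return true; // Would start (or cancel and restart) a CI request here.

  if (a == "edited"   || a == "closed"  || a == "reopened" ||
      a == "assigned" || a == "labeled" || a == "locked" /* etc. */)
    return true; // No relation to CI; nothing to do.

  // Log unknown actions so that newly-added ones get noticed.
  //
  std::cerr << "unknown action '" << a << "' in pull_request event\n";
  return true;
}

int main ()
{
  return handle_pull_request_action ("opened") ? 0 : 1;
}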
+ // + return true; + } + else if (pr.action == "assigned" || + pr.action == "auto_merge_disabled" || + pr.action == "auto_merge_enabled" || + pr.action == "converted_to_draft" || + pr.action == "demilestoned" || + pr.action == "dequeued" || + pr.action == "enqueued" || + pr.action == "labeled" || + pr.action == "locked" || + pr.action == "milestoned" || + pr.action == "ready_for_review" || + pr.action == "review_request_removed" || + pr.action == "review_requested" || + pr.action == "unassigned" || + pr.action == "unlabeled" || + pr.action == "unlocked") + { + // These have no relation to CI. + // + return true; + } + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. + // + error << "unknown action '" << pr.action << "' in pull_request event"; + + return true; + } } else { @@ -507,15 +563,17 @@ namespace brep // // While it would have been nice to cancel CIs of PRs with this branch as - // base not to waste resources, there are complications: Firsty, we can - // only do this for remote PRs (since local PRs may share the result with - // branch push). Secondly, we try to do our best even if the branch - // protection rule for head behind is not enabled. In this case, it would - // be good to complete the CI. So maybe/later. + // base not to waste resources, there are complications: Firstly, we can + // only do this for remote PRs (since local PRs will most likely share the + // result with branch push). Secondly, we try to do our best even if the + // branch protection rule for head behind is not enabled. In this case, it + // would be good to complete the CI. So maybe/later. See also the head + // case in handle_pull_request(), where we do cancel remote PRs that are + // not shared. // Service id that uniquely identifies the CI tenant. // - string sid (cs.repository.node_id + ":" + cs.check_suite.head_sha); + string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha); // If the user requests a rebuild of the (entire) PR, then this manifests // as the check_suite rather than pull_request event. Specifically: @@ -653,7 +711,8 @@ namespace brep // Note that GitHub always posts a message to their GUI saying "You have // successfully requested <check_run_name> be rerun", regardless of what // HTTP status code we respond with. However we do return error status codes - // when appropriate in case they start handling them someday. + // when there is no better option (like failing the conclusion) in case they + // start handling them someday. // bool ci_github:: handle_check_run_rerequest (const gh_check_run_event& cr, @@ -663,37 +722,12 @@ namespace brep l3 ([&]{trace << "check_run event { " << cr << " }";}); - // Get a new installation access token. - // - auto get_iat = [this, &trace, &error, &cr] () - -> optional<gh_installation_access_token> - { - optional<string> jwt (generate_jwt (trace, error)); - if (!jwt) - return nullopt; - - optional<gh_installation_access_token> iat ( - obtain_installation_access_token (cr.installation.id, - move (*jwt), - error)); - - if (iat) - l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); - - return iat; - }; - // The overall plan is as follows: // // 1. Load service data. // - // 2. If the tenant is archived, then fail (re-create) conclusion with - // appropriate diagnostics. - // - // @@ TMP A PR might get blocked indefinitely by this. If the user - // re-runs a single CR the "re-run all check runs" option disappears - // from the UI. 
So the single re-run will keep failing but there - // will be no way to start a new CI from scratch. + // 2. If the tenant is archived, then fail (re-create) both the check run + // and the conclusion with appropriate diagnostics. // // 3. If the check run is in the queued state, then do nothing. // @@ -711,12 +745,9 @@ namespace brep // // c. Clear the completed flag if true. // - // d. "Return" the service data to be used after the call. - // - // @@ TMP Not currently returning anything. - // // 6. If the result of rebuild() indicates the tenant is archived, then - // fail (update) the conclusion check run with appropriate diagnostics. + // fail (update) both the check run and conclusion with appropriate + // diagnostics. // // 7. If original state is queued (no rebuild was scheduled), then fail // (update) both the check run and the conclusion. @@ -725,13 +756,32 @@ namespace brep // practice we have to re-create as new check runs in order to replace the // existing ones because GitHub does not allow transitioning out of the // built state. + + // Get a new installation access token. // + auto get_iat = [this, &trace, &error, &cr] () + -> optional<gh_installation_access_token> + { + optional<string> jwt (generate_jwt (trace, error)); + if (!jwt) + return nullopt; + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (cr.installation.id, + move (*jwt), + error)); + + if (iat) + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + return iat; + }; const string& repo_node_id (cr.repository.node_id); const string& head_sha (cr.check_run.check_suite.head_sha); - // The build and conclusion check runs. They are sent to GitHub in a - // single request (unless something goes wrong) so store them together + // Prepare the build and conclusion check runs. They are sent to GitHub in + // a single request (unless something goes wrong) so store them together // from the outset. // vector<check_run> check_runs (2); @@ -740,8 +790,8 @@ namespace brep ccr.name = conclusion_check_run_name; - // Load the service data, failing the conclusion check run if the tenant - // has been archived. + // Load the service data, failing the check runs if the tenant has been + // archived. // service_data sd; { @@ -754,15 +804,36 @@ namespace brep { if (p->second) // Tenant is archived { - // Fail (re-create) the conclusion check run. + // Fail (re-create) the check runs. // optional<gh_installation_access_token> iat (get_iat ()); if (!iat) throw server_error (); gq_built_result br ( - make_built_result (result_status::error, warning_success, - "Unable to rebuild: tenant has been archived")); + make_built_result ( + result_status::error, warning_success, + "Unable to rebuild individual configuration: build has " + "been archived")); + + // Try to update the conclusion check run even if the first update + // fails. + // + bool f (false); // Failed. 
+ + if (gq_create_check_run (error, bcr, iat->token, + repo_node_id, head_sha, + cr.check_run.details_url, + build_state::built, br)) + { + l3 ([&]{trace << "created check_run { " << bcr << " }";}); + } + else + { + error << "check_run " << cr.check_run.node_id + << ": unable to re-create check run"; + f = true; + } if (gq_create_check_run (error, ccr, iat->token, repo_node_id, head_sha, @@ -773,10 +844,17 @@ namespace brep } else { - fail << "check_run " << cr.check_run.node_id - << ": unable to re-create conclusion check run"; + error << "check_run " << cr.check_run.node_id + << ": unable to re-create conclusion check run"; + f = true; } + // Fail the handler if either of the check runs could not be + // updated. + // + if (f) + throw server_error (); + return true; } @@ -795,10 +873,9 @@ namespace brep { // No such tenant. // - error << "check run " << cr.check_run.node_id - << " re-requested but tenant_service with id " << sid - << " does not exist"; - return true; + fail << "check run " << cr.check_run.node_id + << " re-requested but tenant_service with id " << sid + << " does not exist"; } } @@ -809,7 +886,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (new_iat = get_iat ()) + if ((new_iat = get_iat ())) iat = &*new_iat; else throw server_error (); @@ -821,11 +898,10 @@ namespace brep // if (cr.check_run.name == conclusion_check_run_name) { - // Must exist if it can be re-requested. - // - assert (sd.conclusion_node_id); + l3 ([&]{trace << "re-requested conclusion check_run";}); - l3 ([&]{trace << "ignoring re-requested conclusion check_run";}); + if (!sd.conclusion_node_id) + fail << "no conclusion node id for check run " << cr.check_run.node_id; gq_built_result br ( make_built_result (result_status::error, warning_success, @@ -864,44 +940,50 @@ namespace brep << ": failed to extract build id from details_url"; } - // Initialize `bcr` with the check run from the service data. + // Initialize the check run (`bcr`) with state from the service data. // { // Search for the check run in the service data. // - bool found (false); - - for (const check_run& scr: sd.check_runs) - { - if (scr.node_id && *scr.node_id == cr.check_run.node_id) - { - found = true; - - // Do nothing if the build is already queued. - // - if (scr.state == build_state::queued) - { - l3 ([&]{trace << "ignoring already-queued check run";}); - return true; - } + // Note that we look by name in case node id got replaced by a racing + // re-request (in which case we ignore this request). + // + auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (), + [&cr] (const check_run& scr) + { + return scr.name == cr.check_run.name; + })); - bcr.name = scr.name; - bcr.build_id = scr.build_id; - bcr.state = scr.state; + if (i == sd.check_runs.end ()) + fail << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "re-requested but does not exist in service data"; - break; - } + // Do nothing if node ids don't match. + // + if (i->node_id && *i->node_id != cr.check_run.node_id) + { + l3 ([&]{trace << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "node id has changed in service data";}); + return true; } - if (!found) + // Do nothing if the build is already queued. 
+ // + if (i->state == build_state::queued) { - fail << "check_run " << cr.check_run.node_id - << ": re-requested but does not exist in service data"; + l3 ([&]{trace << "ignoring already-queued check run";}); + return true; } + + bcr.name = i->name; + bcr.build_id = i->build_id; + bcr.state = i->state; } // Transition the build and conclusion check runs out of the built state - // by re-creating them. + // (or any other state) by re-creating them. // bcr.state = build_state::queued; bcr.state_synced = false; @@ -925,51 +1007,74 @@ namespace brep << ": unable to re-create build and conclusion check runs"; } - // Request the rebuild. + // Request the rebuild and update service data. + // + bool race (false); - // Function called by rebuild() to update the service data (but only if - // the build is actually restarted). + // Callback function called by rebuild() to update the service data (but + // only if the build is actually restarted). // - auto update_sd = [&error, &new_iat, + auto update_sd = [&error, &new_iat, &race, &cr, &bcr, &ccr] (const tenant_service& ts, build_state) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + race = false; // Reset. + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) { - // NOTE: this lambda may be called repeatedly (e.g., due to transaction - // being aborted) and so should not move out of its captures. + error << "failed to parse service data: " << e; + return nullopt; + } - service_data sd; - try - { - sd = service_data (*ts.data); - } - catch (const invalid_argument& e) - { - error << "failed to parse service data: " << e; - return nullopt; - } + // Note that we again look by name in case node id got replaced by a + // racing re-request. In this case, however, it's impossible to decide + // who won that race, so let's fail the check suite to be on the safe + // side (in a sense, similar to the rebuild() returning queued below). + // + auto i (find_if ( + sd.check_runs.begin (), sd.check_runs.end (), + [&cr] (const check_run& scr) + { + return scr.name == cr.check_run.name; + })); - for (check_run& scr: sd.check_runs) - { - if (scr.node_id && *scr.node_id == cr.check_run.node_id) - { - scr = bcr; // Update with new node_id, state, state_synced. - sd.conclusion_node_id = ccr.node_id; + if (i == sd.check_runs.end ()) + { + error << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "re-requested but does not exist in service data"; + return nullopt; + } - sd.completed = false; + if (i->node_id && *i->node_id != cr.check_run.node_id) + { + // Keep the old conclusion node id to make sure any further state + // transitions are ignored. A bit of a hack. + // + race = true; + return nullopt; + } - // Save the IAT if we created a new one. - // - if (new_iat) - sd.installation_access = *new_iat; + *i = bcr; // Update with new node_id, state, state_synced. - return sd.json (); - } - } + sd.conclusion_node_id = ccr.node_id; + sd.completed = false; - assert (false); // Check run must exist because we found it earlier. + // Save the IAT if we created a new one. + // + if (new_iat) + sd.installation_access = *new_iat; - return nullopt; - }; + return sd.json (); + }; optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd)); @@ -978,417 +1083,88 @@ namespace brep // conclusion check run. 
Otherwise the build has been successfully // re-enqueued so do nothing further. // - if (!bs || *bs == build_state::queued) - { - // @@ TMP Diverging from the plan here to fail both CRs in the - // already-archived case as well. Seems better to not leave the - // build CR in the queued state. - // - gq_built_result bbr; // Built result for the build check run. - gq_built_result cbr; // Built result for the conclusion check run. - - if (!bs) // Archived - { - // The build has expired since we loaded the service data. Most likely - // the tenant has been archived. - // - bbr = make_built_result ( - result_status::error, warning_success, - "Unable to rebuild: tenant has been archived or no such build"); - - cbr = bbr; - } - else // *bs == build_state::queued - { - // This build has been re-enqueued since we first loaded the service - // data. This could happen if the user clicked "re-run" multiple times - // and another handler won the rebuild() race. - // - // However the winner of the check runs race cannot be determined. - // - // Best case the other handler won the check runs race as well and - // thus everything will proceed normally. Our check runs will be - // invisible and disregarded. - // - // Worst case we won the check runs race and the other handler's check - // runs -- the ones that will be updated by the build_*() - // notifications -- are no longer visible, leaving things quite - // broken. - // - // Either way, we fail our check runs. In the best case scenario it - // will have no effect; in the worst case scenario it lets the user - // know something has gone wrong. - // - // @@ TMP In an attempt to recover, the user might end up clicking - // re-run a few times but nothing will change in the UI because the - // new check runs will only be created once the re-enqueued build - // has entered the building state. - // - bbr = make_built_result ( - result_status::error, warning_success, - "Unable to rebuild at this time, please try again in a few minutes"); + if (!race && bs && *bs != build_state::queued) + return true; - cbr = make_built_result (result_status::error, warning_success, - "Failed to rebuild check run(s)"); - } + gq_built_result br; // Built result for both check runs. - // True if we successfully updated both of the check runs. + if (race || bs) // Race or re-enqueued. + { + // The re-enqueued case: this build has been re-enqueued since we first + // loaded the service data. This could happen if the user clicked + // "re-run" multiple times and another handler won the rebuild() race. // - bool bu (true); // Both updated - - // Fail the build check run. + // However the winner of the check runs race cannot be determined. // - if (gq_update_check_run (error, bcr, iat->token, - repo_node_id, *bcr.node_id, - nullopt /* details_url */, - build_state::built, move (bbr))) - { - l3 ([&]{trace << "updated check_run { " << bcr << " }";}); - } - else - { - bu = false; - - error << "check run " << cr.check_run.node_id - << ": unable to update (replacement) check run " - << *bcr.node_id; - } - - // Fail the conclusion check run. + // Best case the other handler won the check runs race as well and + // thus everything will proceed normally. Our check runs will be + // invisible and disregarded. 
// - if (gq_update_check_run (error, ccr, iat->token, - repo_node_id, *ccr.node_id, - nullopt /* details_url */, - build_state::built, move (cbr))) - { - l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); - } - else - { - bu = false; - - error << "check run " << cr.check_run.node_id - << ": unable to update conclusion check run " << *ccr.node_id; - } - - // Fail the handler if either of the check runs could not be updated. + // Worst case we won the check runs race and the other handler's check + // runs -- the ones that will be updated by the build_*() notifications + // -- are no longer visible, leaving things quite broken. // - if (!bu) - throw server_error (); - - return true; + // Either way, we fail our check runs. In the best case scenario it + // will have no effect; in the worst case scenario it lets the user + // know something has gone wrong. + // + br = make_built_result (result_status::error, warning_success, + "Unable to rebuild, try again"); } - - // The build has successfully been re-enqueued. - - return true; - } - - // @@ TMP - // -#if 0 - bool ci_github:: - handle_check_run_rerequest (const gh_check_run_event& cr, - bool warning_success) - { - HANDLER_DIAG; - - l3 ([&]{trace << "check_run event { " << cr << " }";}); - - // Fail if this is the conclusion check run. - // - if (cr.check_run.name == conclusion_check_run_name) + else // Archived. { - // @@ Fail conclusion check run with appropriate message and reurn - // true. - - l3 ([&]{trace << "ignoring conclusion check_run";}); - - // 422 Unprocessable Content: The request was well-formed (i.e., - // syntactically correct) but could not be processed. + // The build has expired since we loaded the service data. Most likely + // the tenant has been archived. // - throw invalid_request (422, "Conclusion check run cannot be rebuilt"); + br = make_built_result ( + result_status::error, warning_success, + "Unable to rebuild individual configuration: build has been archived"); } - // Get a new installation access token. + // Try to update the conclusion check run even if the first update fails. // - auto get_iat = [this, &trace, &error, &cr] () - -> optional<gh_installation_access_token> - { - optional<string> jwt (generate_jwt (trace, error)); - if (!jwt) - return nullopt; - - optional<gh_installation_access_token> iat ( - obtain_installation_access_token (cr.installation.id, - move (*jwt), - error)); - - if (iat) - l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + bool f (false); // Failed. - return iat; - }; - - // Create a new conclusion check run, replacing the existing one. + // Fail the build check run. // - // Return the check run on success or nullopt on failure. - // - auto create_conclusion_cr = - [&cr, &error, warning_success] (const gh_installation_access_token& iat, - build_state bs, - optional<result_status> rs = nullopt, - optional<string> msg = nullopt) - -> optional<check_run> + if (gq_update_check_run (error, bcr, iat->token, + repo_node_id, *bcr.node_id, + nullopt /* details_url */, + build_state::built, br)) { - optional<gq_built_result> br; - if (rs) - { - assert (msg); - - br = make_built_result (*rs, warning_success, move (*msg)); - } - - check_run r; - r.name = conclusion_check_run_name; - - if (gq_create_check_run (error, r, iat.token, - rni, hs, - nullopt /* details_url */, - bs, move (br))) - { - return r; - } - else - return nullopt; - }; - - // The overall plan is as follows: - // - // 1. Call the rebuild() function to attempt to schedule a rebuild. 
Pass - // the update function that does the following (if called): - // - // a. Update the check run being rebuilt (may also not exist). - // - // b. Clear the completed flag if true. - // - // c. "Return" the service data to be used after the call. - // - // 2. If the result of rebuild() indicates the tenant is archived, fail - // the conclusion check run with appropriate diagnostics. - // - // 3. If original state is queued, then no rebuild was scheduled and we do - // nothing. - // - // 4. Otherwise (the original state is building or built): - // - // a. Change the check run state to queued. - // - // b. Change the conclusion check run to building (do unconditionally - // to mitigate races). - // - // Note that while conceptually we are updating existing check runs, in - // practice we have to create new check runs to replace the existing ones - // because GitHub does not allow transitioning out of the built state. - // - // This results in a new node id for each check run but we can't save them - // to the service data after the rebuild() call. As a workaround, when - // updating the service data we 1) clear the re-requested check run's node - // id and set the state_synced flag to true to signal to build_building() - // and build_built() that it needs to create a new check run; and 2) clear - // the conclusion check run's node id to cause build_built() to create a - // new conclusion check run. And these two check runs' node ids will be - // saved to the service data. - - // Parse the check_run's details_url to extract build id. - // - // While this is a bit hackish, there doesn't seem to be a better way - // (like associating custom data with a check run). Note that the GitHub - // UI only allows rebuilding completed check runs, so the details URL - // should be there. - // - optional<build_id> bid (parse_details_url (cr.check_run.details_url)); - if (!bid) + l3 ([&]{trace << "updated check_run { " << bcr << " }";}); + } + else { - fail << "check run " << cr.check_run.node_id - << ": failed to extract build id from details_url"; + error << "check run " << cr.check_run.node_id + << ": unable to update (replacement) check run " + << *bcr.node_id; + f = true; } - // The IAT retrieved from the service data. - // - optional<gh_installation_access_token> iat; - - // True if the check run exists in the service data. + // Fail the conclusion check run. // - bool cr_found (false); - - // Update the state of the check run in the service data. Return (via - // captured references) the IAT and whether the check run was found. - // - // Called by rebuild(), but only if the build is actually restarted. - // - auto update_sd = [&iat, - &cr_found, - &error, - &cr] (const tenant_service& ts, build_state) - -> optional<string> - { - // NOTE: this lambda may be called repeatedly (e.g., due to transaction - // being aborted) and so should not move out of its captures. - - service_data sd; - try - { - sd = service_data (*ts.data); - } - catch (const invalid_argument& e) - { - error << "failed to parse service data: " << e; - return nullptr; - } - - if (!iat) - iat = sd.installation_access; - - // If the re-requested check run is found, update it in the service - // data. 
- // - const string& nid (cr.check_run.node_id); - - for (check_run& cr: sd.check_runs) - { - if (cr.node_id && *cr.node_id == nid) - { - cr_found = true; - cr.state = build_state::queued; - sd.completed = false; - - // Clear the check run node ids and set state_synced to true to - // cause build_building() and/or build_built() to create new check - // runs (see the plan above for details). - // - cr.node_id = nullopt; - cr.state_synced = true; - sd.conclusion_node_id = nullopt; - - return sd.json (); - } - } - - return nullopt; - }; - - optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd)); - - if (!bs) - { - // Build has expired (most probably the tenant has been archived). - // - // Update the conclusion check run to notify the user (but have to - // replace it with a new one because we don't know the existing one's - // node id). - // - optional<gh_installation_access_token> iat (get_iat ()); - if (!iat) - throw server_error (); - - if (optional<check_run> ccr = create_conclusion_cr ( - *iat, - build_state::built, - result_status::error, - "Unable to rebuild: tenant has been archived or no such build")) - { - l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";}); - } - else - { - // Log the error and return failure to GitHub which will presumably - // indicate this in its GUI. - // - fail << "check run " << cr.check_run.node_id - << ": unable to create conclusion check run"; - } - } - else if (*bs == build_state::queued) + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *ccr.node_id, + nullopt /* details_url */, + build_state::built, move (br))) { - // The build was already queued so nothing to be done. This might happen - // if the user clicked "re-run" multiple times before we managed to - // update the check run. + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); } else { - // The build has been requeued. - // - assert (*bs == build_state::building || *bs == build_state::built); - - if (!cr_found) - { - // Respond with an error otherwise GitHub will post a message in its - // GUI saying "you have successfully requested a rebuild of ..." - // - fail << "check_run " << cr.check_run.node_id - << ": build restarted but check run does not exist " - << "in service data"; - } - - // Get a new IAT if the one from the service data has expired. - // - assert (iat.has_value ()); - - if (system_clock::now () > iat->expires_at) - { - iat = get_iat (); - if (!iat) - throw server_error (); - } - - // Update (by replacing) the re-requested and conclusion check runs to - // queued and building, respectively. - // - // If either fails we can only log the error but build_building() and/or - // build_built() should correct the situation (see above for details). - // - - // Update re-requested check run. - // - { - check_run ncr; // New check run. - ncr.name = cr.check_run.name; - - if (gq_create_check_run (error, - ncr, - iat->token, - cr.repository.node_id, - cr.check_run.check_suite.head_sha, - cr.check_run.details_url, - build_state::queued)) - { - l3 ([&]{trace << "created check_run { " << ncr << " }";}); - } - else - { - error << "check_run " << cr.check_run.node_id - << ": unable to create (to update) check run in queued state"; - } - } - - // Update conclusion check run. 
- // - if (optional<check_run> ccr = - create_conclusion_cr (*iat, build_state::building)) - { - l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";}); - } - else - { - error << "check_run " << cr.check_run.node_id - << ": unable to create (to update) conclusion check run"; - } + error << "check run " << cr.check_run.node_id + << ": unable to update conclusion check run " << *ccr.node_id; + f = true; } + // Fail the handler if either of the check runs could not be updated. + // + if (f) + throw server_error (); + return true; } -#endif // Miscellaneous pull request facts // @@ -1408,30 +1184,6 @@ namespace brep // gets updated with the head commit's SHA and check_suite.pull_requests[] // will contain all PRs with this branch as head. // - // Remaining TODOs - // - // - @@ TODO? PR base branch changed (to a different branch) - // - // => pull_request(edited) - // - // - PR closed @@ TODO - // - // Also received if base branch is deleted. (And presumably same for head - // branch.) - // - // => pull_request(closed) - // - // Cancel CI? - // - // - PR merged @@ TODO - // - // => pull_request(merged) - // - // => check_suite(PR_base) - // - // Probably wouldn't want to CI the base again because the PR CI would've - // done the equivalent already. - // bool ci_github:: handle_pull_request (gh_pull_request_event pr, bool warning_success) { @@ -1456,12 +1208,6 @@ namespace brep l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); - // Note that similar to the branch push case above, while it would have - // been nice to cancel the previous CI job once the PR head moves (the - // "synchronize" event), due to the head sharing problem the previous CI - // job might actually still be relevant (in both local and remote PR - // cases). - // Distinguish between local and remote PRs by comparing the head and base // repositories' paths. // @@ -1470,6 +1216,48 @@ namespace brep ? service_data::local : service_data::remote); + // Note that similar to the branch push case above, while it would have + // been nice to cancel the previous CI job once the PR head moves (the + // "synchronize" event), due to the head sharing problem the previous CI + // job might actually still be relevant (in both local and remote PR + // cases). So we only do it for the remote PRs and only if the head is not + // shared (via tenant reference counting). + // + if (kind == service_data::remote && pr.action == "synchronize") + { + if (pr.before) + { + // Service id that will uniquely identify the CI tenant. + // + string sid (pr.repository.node_id + ':' + *pr.before); + + if (optional<tenant_service> ts = cancel (error, warn, + verb_ ? &trace : nullptr, + *build_db_, retry_, + "ci-github", sid, + true /* ref_count */)) + { + l3 ([&]{trace << "pull request " << pr.pull_request.node_id + << ": attempted to cancel CI of previous head commit" + << " (ref_count: " << ts->ref_count << ')';}); + } + else + { + // It's possible that there was no CI for the previous commit for + // various reasons (e.g., CI was not enabled). + // + l3 ([&]{trace << "pull request " << pr.pull_request.node_id + << ": failed to cancel CI of previous head commit " + << "with tenant_service id " << sid;}); + } + } + else + { + error << "pull request " << pr.pull_request.node_id + << ": before commit is missing in synchronize event"; + } + } + // Note: for remote PRs the check_sha will be set later, in // build_unloaded_pre_check(), to test merge commit id. 
// @@ -1535,6 +1323,8 @@ build_unloaded (tenant_service&& ts, const diag_epilogue& log_writer) const noexcept { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -1558,6 +1348,8 @@ service_data&& sd, const diag_epilogue& log_writer) const noexcept { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); // We get here for PRs only (but both local and remote). The overall @@ -1626,7 +1418,7 @@ // Service id that will uniquely identify the CI tenant. // - string sid (sd.repository_node_id + ":" + sd.report_sha); + string sid (sd.repository_node_id + ':' + sd.report_sha); // Create an unloaded CI tenant, doing nothing if one already exists // (which could've been created by a head branch push or another PR @@ -1701,6 +1493,8 @@ service_data&& sd, const diag_epilogue& log_writer) const noexcept { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); // Load the tenant, which is essentially the same for both branch push and @@ -1986,6 +1780,8 @@ const build_queued_hints& hs, const diag_epilogue& log_writer) const noexcept { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -2154,6 +1950,8 @@ const build& b, const diag_epilogue& log_writer) const noexcept { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -2300,11 +2098,13 @@ const build& b, const diag_epilogue& log_writer) const noexcept { + // NOTE: this function is noexcept and should not throw. + + NOTIFICATION_DIAG (log_writer); + // @@ TODO Include service_data::event_node_id and perhaps ts.id in // diagnostics? E.g. when failing to update check runs we print the // build ID only. - // - NOTIFICATION_DIAG (log_writer); service_data sd; try @@ -2713,22 +2513,22 @@ namespace brep url u (details_url); - if (!u.query || !u.path || u.path->size () <= 1) - return nullopt; - build_id r; // Extract the tenant from the URL path. // // Example path: @d2586f57-21dc-40b7-beb2-6517ad7917dd // - r.package.tenant = u.path->substr (1); - - if (r.package.tenant.empty ()) + if (!u.path || u.path->size () != 37 || (*u.path)[0] != '@') return nullopt; + r.package.tenant = u.path->substr (1); + // Extract the rest of the build_id members from the URL query. // + if (!u.query) + return nullopt; + bool pn (false), pv (false), tg (false), tc (false), pc (false), th (false); @@ -2747,21 +2547,25 @@ ++vp; // Skip '=' - const char* ve (ep ? ep : vp + strlen (vp)); // Value end pointer. + const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end. // Get the value as-is or URL-decode it. // - auto getval = [vp, ve] () { return string (vp, ve); }; + auto rawval = [vp, ve] () { return string (vp, ve); }; auto decval = [vp, ve] () { return mime_url_decode (vp, ve); }; auto make_version = [] (string&& v) - { return canonical_version (brep::version (move (v))); }; + { + return canonical_version (brep::version (move (v))); + }; auto c = [&n] (bool& b, const char* s) - { return n == s ? (b = true) : false; }; + { + return n == s ? (b = true) : false; + };
if (c (pn, "builds")) r.package.name = package_name (decval ()); - else if (c (pv, "pv")) r.package.version = make_version (getval ()); + else if (c (pv, "pv")) r.package.version = make_version (rawval ()); else if (c (tg, "tg")) r.target = target_triplet (decval ()); else if (c (tc, "tc")) r.target_config_name = decval (); else if (c (pc, "pc")) r.package_config_name = decval (); @@ -2769,7 +2573,7 @@ { // Toolchain name and version. E.g. "public-0.17.0" - string v (getval ()); + string v (rawval ()); // Note: parsing code based on mod/mod-builds.cxx. // @@ -2781,7 +2585,7 @@ r.toolchain_version = make_version (v.substr (p + 1)); } - qp = ep ? ep + 1 : nullptr; + qp = ep != nullptr ? ep + 1 : nullptr; } if (!pn || !pv || !tg || !tc || !pc || !th) @@ -2789,7 +2593,7 @@ return r; } - catch (const invalid_argument&) + catch (const invalid_argument&) // Invalid url, brep::version, etc. { return nullopt; }
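The query-parsing loop above relies on raw-pointer scanning (strchr() for '=' and '&') rather than a URL library; below is a trimmed-down but self-contained version of the same technique, extracting just two of the parameters and skipping URL decoding:

#include <cstring>
#include <iostream>
#include <optional>
#include <string>

struct package_id
{
  std::string name;
  std::string version;
};

// Walk the query string one `name=value` pair at a time, using '&' as the
// pair separator and '=' as the name/value separator, exactly as the
// parse_details_url() loop above does.
//
static std::optional<package_id>
parse_query (const char* qp)
{
  package_id r;
  bool pn (false), pv (false);

  while (qp != nullptr)
  {
    const char* vp (strchr (qp, '='));
    const char* ep (strchr (qp, '&'));

    if (vp == nullptr || (ep != nullptr && vp > ep))
      return std::nullopt; // Missing '=' in this parameter.

    std::string n (qp, vp); // Parameter name.

    ++vp; // Skip '='.

    const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end.

    if      (n == "builds" && !pn) {pn = true; r.name.assign (vp, ve);}
    else if (n == "pv"     && !pv) {pv = true; r.version.assign (vp, ve);}

    qp = ep != nullptr ? ep + 1 : nullptr;
  }

  if (!pn || !pv)
    return std::nullopt; // A required parameter is absent.

  return r;
}

int main ()
{
  if (auto r = parse_query ("builds=hello&pv=1.2.3&tg=x86_64-linux-gnu"))
    std::cout << r->name << '/' << r->version << '\n';
}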