Diffstat (limited to 'mod/mod-ci-github.cxx')
-rw-r--r--  mod/mod-ci-github.cxx  1319
1 file changed, 923 insertions(+), 396 deletions(-)
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx
index 6dfaa5f..ded15b6 100644
--- a/mod/mod-ci-github.cxx
+++ b/mod/mod-ci-github.cxx
@@ -19,26 +19,6 @@
#include <stdexcept>
-// @@ Remaining TODOs
-//
-// - Rerequested checks
-//
-// - check_suite (action: rerequested): received when user re-runs all
-// checks.
-//
-// - check_run (action: rerequested): received when user re-runs a
-// specific check or all failed checks.
-//
-// @@ TMP I have confirmed that the above is accurate.
-//
-// Will need to extract a few more fields from check_runs, but the layout
-// is very similar to that of check_suite.
-//
-// - Choose strong webhook secret (when deploying).
-//
-// - Check that delivery UUID has not been received before (replay attack).
-//
-
// Resources:
//
// Creating an App:
@@ -280,9 +260,6 @@ namespace brep
// is that we want to be "notified" of new actions at which point we can
// decide whether to ignore them or to handle them.
//
- // @@ There is also check_run even (re-requested by user, either
- // individual check run or all the failed check runs).
- //
if (event == "check_suite")
{
gh_check_suite_event cs;
@@ -316,13 +293,10 @@ namespace brep
else if (cs.action == "completed")
{
// GitHub thinks that "all the check runs in this check suite have
- // completed and a conclusion is available". Looks like this one we
- // ignore?
+ // completed and a conclusion is available". Check with our own
+ // bookkeeping and log an error if there is a mismatch.
//
- // What if our bookkeeping says otherwise? But then we can't even
- // access the service data easily here. @@ TODO: maybe/later.
- //
- return true;
+ return handle_check_suite_completed (move (cs), warning_success);
}
else
{
@@ -398,7 +372,8 @@ namespace brep
throw invalid_request (400, move (m));
}
- if (pr.action == "opened" || pr.action == "synchronize")
+ if (pr.action == "opened" ||
+ pr.action == "synchronize")
{
// opened
// A pull request was opened.
@@ -406,23 +381,78 @@ namespace brep
// synchronize
// A pull request's head branch was updated from the base branch or
// new commits were pushed to the head branch. (Note that there is
- // no equivalent event for the base branch. That case gets handled
- // in handle_check_suite_request() instead. @@ Not anymore.)
+ // no equivalent event for the base branch.)
//
- // Note that both cases are handled the same: we start a new CI
+ // Note that both cases are handled similarly: we start a new CI
// request which will be reported on the new commit id.
//
return handle_pull_request (move (pr), warning_success);
}
- else
+ else if (pr.action == "edited")
+ {
+ // PR base branch changed (to a different branch), among other
+ // irrelevant changes (title, body, etc.).
+ //
+ // This is in a sense a special case of the base branch moving. In
+ // that case we don't do anything (due to the head sharing problem)
+ // relying instead on the branch protection rule. So it makes sense
+ // to do the same here.
+ //
+ return true;
+ }
+ else if (pr.action == "closed")
+ {
+ // PR has been closed (as merged or not; see merged member). Also
+ // apparently received if base branch is deleted (and the same
+ // for head branch). See also the reopened event below.
+ //
+ // While it may seem natural to cancel the CI for the closed PR, it
+ // might actually be useful to have a completed CI record. GitHub
+ // doesn't prevent us from publishing CI results for the closed PR
+ // (even if both base and head branches were deleted). And if such a
+ // PR is reopened, the CI results remain.
+ //
+ return true;
+ }
+ else if (pr.action == "reopened")
{
- // Ignore the remaining actions by sending a 200 response with empty
- // body.
+ // Previously closed PR has been reopened.
+ //
+ // Since we don't cancel the CI for a closed PR, there is nothing
+ // to do if it is reopened.
//
- // @@ Ignore known but log unknown, as in check_suite above?
+ return true;
+ }
+ else if (pr.action == "assigned" ||
+ pr.action == "auto_merge_disabled" ||
+ pr.action == "auto_merge_enabled" ||
+ pr.action == "converted_to_draft" ||
+ pr.action == "demilestoned" ||
+ pr.action == "dequeued" ||
+ pr.action == "enqueued" ||
+ pr.action == "labeled" ||
+ pr.action == "locked" ||
+ pr.action == "milestoned" ||
+ pr.action == "ready_for_review" ||
+ pr.action == "review_request_removed" ||
+ pr.action == "review_requested" ||
+ pr.action == "unassigned" ||
+ pr.action == "unlabeled" ||
+ pr.action == "unlocked")
+ {
+ // These have no relation to CI.
//
return true;
}
+ else
+ {
+ // Ignore unknown actions by sending a 200 response with empty body
+ // but also log as an error since we want to notice new actions.
+ //
+ error << "unknown action '" << pr.action << "' in pull_request event";
+
+ return true;
+ }
}
else
{
@@ -502,20 +532,18 @@ namespace brep
l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
- // @@ What happens if we call this functions with an already existing
- // node_id (e.g., replay attack). See the UUID header above.
- //
-
// While it would have been nice to cancel CIs of PRs with this branch as
- // base not to waste resources, there are complications: Firsty, we can
- // only do this for remote PRs (since local PRs may share the result with
- // branch push). Secondly, we try to do our best even if the branch
- // protection rule for head behind is not enabled. In this case, it would
- // be good to complete the CI. So maybe/later.
+ // base not to waste resources, there are complications: Firstly, we can
+ // only do this for remote PRs (since local PRs will most likely share the
+ // result with branch push). Secondly, we try to do our best even if the
+ // branch protection rule for head behind is not enabled. In this case, it
+ // would be good to complete the CI. So maybe/later. See also the head
+ // case in handle_pull_request(), where we do cancel remote PRs that are
+ // not shared.
// Service id that uniquely identifies the CI tenant.
//
- string sid (cs.repository.node_id + ":" + cs.check_suite.head_sha);
+ string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha);
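+ //
+ // (The repository node id is GitHub's opaque global id, so the resulting
+ // sid looks something like "R_kgDOABCDEF:4a0fabf1..."; both values here
+ // are made up for illustration.)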
// If the user requests a rebuild of the (entire) PR, then this manifests
// as the check_suite rather than pull_request event. Specifically:
@@ -541,11 +569,13 @@ namespace brep
{
kind = service_data::remote;
- if (optional<tenant_service> ts = find (*build_db_, "ci-github", sid))
+ if (optional<tenant_data> d = find (*build_db_, "ci-github", sid))
{
+ tenant_service& ts (d->service);
+
try
{
- service_data sd (*ts->data);
+ service_data sd (*ts.data);
check_sha = move (sd.check_sha); // Test merge commit.
}
catch (const invalid_argument& e)
@@ -630,6 +660,132 @@ namespace brep
return true;
}
+ bool ci_github::
+ handle_check_suite_completed (gh_check_suite_event cs, bool warning_success)
+ {
+ // The plan is as follows:
+ //
+ // 1. Load the service data.
+ //
+ // 2. Verify it is completed.
+ //
+ // 3. Verify the check run counts match.
+ //
+ // 4. Verify (like in build_built()) that all the check runs are
+ // completed.
+ //
+ // 5. Verify the result matches what GitHub thinks it is.
+
+ HANDLER_DIAG;
+
+ l3 ([&]{trace << "check_suite event { " << cs << " }";});
+
+ // Service id that uniquely identifies the CI tenant.
+ //
+ string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha);
+
+ // The common log entry subject.
+ //
+ string sub ("check suite " + cs.check_suite.node_id + '/' + sid);
+
+ // Load the service data.
+ //
+ service_data sd;
+
+ if (optional<tenant_data> d = find (*build_db_, "ci-github", sid))
+ {
+ try
+ {
+ sd = service_data (*d->service.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "failed to parse service data: " << e;
+ }
+ }
+ else
+ {
+ error << sub << ": tenant_service does not exist";
+ return true;
+ }
+
+ // Verify the completed flag and the number of check runs.
+ //
+ if (!sd.completed)
+ {
+ error << sub << " service data complete flag is false";
+ return true;
+ }
+
+ // Received count will be one higher because we don't store the conclusion
+ // check run.
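+ // (E.g., three stored build check runs mean GitHub should report four.)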
+ //
+ size_t check_runs_count (sd.check_runs.size () + 1);
+
+ if (check_runs_count == 1)
+ {
+ error << sub << ": no check runs in service data";
+ return true;
+ }
+
+ if (cs.check_suite.check_runs_count != check_runs_count)
+ {
+ error << sub << ": check runs count " << cs.check_suite.check_runs_count
+ << " does not match service data count " << check_runs_count;
+ return true;
+ }
+
+ // Verify that all the check runs are built and compute the summary
+ // conclusion.
+ //
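+ // Note: this assumes result_status's operator|= aggregates toward the
+ // worse of the two statuses (e.g., success | warning == warning and
+ // warning | error == error).
+ //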
+ result_status conclusion (result_status::success);
+
+ for (const check_run& cr: sd.check_runs)
+ {
+ if (cr.state == build_state::built)
+ {
+ assert (cr.status.has_value ());
+ conclusion |= *cr.status;
+ }
+ else
+ {
+ error << sub << ": unbuilt check run in service data";
+ return true;
+ }
+ }
+
+ // Verify the conclusion.
+ //
+ if (!cs.check_suite.conclusion)
+ {
+ error << sub << ": absent conclusion in completed check suite";
+ return true;
+ }
+
+ string gh_conclusion (gh_to_conclusion (conclusion, warning_success));
+
+ if (*cs.check_suite.conclusion != gh_conclusion)
+ {
+ error << sub << ": conclusion " << *cs.check_suite.conclusion
+ << " does not match service data conclusion " << gh_conclusion;
+ return true;
+ }
+
+ return true;
+ }
+
+ // Create a gq_built_result.
+ //
+ // Throw invalid_argument in case of invalid result_status.
+ //
+ static gq_built_result
+ make_built_result (result_status rs, bool warning_success, string message)
+ {
+ return {gh_to_conclusion (rs, warning_success),
+ circle (rs) + ' ' + ucase (to_string (rs)),
+ move (message)};
+ }
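+
+ // For example (the exact circle character and conclusion string are
+ // determined by circle() and gh_to_conclusion(), respectively):
+ //
+ //   make_built_result (result_status::error, warning_success,
+ //                      "Some configurations failed to build");
+ //
+ //   => {"FAILURE", "<circle> ERROR", "Some configurations failed to build"}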
+
// Parse a check run details URL into a build_id.
//
// Return nullopt if the URL is invalid.
@@ -637,6 +793,12 @@ namespace brep
static optional<build_id>
parse_details_url (const string& details_url);
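
// For example (the host and parameter values are made up for illustration):
//
// https://brep.example.org/@d2586f57-21dc-40b7-beb2-6517ad7917dd?builds=libhello&pv=1.2.3&tg=x86_64-linux-gnu&tc=linux-gcc&pc=default&th=public-0.17.0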
+ // Note that GitHub always posts a message to their GUI saying "You have
+ // successfully requested <check_run_name> be rerun", regardless of what
+ // HTTP status code we respond with. However we do return error status codes
+ // when there is no better option (like failing the conclusion) in case they
+ // start handling them someday.
+ //
bool ci_github::
handle_check_run_rerequest (const gh_check_run_event& cr,
bool warning_success)
@@ -645,20 +807,40 @@ namespace brep
l3 ([&]{trace << "check_run event { " << cr << " }";});
- // Fail if this is the conclusion check run.
+ // The overall plan is as follows:
//
- if (cr.check_run.name == conclusion_check_run_name)
- {
- // @@ Fail conclusion check run with appropriate message and reurn
- // true.
-
- l3 ([&]{trace << "ignoring conclusion check_run";});
-
- // 422 Unprocessable Content: The request was well-formed (i.e.,
- // syntactically correct) but could not be processed.
- //
- throw invalid_request (422, "Conclusion check run cannot be rebuilt");
- }
+ // 1. Load service data.
+ //
+ // 2. If the tenant is archived, then fail (re-create) both the check run
+ // and the conclusion with appropriate diagnostics.
+ //
+ // 3. If the check run is in the queued state, then do nothing.
+ //
+ // 4. Re-create the check run in the queued state and the conclusion in
+ // the building state. Note: do in a single request to make sure we
+ // either "win" or "loose" the potential race for both (important
+ // for #7).
+ //
+ // 5. Call the rebuild() function to attempt to schedule a rebuild. Pass
+ // the update function that does the following (if called):
+ //
+ // a. Save new node ids.
+ //
+ // b. Update the check run state (may also not exist).
+ //
+ // c. Clear the completed flag if true.
+ //
+ // 6. If the result of rebuild() indicates the tenant is archived, then
+ // fail (update) both the check run and conclusion with appropriate
+ // diagnostics.
+ //
+ // 7. If the original state is queued (no rebuild was scheduled), then fail
+ // (update) both the check run and the conclusion.
+ //
+ // Note that while conceptually we are updating existing check runs, in
+ // practice we have to re-create them as new check runs in order to replace
+ // the existing ones because GitHub does not allow transitioning out of the
+ // built state.
// Get a new installation access token.
//
@@ -680,77 +862,156 @@ namespace brep
return iat;
};
- // Create a new conclusion check run, replacing the existing one.
+ const string& repo_node_id (cr.repository.node_id);
+ const string& head_sha (cr.check_run.check_suite.head_sha);
+
+ // Prepare the build and conclusion check runs. They are sent to GitHub in
+ // a single request (unless something goes wrong) so store them together
+ // from the outset.
//
- // Return the check run on success or nullopt on failure.
+ vector<check_run> check_runs (2);
+ check_run& bcr (check_runs[0]); // Build check run
+ check_run& ccr (check_runs[1]); // Conclusion check run
+
+ ccr.name = conclusion_check_run_name;
+
+ // Load the service data, failing the check runs if the tenant has been
+ // archived.
//
- auto create_conclusion_cr =
- [&cr, &error, warning_success] (const gh_installation_access_token& iat,
- build_state bs,
- optional<result_status> rs = nullopt,
- optional<string> msg = nullopt)
- -> optional<check_run>
+ service_data sd;
+ string tenant_id;
{
- optional<gq_built_result> br;
- if (rs)
+ // Service id that uniquely identifies the CI tenant.
+ //
+ string sid (repo_node_id + ':' + head_sha);
+
+ if (optional<tenant_data> d = find (*build_db_, "ci-github", sid))
{
- assert (msg);
+ if (d->archived) // Tenant is archived
+ {
+ // Fail (re-create) the check runs.
+ //
+ optional<gh_installation_access_token> iat (get_iat ());
+ if (!iat)
+ throw server_error ();
+
+ gq_built_result br (
+ make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has "
+ "been archived"));
+
+ // Try to update the conclusion check run even if the first update
+ // fails.
+ //
+ bool f (false); // Failed.
+
+ if (gq_create_check_run (error, bcr, iat->token,
+ repo_node_id, head_sha,
+ cr.check_run.details_url,
+ build_state::built, br))
+ {
+ l3 ([&]{trace << "created check_run { " << bcr << " }";});
+ }
+ else
+ {
+ error << "check_run " << cr.check_run.node_id
+ << ": unable to re-create check run";
+ f = true;
+ }
- br = gq_built_result (gh_to_conclusion (*rs, warning_success),
- circle (*rs) + ' ' + ucase (to_string (*rs)),
- move (*msg));
+ if (gq_create_check_run (error, ccr, iat->token,
+ repo_node_id, head_sha,
+ nullopt /* details_url */,
+ build_state::built, move (br)))
+ {
+ l3 ([&]{trace << "created conclusion check_run { " << ccr << " }";});
+ }
+ else
+ {
+ error << "check_run " << cr.check_run.node_id
+ << ": unable to re-create conclusion check run";
+ f = true;
+ }
+
+ // Fail the handler if either of the check runs could not be
+ // updated.
+ //
+ if (f)
+ throw server_error ();
+
+ return true;
+ }
+
+ tenant_service& ts (d->service);
+
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "failed to parse service data: " << e;
+ }
+
+ tenant_id = d->tenant_id;
}
+ else
+ {
+ // No such tenant.
+ //
+ fail << "check run " << cr.check_run.node_id
+ << " re-requested but tenant_service with id " << sid
+ << " does not exist";
+ }
+ }
- check_run r;
- r.name = conclusion_check_run_name;
+ // Get a new IAT if the one from the service data has expired.
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
- if (gq_create_check_run (error, r, iat.token,
- rni, hs,
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if ((new_iat = get_iat ()))
+ iat = &*new_iat;
+ else
+ throw server_error ();
+ }
+ else
+ iat = &sd.installation_access;
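+
+ // Either way iat now points at a valid token: the one from the service
+ // data or the freshly obtained new_iat.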
+
+ // Fail if it's the conclusion check run that is being re-requested.
+ //
+ if (cr.check_run.name == conclusion_check_run_name)
+ {
+ l3 ([&]{trace << "re-requested conclusion check_run";});
+
+ if (!sd.conclusion_node_id)
+ fail << "no conclusion node id for check run " << cr.check_run.node_id;
+
+ gq_built_result br (
+ make_built_result (result_status::error, warning_success,
+ "Conclusion check run cannot be rebuilt"));
+
+ // Fail (update) the conclusion check run.
+ //
+ if (gq_update_check_run (error, ccr, iat->token,
+ repo_node_id, *sd.conclusion_node_id,
nullopt /* details_url */,
- bs, move (br)))
+ build_state::built, move (br)))
{
- return r;
+ l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
}
else
- return nullopt;
- };
+ {
+ fail << "check run " << cr.check_run.node_id
+ << ": unable to update conclusion check run "
+ << *sd.conclusion_node_id;
+ }
- // The overall plan is as follows:
- //
- // 1. Call the rebuild() function to attempt to schedule a rebuild. Pass
- // the update function that does the following (if called):
- //
- // a. Update the check run being rebuilt (may also not exist).
- //
- // b. Clear the completed flag if true.
- //
- // c. "Return" the service data to be used after the call.
- //
- // 2. If the result of rebuild() indicates the tenant is archived, fail
- // the conclusion check run with appropriate diagnostics.
- //
- // 3. If original state is queued, then no rebuild was scheduled and we do
- // nothing.
- //
- // 4. Otherwise (the original state is building or built):
- //
- // a. Change the check run state to queued.
- //
- // b. Change the conclusion check run to building (do unconditionally
- // to mitigate races).
- //
- // Note that while conceptually we are updating existing check runs, in
- // practice we have to create new check runs to replace the existing ones
- // because GitHub does not allow transitioning out of the built state.
- //
- // This results in a new node id for each check run but we can't save them
- // to the service data after the rebuild() call. As a workaround, when
- // updating the service data we 1) clear the re-requested check run's node
- // id and set the state_synced flag to true to signal to build_building()
- // and build_built() that it needs to create a new check run; and 2) clear
- // the conclusion check run's node id to cause build_built() to create a
- // new conclusion check run. And these two check runs' node ids will be
- // saved to the service data.
+ return true;
+ }
// Parse the check_run's details_url to extract build id.
//
@@ -766,28 +1027,101 @@ namespace brep
<< ": failed to extract build id from details_url";
}
- // The IAT retrieved from the service data.
+ // Initialize the check run (`bcr`) with state from the service data.
//
- optional<gh_installation_access_token> iat;
+ {
+ // Search for the check run in the service data.
+ //
+ // Note that we look by name in case node id got replaced by a racing
+ // re-request (in which case we ignore this request).
+ //
+ auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (),
+ [&cr] (const check_run& scr)
+ {
+ return scr.name == cr.check_run.name;
+ }));
+
+ if (i == sd.check_runs.end ())
+ fail << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "re-requested but does not exist in service data";
- // True if the check run exists in the service data.
+ // Do nothing if node ids don't match.
+ //
+ if (i->node_id && *i->node_id != cr.check_run.node_id)
+ {
+ l3 ([&]{trace << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "node id has changed in service data";});
+ return true;
+ }
+
+ // Do nothing if the build is already queued.
+ //
+ if (i->state == build_state::queued)
+ {
+ l3 ([&]{trace << "ignoring already-queued check run";});
+ return true;
+ }
+
+ bcr.name = i->name;
+ bcr.build_id = i->build_id;
+ bcr.state = i->state;
+ }
+
+ // Transition the build and conclusion check runs out of the built state
+ // (or any other state) by re-creating them.
//
- bool cr_found (false);
+ bcr.state = build_state::queued;
+ bcr.state_synced = false;
+ bcr.details_url = cr.check_run.details_url;
- // Update the state of the check run in the service data. Return (via
- // captured references) the IAT and whether the check run was found.
+ ccr.state = build_state::building;
+ ccr.state_synced = false;
+
+ if (gq_create_check_runs (error, check_runs, iat->token,
+ repo_node_id, head_sha))
+ {
+ assert (bcr.state == build_state::queued);
+ assert (ccr.state == build_state::building);
+
+ l3 ([&]{trace << "created check_run { " << bcr << " }";});
+ l3 ([&]{trace << "created conclusion check_run { " << ccr << " }";});
+ }
+ else
+ {
+ fail << "check run " << cr.check_run.node_id
+ << ": unable to re-create build and conclusion check runs";
+ }
+
+ // Request the rebuild and update service data.
//
- // Called by rebuild(), but only if the build is actually restarted.
+ bool race (false);
+
+ // Callback function called by rebuild() to update the service data (but
+ // only if the build is actually restarted).
//
- auto update_sd = [&iat,
- &cr_found,
- &error,
- &cr] (const tenant_service& ts, build_state)
- -> optional<string>
+ auto update_sd = [&error, &new_iat, &race,
+ tenant_id = move (tenant_id),
+ &cr, &bcr, &ccr] (const string& ti,
+ const tenant_service& ts,
+ build_state) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to transaction
// being aborted) and so should not move out of its captures.
+ race = false; // Reset.
+
+ if (tenant_id != ti)
+ {
+ // The tenant got replaced since we loaded it but we managed to
+ // trigger a rebuild in the new tenant. Who knows whose check runs are
+ // visible, so let's fail ours similar to the cases below.
+ //
+ race = true;
+ return nullopt;
+ }
+
service_data sd;
try
{
@@ -796,148 +1130,138 @@ namespace brep
catch (const invalid_argument& e)
{
error << "failed to parse service data: " << e;
- return nullptr;
+ return nullopt;
}
- if (!iat)
- iat = sd.installation_access;
-
- // If the re-requested check run is found, update it in the service
- // data.
+ // Note that we again look by name in case node id got replaced by a
+ // racing re-request. In this case, however, it's impossible to decide
+ // who won that race, so let's fail the check suite to be on the safe
+ // side (in a sense, similar to the rebuild() returning queued below).
//
- const string& nid (cr.check_run.node_id);
+ auto i (find_if (
+ sd.check_runs.begin (), sd.check_runs.end (),
+ [&cr] (const check_run& scr)
+ {
+ return scr.name == cr.check_run.name;
+ }));
- for (check_run& cr: sd.check_runs)
+ if (i == sd.check_runs.end ())
{
- if (cr.node_id && *cr.node_id == nid)
- {
- cr_found = true;
- cr.state = build_state::queued;
- sd.completed = false;
-
- // Clear the check run node ids and set state_synced to true to
- // cause build_building() and/or build_built() to create new check
- // runs (see the plan above for details).
- //
- cr.node_id = nullopt;
- cr.state_synced = true;
- sd.conclusion_node_id = nullopt;
+ error << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "re-requested but does not exist in service data";
+ return nullopt;
+ }
- return sd.json ();
- }
+ if (i->node_id && *i->node_id != cr.check_run.node_id)
+ {
+ // Keep the old conclusion node id to make sure any further state
+ // transitions are ignored. A bit of a hack.
+ //
+ race = true;
+ return nullopt;
}
- return nullopt;
+ *i = bcr; // Update with new node_id, state, state_synced.
+
+ sd.conclusion_node_id = ccr.node_id;
+ sd.completed = false;
+
+ // Save the IAT if we created a new one.
+ //
+ if (new_iat)
+ sd.installation_access = *new_iat;
+
+ return sd.json ();
};
optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd));
- if (!bs)
+ // If the build has been archived or re-enqueued since we loaded the
+ // service data, fail (by updating) both the build check run and the
+ // conclusion check run. Otherwise the build has been successfully
+ // re-enqueued so do nothing further.
+ //
+ if (!race && bs && *bs != build_state::queued)
+ return true;
+
+ gq_built_result br; // Built result for both check runs.
+
+ if (race || bs) // Race or re-enqueued.
{
- // Build has expired (most probably the tenant has been archived).
+ // The re-enqueued case: this build has been re-enqueued since we first
+ // loaded the service data. This could happen if the user clicked
+ // "re-run" multiple times and another handler won the rebuild() race.
//
- // Update the conclusion check run to notify the user (but have to
- // replace it with a new one because we don't know the existing one's
- // node id).
+ // However the winner of the check runs race cannot be determined.
//
- optional<gh_installation_access_token> iat (get_iat ());
- if (!iat)
- throw server_error ();
-
- if (optional<check_run> ccr = create_conclusion_cr (
- *iat,
- build_state::built,
- result_status::error,
- "Unable to rebuild: tenant has been archived or no such build"))
- {
- l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";});
- }
- else
- {
- // Log the error and return failure to GitHub which will presumably
- // indicate this in its GUI.
- //
- fail << "check run " << cr.check_run.node_id
- << ": unable to create conclusion check run";
- }
- }
- else if (*bs == build_state::queued)
- {
- // The build was already queued so nothing to be done. This might happen
- // if the user clicked "re-run" multiple times before we managed to
- // update the check run.
- }
- else
- {
- // The build has been requeued.
+ // Best case the other handler won the check runs race as well and
+ // thus everything will proceed normally. Our check runs will be
+ // invisible and disregarded.
//
- assert (*bs == build_state::building || *bs == build_state::built);
-
- if (!cr_found)
- {
- // Respond with an error otherwise GitHub will post a message in its
- // GUI saying "you have successfully requested a rebuild of ..."
- //
- fail << "check_run " << cr.check_run.node_id
- << ": build restarted but check run does not exist "
- << "in service data";
- }
-
- // Get a new IAT if the one from the service data has expired.
+ // Worst case we won the check runs race and the other handler's check
+ // runs -- the ones that will be updated by the build_*() notifications
+ // -- are no longer visible, leaving things quite broken.
//
- assert (iat.has_value ());
-
- if (system_clock::now () > iat->expires_at)
- {
- iat = get_iat ();
- if (!iat)
- throw server_error ();
- }
-
- // Update (by replacing) the re-requested and conclusion check runs to
- // queued and building, respectively.
+ // Either way, we fail our check runs. In the best case scenario it
+ // will have no effect; in the worst case scenario it lets the user
+ // know something has gone wrong.
//
- // If either fails we can only log the error but build_building() and/or
- // build_built() should correct the situation (see above for details).
+ br = make_built_result (result_status::error, warning_success,
+ "Unable to rebuild, try again");
+ }
+ else // Archived.
+ {
+ // The build has expired since we loaded the service data. Most likely
+ // the tenant has been archived.
//
+ br = make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has been archived");
+ }
- // Update re-requested check run.
- //
- {
- check_run ncr; // New check run.
- ncr.name = cr.check_run.name;
+ // Try to update the conclusion check run even if the first update fails.
+ //
+ bool f (false); // Failed.
- if (gq_create_check_run (error,
- ncr,
- iat->token,
- cr.repository.node_id,
- cr.check_run.check_suite.head_sha,
- cr.check_run.details_url,
- build_state::queued))
- {
- l3 ([&]{trace << "created check_run { " << ncr << " }";});
- }
- else
- {
- error << "check_run " << cr.check_run.node_id
- << ": unable to create (to update) check run in queued state";
- }
- }
+ // Fail the build check run.
+ //
+ if (gq_update_check_run (error, bcr, iat->token,
+ repo_node_id, *bcr.node_id,
+ nullopt /* details_url */,
+ build_state::built, br))
+ {
+ l3 ([&]{trace << "updated check_run { " << bcr << " }";});
+ }
+ else
+ {
+ error << "check run " << cr.check_run.node_id
+ << ": unable to update (replacement) check run "
+ << *bcr.node_id;
+ f = true;
+ }
- // Update conclusion check run.
- //
- if (optional<check_run> ccr =
- create_conclusion_cr (*iat, build_state::building))
- {
- l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";});
- }
- else
- {
- error << "check_run " << cr.check_run.node_id
- << ": unable to create (to update) conclusion check run";
- }
+ // Fail the conclusion check run.
+ //
+ if (gq_update_check_run (error, ccr, iat->token,
+ repo_node_id, *ccr.node_id,
+ nullopt /* details_url */,
+ build_state::built, move (br)))
+ {
+ l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
+ }
+ else
+ {
+ error << "check run " << cr.check_run.node_id
+ << ": unable to update conclusion check run " << *ccr.node_id;
+ f = true;
}
+ // Fail the handler if either of the check runs could not be updated.
+ //
+ if (f)
+ throw server_error ();
+
return true;
}
@@ -959,30 +1283,6 @@ namespace brep
// gets updated with the head commit's SHA and check_suite.pull_requests[]
// will contain all PRs with this branch as head.
//
- // Remaining TODOs
- //
- // - @@ TODO? PR base branch changed (to a different branch)
- //
- // => pull_request(edited)
- //
- // - PR closed @@ TODO
- //
- // Also received if base branch is deleted. (And presumably same for head
- // branch.)
- //
- // => pull_request(closed)
- //
- // Cancel CI?
- //
- // - PR merged @@ TODO
- //
- // => pull_request(merged)
- //
- // => check_suite(PR_base)
- //
- // Probably wouldn't want to CI the base again because the PR CI would've
- // done the equivalent already.
- //
bool ci_github::
handle_pull_request (gh_pull_request_event pr, bool warning_success)
{
@@ -1007,12 +1307,6 @@ namespace brep
l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
- // Note that similar to the branch push case above, while it would have
- // been nice to cancel the previous CI job once the PR head moves (the
- // "synchronize" event), due to the head sharing problem the previous CI
- // job might actually still be relevant (in both local and remote PR
- // cases).
-
// Distinguish between local and remote PRs by comparing the head and base
// repositories' paths.
//
@@ -1021,6 +1315,48 @@ namespace brep
? service_data::local
: service_data::remote);
+ // Note that similar to the branch push case above, while it would have
+ // been nice to cancel the previous CI job once the PR head moves (the
+ // "synchronize" event), due to the head sharing problem the previous CI
+ // job might actually still be relevant (in both local and remote PR
+ // cases). So we only do it for the remote PRs and only if the head is not
+ // shared (via tenant reference counting).
+ //
+ if (kind == service_data::remote && pr.action == "synchronize")
+ {
+ if (pr.before)
+ {
+ // Service id that will uniquely identify the CI tenant.
+ //
+ string sid (pr.repository.node_id + ':' + *pr.before);
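+ //
+ // (Note that this sid identifies the CI tenant of the previous head
+ // commit, which is the one we are attempting to cancel.)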
+
+ if (optional<tenant_service> ts = cancel (error, warn,
+ verb_ ? &trace : nullptr,
+ *build_db_, retry_,
+ "ci-github", sid,
+ true /* ref_count */))
+ {
+ l3 ([&]{trace << "pull request " << pr.pull_request.node_id
+ << ": attempted to cancel CI of previous head commit"
+ << " (ref_count: " << ts->ref_count << ')';});
+ }
+ else
+ {
+ // It's possible that there was no CI for the previous commit for
+ // various reasons (e.g., CI was not enabled).
+ //
+ l3 ([&]{trace << "pull request " << pr.pull_request.node_id
+ << ": failed to cancel CI of previous head commit "
+ << "with tenant_service id " << sid;});
+ }
+ }
+ else
+ {
+ error << "pull request " << pr.pull_request.node_id
+ << ": before commit is missing in synchronize event";
+ }
+ }
+
// Note: for remote PRs the check_sha will be set later, in
// build_unloaded_pre_check(), to test merge commit id.
//
@@ -1082,10 +1418,13 @@ namespace brep
return true;
}
- function<optional<string> (const tenant_service&)> ci_github::
- build_unloaded (tenant_service&& ts,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_unloaded (const string& ti,
+ tenant_service&& ts,
const diag_epilogue& log_writer) const noexcept
{
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
service_data sd;
@@ -1100,15 +1439,24 @@ namespace brep
}
return sd.pre_check
- ? build_unloaded_pre_check (move (ts), move (sd), log_writer)
- : build_unloaded_load (move (ts), move (sd), log_writer);
+ ? build_unloaded_pre_check (move (ts), move (sd), log_writer)
+ : build_unloaded_load (ti, move (ts), move (sd), log_writer);
}
- function<optional<string> (const tenant_service&)> ci_github::
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
build_unloaded_pre_check (tenant_service&& ts,
service_data&& sd,
const diag_epilogue& log_writer) const noexcept
+ try
{
+ // NOTE: this function is noexcept and should not throw.
+ //
+ // In a few places where invalid_argument is unlikely to be thrown and/or
+ // would indicate that things are seriously broken we let it propagate to
+ // the function catch block where the pre-check tenant will be canceled
+ // (otherwise we could end up in an infinite loop, e.g., because the
+ // problematic arguments won't change).
+
NOTIFICATION_DIAG (log_writer);
// We get here for PRs only (but both local and remote). The overall
@@ -1134,6 +1482,8 @@ namespace brep
// Request PR pre-check info (triggering the generation of the test merge
// commit on the GitHub's side).
//
+ // Let unlikely invalid_argument propagate (see above).
+ //
optional<gq_pr_pre_check_info> pc (
gq_fetch_pull_request_pre_check_info (error,
sd.installation_access.token,
@@ -1177,7 +1527,7 @@ namespace brep
// Service id that will uniquely identify the CI tenant.
//
- string sid (sd.repository_node_id + ":" + sd.report_sha);
+ string sid (sd.repository_node_id + ':' + sd.report_sha);
// Create an unloaded CI tenant, doing nothing if one already exists
// (which could've been created by a head branch push or another PR
@@ -1193,38 +1543,50 @@ namespace brep
// notifications until (1) we load the tenant, (2) we cancel it, or (3)
// it gets archived after some timeout.
//
- if (auto pr = create (error, warn, verb_ ? &trace : nullptr,
- *build_db_, retry_,
- tenant_service (sid, "ci-github", sd.json ()),
- chrono::seconds (30) /* interval */,
- chrono::seconds (0) /* delay */,
- duplicate_tenant_mode::ignore))
- {
- if (pr->second == duplicate_tenant_result::ignored)
+ try
+ {
+ if (auto pr = create (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_,
+ tenant_service (sid, "ci-github", sd.json ()),
+ chrono::seconds (30) /* interval */,
+ chrono::seconds (0) /* delay */,
+ duplicate_tenant_mode::ignore))
{
- // This PR is sharing a head commit with something else.
- //
- // If this is a local PR then it's probably the branch push, which
- // is expected, so do nothing.
- //
- // If this is a remote PR then it could be anything (branch push,
- // local PR, or another remote PR) which in turn means the CI result
- // may end up being for head, not merge commit. There is nothing we
- // can do about it on our side (the user can enable the head-behind-
- // base protection on their side).
- //
- if (sd.kind == service_data::remote)
+ if (pr->second == duplicate_tenant_result::ignored)
{
- l3 ([&]{trace << "remote pull request " << *sd.pr_node_id
- << ": CI tenant already exists for " << sid;});
+ // This PR is sharing a head commit with something else.
+ //
+ // If this is a local PR then it's probably the branch push, which
+ // is expected, so do nothing.
+ //
+ // If this is a remote PR then it could be anything (branch push,
+ // local PR, or another remote PR) which in turn means the CI
+ // result may end up being for head, not merge commit. There is
+ // nothing we can do about it on our side (the user can enable the
+ // head-behind-base protection on their side).
+ //
+ if (sd.kind == service_data::remote)
+ {
+ l3 ([&]{trace << "remote pull request " << *sd.pr_node_id
+ << ": CI tenant already exists for " << sid;});
+ }
}
}
+ else
+ {
+ error << "pull request " << *sd.pr_node_id
+ << ": failed to create unloaded CI tenant "
+ << "with tenant_service id " << sid;
+
+ // Fall through to cancel.
+ }
}
- else
+ catch (const runtime_error& e) // Database retries exhausted.
{
error << "pull request " << *sd.pr_node_id
- << ": unable to create unloaded CI tenant "
- << "with tenant_service id " << sid;
+ << ": failed to create unloaded CI tenant "
+ << "with tenant_service id " << sid
+ << ": " << e.what ();
// Fall through to cancel.
}
@@ -1232,26 +1594,70 @@ namespace brep
// Cancel the pre-check tenant.
//
- if (!cancel (error, warn, verb_ ? &trace : nullptr,
- *build_db_, retry_,
- ts.type,
- ts.id))
+ try
+ {
+ if (!cancel (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_,
+ ts.type,
+ ts.id))
+ {
+ // Should never happen (no such tenant).
+ //
+ error << "pull request " << *sd.pr_node_id
+ << ": failed to cancel pre-check tenant with tenant_service id "
+ << ts.id;
+ }
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
{
- // Should never happen (no such tenant).
- //
error << "pull request " << *sd.pr_node_id
<< ": failed to cancel pre-check tenant with tenant_service id "
- << ts.id;
+ << ts.id << ": " << e.what ();
+ }
+
+ return nullptr;
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+ error << "pull request " << *sd.pr_node_id
+ << ": unhandled exception: " << e.what ();
+
+ // Cancel the pre-check tenant otherwise we could end up in an infinite
+ // loop (see top of function).
+ //
+ try
+ {
+ if (cancel (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_,
+ ts.type,
+ ts.id))
+ l3 ([&]{trace << "canceled pre-check tenant " << ts.id;});
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ l3 ([&]{trace << "failed to cancel pre-check tenant " << ts.id << ": "
+ << e.what ();});
}
return nullptr;
}
- function<optional<string> (const tenant_service&)> ci_github::
- build_unloaded_load (tenant_service&& ts,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_unloaded_load (const string& tenant_id,
+ tenant_service&& ts,
service_data&& sd,
const diag_epilogue& log_writer) const noexcept
+ try
{
+ // NOTE: this function is noexcept and should not throw.
+ //
+ // In a few places where invalid_argument is unlikely to be thrown and/or
+ // would indicate that things are seriously broken we let it propagate to
+ // the function catch block where the tenant will be canceled (otherwise
+ // we could end up in an infinite loop, e.g., because the problematic
+ // arguments won't change).
+
NOTIFICATION_DIAG (log_writer);
// Load the tenant, which is essentially the same for both branch push and
@@ -1297,6 +1703,8 @@ namespace brep
check_run cr;
cr.name = move (name);
+ // Let unlikely invalid_argument propagate (see above).
+ //
if (gq_create_check_run (error,
cr,
iat->token,
@@ -1323,14 +1731,16 @@ namespace brep
{
assert (!node_id.empty ());
- optional<gq_built_result> br (
- gq_built_result (gh_to_conclusion (rs, sd.warning_success),
- circle (rs) + ' ' + ucase (to_string (rs)),
- move (summary)));
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ gq_built_result br (
+ make_built_result (rs, sd.warning_success, move (summary)));
check_run cr;
cr.name = name; // For display purposes only.
+ // Let unlikely invalid_argument propagate (see above).
+ //
if (gq_update_check_run (error,
cr,
iat->token,
@@ -1357,16 +1767,24 @@ namespace brep
//
string conclusion_node_id; // Conclusion check run node ID.
- if (auto cr = create_synthetic_cr (conclusion_check_run_name))
+ if (!sd.conclusion_node_id)
{
- l3 ([&]{trace << "created check_run { " << *cr << " }";});
+ if (auto cr = create_synthetic_cr (conclusion_check_run_name))
+ {
+ l3 ([&]{trace << "created check_run { " << *cr << " }";});
- conclusion_node_id = move (*cr->node_id);
+ conclusion_node_id = move (*cr->node_id);
+ }
}
+ const string& effective_conclusion_node_id (
+ sd.conclusion_node_id
+ ? *sd.conclusion_node_id
+ : conclusion_node_id);
+
// Load the CI tenant if the conclusion check run was created.
//
- if (!conclusion_node_id.empty ())
+ if (!effective_conclusion_node_id.empty ())
{
string ru; // Repository URL.
@@ -1383,46 +1801,65 @@ namespace brep
else
ru = sd.repository_clone_url + '#' + sd.check_sha;
+ // Let unlikely invalid_argument propagate (see above).
+ //
repository_location rl (move (ru), repository_type::git);
- optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr,
- *build_db_, retry_,
- move (ts),
- move (rl)));
-
- if (!r || r->status != 200)
+ try
{
- if (auto cr = update_synthetic_cr (conclusion_node_id,
- conclusion_check_run_name,
- result_status::error,
- to_check_run_summary (r)))
- {
- l3 ([&]{trace << "updated check_run { " << *cr << " }";});
- }
- else
+ optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_,
+ move (ts),
+ move (rl)));
+
+ if (!r || r->status != 200)
{
- // Nothing really we can do in this case since we will not receive
- // any further notifications. Log the error as a last resort.
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ if (auto cr = update_synthetic_cr (effective_conclusion_node_id,
+ conclusion_check_run_name,
+ result_status::error,
+ to_check_run_summary (r)))
+ {
+ l3 ([&]{trace << "updated check_run { " << *cr << " }";});
+ }
+ else
+ {
+ // Nothing really we can do in this case since we will not receive
+ // any further notifications. Log the error as a last resort.
- error << "failed to load CI tenant " << ts.id
- << " and unable to update conclusion";
+ error << "failed to load CI tenant " << ts.id
+ << " and unable to update conclusion";
+ }
+
+ return nullptr; // No need to update service data in this case.
}
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ error << "failed to load CI tenant " << ts.id << ": " << e.what ();
- return nullptr; // No need to update service data in this case.
+ // Fall through to retry on next call.
}
}
- else if (!new_iat)
- return nullptr; // Nothing to save (but retry on next call).
+
+ if (!new_iat && conclusion_node_id.empty ())
+ return nullptr; // Nothing to save (but potentially retry on next call).
return [&error,
+ tenant_id,
iat = move (new_iat),
cni = move (conclusion_node_id)]
- (const tenant_service& ts) -> optional<string>
+ (const string& ti,
+ const tenant_service& ts) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to
// transaction being aborted) and so should not move out of its
// captures.
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
service_data sd;
try
{
@@ -1443,6 +1880,28 @@ namespace brep
return sd.json ();
};
}
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+ error << "CI tenant " << ts.id << ": unhandled exception: " << e.what ();
+
+ // Cancel the tenant otherwise we could end up in an infinite loop (see
+ // top of function).
+ //
+ try
+ {
+ if (cancel (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_, ts.type, ts.id))
+ l3 ([&]{trace << "canceled CI tenant " << ts.id;});
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ l3 ([&]{trace << "failed to cancel CI tenant " << ts.id
+ << ": " << e.what ();});
+ }
+
+ return nullptr;
+ }
// Build state change notifications (see tenant-services.hxx for
// background). Mapping our state transitions to GitHub poses multiple
@@ -1453,9 +1912,9 @@ namespace brep
// them when notifying GitHub. The first is not important (we expect the
// state to go back to building shortly). The second should normally not
// happen and would mean that a completed check suite may go back on its
- // conclusion (which would be pretty confusing for the user). @@@ This
- // can/will happen on check run rebuild. Distinguish between internal
- // and external rebuilds?
+ // conclusion (which would be pretty confusing for the user). Note that
+ // the ->queued state transition of a check run rebuild triggered by
+ // us is handled directly in handle_check_run_rerequest().
//
// So, for GitHub notifications, we only have the following linear
// transition sequence:
@@ -1532,13 +1991,17 @@ namespace brep
// if we have node_id, then we update, otherwise, we create (potentially
// overriding the check run created previously).
//
- function<optional<string> (const tenant_service&)> ci_github::
- build_queued (const tenant_service& ts,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_queued (const string& tenant_id,
+ const tenant_service& ts,
const vector<build>& builds,
optional<build_state> istate,
const build_queued_hints& hs,
const diag_epilogue& log_writer) const noexcept
+ try
{
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
service_data sd;
@@ -1638,11 +2101,12 @@ namespace brep
{
// Create a check_run for each build as a single request.
//
+ // Let unlikely invalid_argument propagate.
+ //
if (gq_create_check_runs (error,
crs,
iat->token,
- sd.repository_node_id, sd.report_sha,
- build_state::queued))
+ sd.repository_node_id, sd.report_sha))
{
for (const check_run& cr: crs)
{
@@ -1654,15 +2118,20 @@ namespace brep
}
}
- return [bs = move (bs),
+ return [tenant_id,
+ bs = move (bs),
iat = move (new_iat),
crs = move (crs),
error = move (error),
- warn = move (warn)] (const tenant_service& ts) -> optional<string>
+ warn = move (warn)] (const string& ti,
+ const tenant_service& ts) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to transaction
// being aborted) and so should not move out of its captures.
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
service_data sd;
try
{
@@ -1702,12 +2171,24 @@ namespace brep
return sd.json ();
};
}
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
- function<optional<string> (const tenant_service&)> ci_github::
- build_building (const tenant_service& ts,
+ error << "CI tenant " << ts.id << ": unhandled exception: " << e.what ();
+
+ return nullptr;
+ }
+
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_building (const string& tenant_id,
+ const tenant_service& ts,
const build& b,
const diag_epilogue& log_writer) const noexcept
+ try
{
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
service_data sd;
@@ -1783,6 +2264,8 @@ namespace brep
//
if (iat != nullptr)
{
+ // Let unlikely invalid_argument propagate.
+ //
if (gq_update_check_run (error,
*cr,
iat->token,
@@ -1806,14 +2289,19 @@ namespace brep
}
}
- return [iat = move (new_iat),
+ return [tenant_id,
+ iat = move (new_iat),
cr = move (*cr),
error = move (error),
- warn = move (warn)] (const tenant_service& ts) -> optional<string>
+ warn = move (warn)] (const string& ti,
+ const tenant_service& ts) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to transaction
// being aborted) and so should not move out of its captures.
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
service_data sd;
try
{
@@ -1848,18 +2336,31 @@ namespace brep
return sd.json ();
};
}
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ error << "check run " << bid << ": unhandled exception: " << e.what();
+
+ return nullptr;
+ }
- function<optional<string> (const tenant_service&)> ci_github::
- build_built (const tenant_service& ts,
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_built (const string& tenant_id,
+ const tenant_service& ts,
const build& b,
const diag_epilogue& log_writer) const noexcept
+ try
{
- // @@ TODO Include service_data::event_node_id and perhaps ts.id in
- // diagnostics? E.g. when failing to update check runs we print the
- // build ID only.
- //
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
+ // @@ TODO Include ts.id in diagnostics? Check run build ids alone seem
+ // kind of meaningless. Log lines get pretty long this way however.
+
service_data sd;
try
{
@@ -1978,6 +2479,11 @@ namespace brep
{
using namespace web::xhtml;
+ // Note: let all serialization exceptions propagate. The XML
+ // serialization code can throw bad_alloc or xml::serialization in
+ // case of I/O failures, but we're serializing to a string stream so
+ // both exceptions are unlikely.
+ //
ostringstream os;
xml::serializer s (os, "check_run_summary");
@@ -2069,13 +2575,12 @@ namespace brep
}
gq_built_result br (
- gh_to_conclusion (*b.status, sd.warning_success),
- circle (*b.status) + ' ' + ucase (to_string (*b.status)),
- move (sm));
+ make_built_result (*b.status, sd.warning_success, move (sm)));
if (cr.node_id)
{
- // Update existing check run to built.
+ // Update existing check run to built. Let unlikely invalid_argument
+ // propagate.
//
if (gq_update_check_run (error,
cr,
@@ -2092,7 +2597,7 @@ namespace brep
}
else
{
- // Create new check run.
+ // Create new check run. Let unlikely invalid_argument propagate.
//
// Note that we don't have build hints so will be creating this check
// run with the full build id as name. In the unlikely event that an
@@ -2129,10 +2634,9 @@ namespace brep
result_status rs (*conclusion);
- optional<gq_built_result> br (
- gq_built_result (gh_to_conclusion (rs, sd.warning_success),
- circle (rs) + ' ' + ucase (to_string (rs)),
- "All configurations are built"));
+ gq_built_result br (
+ make_built_result (rs, sd.warning_success,
+ "All configurations are built"));
check_run cr;
@@ -2141,6 +2645,8 @@ namespace brep
cr.node_id = *sd.conclusion_node_id;
cr.name = conclusion_check_run_name;
+ // Let unlikely invalid_argument propagate.
+ //
if (gq_update_check_run (error,
cr,
iat->token,
@@ -2167,15 +2673,20 @@ namespace brep
}
}
- return [iat = move (new_iat),
+ return [tenant_id,
+ iat = move (new_iat),
cr = move (cr),
completed = completed,
error = move (error),
- warn = move (warn)] (const tenant_service& ts) -> optional<string>
+ warn = move (warn)] (const string& ti,
+ const tenant_service& ts) -> optional<string>
{
// NOTE: this lambda may be called repeatedly (e.g., due to transaction
// being aborted) and so should not move out of its captures.
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
service_data sd;
try
{
@@ -2245,6 +2756,16 @@ namespace brep
return sd.json ();
};
}
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ error << "check run " << bid << ": unhandled exception: " << e.what();
+
+ return nullptr;
+ }
string ci_github::
details_url (const build& b) const
@@ -2270,22 +2791,22 @@ namespace brep
url u (details_url);
- if (!u.query || !u.path || u.path->size () <= 1)
- return nullopt;
-
build_id r;
// Extract the tenant from the URL path.
//
// Example path: @d2586f57-21dc-40b7-beb2-6517ad7917dd
//
- r.package.tenant = u.path->substr (1);
-
- if (r.package.tenant.empty ())
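+ // The path must consist of '@' followed by a 36-character tenant UUID
+ // (37 characters in total).
+ //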
+ if (!u.path || u.path->size () != 37 || (*u.path)[0] != '@')
return nullopt;
+ r.package.tenant = u.path->substr (1);
+
// Extract the rest of the build_id members from the URL query.
//
+ if (!u.query)
+ return nullopt;
+
bool pn (false), pv (false), tg (false), tc (false), pc (false),
th (false);
@@ -2304,21 +2825,25 @@ namespace brep
++vp; // Skip '='
- const char* ve (ep ? ep : vp + strlen (vp)); // Value end pointer.
+ const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end.
// Get the value as-is or URL-decode it.
//
- auto getval = [vp, ve] () { return string (vp, ve); };
+ auto rawval = [vp, ve] () { return string (vp, ve); };
auto decval = [vp, ve] () { return mime_url_decode (vp, ve); };
auto make_version = [] (string&& v)
- { return canonical_version (brep::version (move (v))); };
+ {
+ return canonical_version (brep::version (move (v)));
+ };
auto c = [&n] (bool& b, const char* s)
- { return n == s ? (b = true) : false; };
+ {
+ return n == s ? (b = true) : false;
+ };
if (c (pn, "builds")) r.package.name = package_name (decval ());
- else if (c (pv, "pv")) r.package.version = make_version (getval ());
+ else if (c (pv, "pv")) r.package.version = make_version (rawval ());
else if (c (tg, "tg")) r.target = target_triplet (decval ());
else if (c (tc, "tc")) r.target_config_name = decval ();
else if (c (pc, "pc")) r.package_config_name = decval ();
@@ -2326,7 +2851,7 @@ namespace brep
{
// Toolchain name and version. E.g. "public-0.17.0"
- string v (getval ());
+ string v (rawval ());
// Note: parsing code based on mod/mod-builds.cxx.
//
@@ -2338,7 +2863,7 @@ namespace brep
r.toolchain_version = make_version (v.substr (p + 1));
}
- qp = ep ? ep + 1 : nullptr;
+ qp = ep != nullptr ? ep + 1 : nullptr;
}
if (!pn || !pv || !tg || !tc || !pc || !th)
@@ -2346,7 +2871,7 @@ namespace brep
return r;
}
- catch (const invalid_argument&)
+ catch (const invalid_argument&) // Invalid url, brep::version, etc.
{
return nullopt;
}
@@ -2455,6 +2980,8 @@ namespace brep
//
iat.expires_at -= chrono::minutes (5);
}
+ // gh_installation_access_token (via github_post())
+ //
catch (const json::invalid_json_input& e)
{
// Note: e.name is the GitHub API endpoint.
@@ -2464,12 +2991,12 @@ namespace brep
<< e.position << ", error: " << e;
return nullopt;
}
- catch (const invalid_argument& e)
+ catch (const invalid_argument& e) // github_post()
{
error << "malformed header(s) in response: " << e;
return nullopt;
}
- catch (const system_error& e)
+ catch (const system_error& e) // github_post()
{
error << "unable to get installation access token (errno=" << e.code ()
<< "): " << e.what ();