Diffstat (limited to 'mod')
-rw-r--r--  mod/ci-common.cxx                   64
-rw-r--r--  mod/ci-common.hxx                   12
-rw-r--r--  mod/mod-ci-github-gh.cxx             4
-rw-r--r--  mod/mod-ci-github-gh.hxx             6
-rw-r--r--  mod/mod-ci-github-gq.cxx             6
-rw-r--r--  mod/mod-ci-github-gq.hxx            11
-rw-r--r--  mod/mod-ci-github-service-data.hxx   4
-rw-r--r--  mod/mod-ci-github.cxx              912
8 files changed, 439 insertions, 580 deletions
diff --git a/mod/ci-common.cxx b/mod/ci-common.cxx
index 4b9f9f9..cba421b 100644
--- a/mod/ci-common.cxx
+++ b/mod/ci-common.cxx
@@ -553,7 +553,11 @@ namespace brep
assert (!transaction::has_current ());
build_tenant t;
+
+ // Set the reference count to 1 for the `created` result.
+ //
duplicate_tenant_result r (duplicate_tenant_result::created);
+ service.ref_count = 1;
for (string request_id;;)
{
@@ -584,14 +588,31 @@ namespace brep
: duplicate_tenant_mode::ignore);
}
+ // Shouldn't be here otherwise.
+ //
+ assert (t->service);
+
// Bail out in the ignore mode and cancel the tenant in the
// replace mode.
//
if (mode == duplicate_tenant_mode::ignore)
+ {
+ // Increment the reference count for the `ignored` result.
+ //
+ ++(t->service->ref_count);
+
+ db.update (t);
+ tr.commit ();
+
return make_pair (move (t->id), duplicate_tenant_result::ignored);
+ }
assert (mode == duplicate_tenant_mode::replace);
+ // Preserve the current reference count for the `replaced` result.
+ //
+ service.ref_count = t->service->ref_count;
+
if (t->unloaded_timestamp)
{
db.erase (t);
@@ -678,6 +699,7 @@ namespace brep
//
request_id = move (t.id);
service = move (*t.service);
+ service.ref_count = 1;
r = duplicate_tenant_result::created;
}
}
@@ -788,7 +810,8 @@ namespace brep
odb::core::database& db,
size_t retry,
const string& type,
- const string& id) const
+ const string& id,
+ bool ref_count) const
{
using namespace odb::core;
@@ -810,25 +833,44 @@ namespace brep
if (t == nullptr)
return nullopt;
- r = move (t->service);
+ // Shouldn't be here otherwise.
+ //
+ assert (t->service && t->service->ref_count != 0);
- if (t->unloaded_timestamp)
+ bool cancel (!ref_count || --(t->service->ref_count) == 0);
+
+ if (cancel)
{
- db.erase (t);
+ // Move out the service state before it is dropped from the tenant.
+ //
+ r = move (t->service);
+
+ if (t->unloaded_timestamp)
+ {
+ db.erase (t);
+ }
+ else
+ {
+ t->service = nullopt;
+ t->archived = true;
+ db.update (t);
+ }
+
+ if (trace != nullptr)
+ *trace << "CI request " << t->id << " for service " << id << ' '
+ << type << " is canceled";
}
else
{
- t->service = nullopt;
- t->archived = true;
- db.update (t);
+ db.update (t); // Update the service reference count.
+
+ // Move out the service state after the tenant is updated.
+ //
+ r = move (t->service);
}
tr.commit ();
- if (trace != nullptr)
- *trace << "CI request " << t->id << " for service " << id << ' '
- << type << " is canceled";
-
// Bail out if we have successfully updated or erased the tenant
// object.
//
diff --git a/mod/ci-common.hxx b/mod/ci-common.hxx
index 36d5f0e..b32d397 100644
--- a/mod/ci-common.hxx
+++ b/mod/ci-common.hxx
@@ -103,6 +103,10 @@ namespace brep
// Finally note that only duplicate_tenant_mode::fail can be used if the
// service id is empty.
//
+ // The tenant reference count is set to 1 if the result is `created`,
+ // incremented if the result is `ignored`, and preserved if the result is
+ // `replaced`.
+ //
// Repeat the attempts on the recoverable database failures (deadlocks,
// etc) and throw runtime_error if no more retries left.
//
@@ -150,6 +154,11 @@ namespace brep
// dropped. Note that the latter allow using unloaded tenants as a
// relatively cheap asynchronous execution mechanism.
//
+ // If ref_count is true, then decrement the tenant reference count and
+ // only cancel the CI request if it becomes 0. In this mode the caller can
+ // determine if the request was actually canceled by checking if the
+ // reference count in the returned service state is 0.
+ //
// Repeat the attempts on the recoverable database failures (deadlocks,
// etc) and throw runtime_error if no more retries left.
//
@@ -162,7 +171,8 @@ namespace brep
odb::core::database&,
size_t retry,
const string& type,
- const string& id) const;
+ const string& id,
+ bool ref_count = false) const;
// Cancel previously created or started CI request. Return false if there
// is no tenant for the specified tenant id. Note that the reason argument
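[Annotation: to illustrate the new reference counting contract (set to 1 on `created`, incremented on `ignored`, preserved on `replaced`), here is a minimal sketch of the cancellation side, following the call shape used in mod-ci-github.cxx further down; `sid` and the surrounding handler context are hypothetical.]

// Attempt to cancel a CI request that may be shared by multiple PRs
// (tenant reference counting): the request is only actually canceled
// once the reference count drops to zero.
//
if (optional<tenant_service> ts = cancel (error, warn,
                                          verb_ ? &trace : nullptr,
                                          *build_db_, retry_,
                                          "ci-github", sid,
                                          true /* ref_count */))
{
  if (ts->ref_count == 0)
    ; // Actually canceled: the tenant was archived or erased.
  else
    ; // Only decremented: other references remain.
}
else
  ; // No tenant with this service id (e.g., CI was never started).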
diff --git a/mod/mod-ci-github-gh.cxx b/mod/mod-ci-github-gh.cxx
index 6372ef0..6ab93d7 100644
--- a/mod/mod-ci-github-gh.cxx
+++ b/mod/mod-ci-github-gh.cxx
@@ -513,7 +513,7 @@ namespace brep
{
p.next_expect (event::begin_object);
- bool ac (false), pr (false), rp (false), in (false);
+ bool ac (false), pr (false), bf (false), rp (false), in (false);
// Skip unknown/uninteresting members.
//
@@ -526,6 +526,7 @@ namespace brep
if (c (ac, "action")) action = p.next_expect_string ();
else if (c (pr, "pull_request")) pull_request = gh_pull_request (p);
+ else if (c (bf, "before")) before = p.next_expect_string ();
else if (c (rp, "repository")) repository = gh_repository (p);
else if (c (in, "installation")) installation = gh_installation (p);
else p.next_expect_value_skip ();
@@ -542,6 +543,7 @@ namespace brep
{
os << "action: " << pr.action;
os << ", pull_request { " << pr.pull_request << " }";
+ os << ", before: " << (pr.before ? *pr.before : "null");
os << ", repository { " << pr.repository << " }";
os << ", installation { " << pr.installation << " }";
diff --git a/mod/mod-ci-github-gh.hxx b/mod/mod-ci-github-gh.hxx
index b29904b..6ede697 100644
--- a/mod/mod-ci-github-gh.hxx
+++ b/mod/mod-ci-github-gh.hxx
@@ -183,6 +183,12 @@ namespace brep
string action;
gh_pull_request pull_request;
+
+ // The SHA of the previous commit on the head branch before the current
+ // one. Only present if action is "synchronize".
+ //
+ optional<string> before;
+
gh_repository repository;
gh_installation installation;
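[Annotation: for context, the members parsed above come from GitHub's pull_request webhook payload, which for a synchronize action looks roughly like the trimmed sketch below (the before/after fields are per GitHub's webhook documentation; members other than action, pull_request, before, repository, and installation are skipped by the parser).]

{
  "action": "synchronize",
  "before": "6dcb09b5b57875f334f61aebed695e2e4193db5e",
  "after": "...",
  "pull_request": { ... },
  "repository": { ... },
  "installation": { ... }
}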
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx
index 3805445..fa701a8 100644
--- a/mod/mod-ci-github-gq.cxx
+++ b/mod/mod-ci-github-gq.cxx
@@ -436,7 +436,7 @@ namespace brep
ostringstream os;
- os << "mutation {" << '\n';
+ os << "mutation {" << '\n';
// Serialize a `createCheckRun` for the build.
//
@@ -472,7 +472,7 @@ namespace brep
<< " }" << '\n'
<< "}" << '\n';
- os << "}" << '\n';
+ os << "}" << '\n';
return os.str ();
}
@@ -553,8 +553,6 @@ namespace brep
assert (cr.state != build_state::built);
#endif
- // Empty details URL because it's not available until building.
- //
string rq (
gq_serialize_request (gq_mutation_create_check_runs (rid, hs, crs)));
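[Annotation: the serialized request produced here is a GraphQL mutation of roughly the following shape, one aliased createCheckRun per build (a sketch: the alias and input field spellings are assumed from GitHub's createCheckRun GraphQL API; the authoritative serialization is gq_mutation_create_check_runs(), and the details_url now comes from each check_run object rather than being omitted).]

mutation {
  cr0: createCheckRun (input: {name: "...",
                               repositoryId: "...",
                               headSha: "...",
                               detailsUrl: "...",
                               status: QUEUED})
  {
    checkRun { id }
  }
}

[Note that, per the header comment below, creating a check run that reuses an existing name effectively replaces the old one on GitHub.]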
diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx
index 9c23cd4..7c564d7 100644
--- a/mod/mod-ci-github-gq.hxx
+++ b/mod/mod-ci-github-gq.hxx
@@ -19,13 +19,10 @@ namespace brep
// GraphQL functions (all start with gq_).
//
- // Create a new check run on GitHub for each build with the build state
- // taken from each check_run object. Update `check_runs` with the new data
- // (node id and state_synced). Return false and issue diagnostics if the
- // request failed.
- //
- // Note: no details_url yet since there will be no entry in the build result
- // search page until the task starts building.
+ // Create a new check run on GitHub for each build with the build state,
+ // name, and details_url taken from each check_run object. Update
+ // `check_runs` with the new data (node id and state_synced). Return false
+ // and issue diagnostics if the request failed.
//
// Note that creating a check_run named `foo` will effectively replace any
// existing check_runs with that name. They will still exist on the GitHub
diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx
index 93a8895..cabd19a 100644
--- a/mod/mod-ci-github-service-data.hxx
+++ b/mod/mod-ci-github-service-data.hxx
@@ -35,8 +35,8 @@ namespace brep
optional<result_status> status; // Only if state is built & synced.
- // Never serialized. Only used by some of the GraphQL functions in
- // mod-ci-github-gq.hxx.
+ // Note: never serialized (only used to pass information to the GraphQL
+ // functions).
//
optional<string> details_url;
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx
index 022abac..3bfee41 100644
--- a/mod/mod-ci-github.cxx
+++ b/mod/mod-ci-github.cxx
@@ -398,7 +398,8 @@ namespace brep
throw invalid_request (400, move (m));
}
- if (pr.action == "opened" || pr.action == "synchronize")
+ if (pr.action == "opened" ||
+ pr.action == "synchronize")
{
// opened
// A pull request was opened.
@@ -406,23 +407,78 @@ namespace brep
// synchronize
// A pull request's head branch was updated from the base branch or
// new commits were pushed to the head branch. (Note that there is
- // no equivalent event for the base branch. That case gets handled
- // in handle_check_suite_request() instead. @@ Not anymore.)
+ // no equivalent event for the base branch.)
//
- // Note that both cases are handled the same: we start a new CI
+ // Note that both cases are handled similarly: we start a new CI
// request which will be reported on the new commit id.
//
return handle_pull_request (move (pr), warning_success);
}
- else
+ else if (pr.action == "edited")
{
- // Ignore the remaining actions by sending a 200 response with empty
- // body.
+      // The PR base branch was changed (to a different branch), along
+      // with other irrelevant changes (title, body, etc).
//
- // @@ Ignore known but log unknown, as in check_suite above?
+ // This is in a sense a special case of the base branch moving. In
+      // that case we don't do anything (due to the head sharing problem),
+ // relying instead on the branch protection rule. So it makes sense
+ // to do the same here.
//
return true;
}
+ else if (pr.action == "closed")
+ {
+ // PR has been closed (as merged or not; see merged member). Also
+ // apparently received if base branch is deleted (and the same
+ // for head branch). See also the reopened event below.
+ //
+ // While it may seem natural to cancel the CI for the closed PR, it
+ // might actually be useful to have a completed CI record. GitHub
+ // doesn't prevent us from publishing CI results for the closed PR
+ // (even if both base and head branches were deleted). And if such a
+ // PR is reopened, the CI results remain.
+ //
+ return true;
+ }
+ else if (pr.action == "reopened")
+ {
+ // Previously closed PR has been reopened.
+ //
+ // Since we don't cancel the CI for a closed PR, there is nothing
+ // to do if it is reopened.
+ //
+ return true;
+ }
+ else if (pr.action == "assigned" ||
+ pr.action == "auto_merge_disabled" ||
+ pr.action == "auto_merge_enabled" ||
+ pr.action == "converted_to_draft" ||
+ pr.action == "demilestoned" ||
+ pr.action == "dequeued" ||
+ pr.action == "enqueued" ||
+ pr.action == "labeled" ||
+ pr.action == "locked" ||
+ pr.action == "milestoned" ||
+ pr.action == "ready_for_review" ||
+ pr.action == "review_request_removed" ||
+ pr.action == "review_requested" ||
+ pr.action == "unassigned" ||
+ pr.action == "unlabeled" ||
+ pr.action == "unlocked")
+ {
+ // These have no relation to CI.
+ //
+ return true;
+ }
+ else
+ {
+ // Ignore unknown actions by sending a 200 response with empty body
+ // but also log as an error since we want to notice new actions.
+ //
+ error << "unknown action '" << pr.action << "' in pull_request event";
+
+ return true;
+ }
}
else
{
@@ -507,15 +563,17 @@ namespace brep
//
// While it would have been nice to cancel CIs of PRs with this branch as
- // base not to waste resources, there are complications: Firsty, we can
- // only do this for remote PRs (since local PRs may share the result with
- // branch push). Secondly, we try to do our best even if the branch
- // protection rule for head behind is not enabled. In this case, it would
- // be good to complete the CI. So maybe/later.
+ // base not to waste resources, there are complications: Firstly, we can
+ // only do this for remote PRs (since local PRs will most likely share the
+ // result with branch push). Secondly, we try to do our best even if the
+ // branch protection rule for head behind is not enabled. In this case, it
+ // would be good to complete the CI. So maybe/later. See also the head
+ // case in handle_pull_request(), where we do cancel remote PRs that are
+ // not shared.
// Service id that uniquely identifies the CI tenant.
//
- string sid (cs.repository.node_id + ":" + cs.check_suite.head_sha);
+ string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha);
// If the user requests a rebuild of the (entire) PR, then this manifests
// as the check_suite rather than pull_request event. Specifically:
@@ -653,7 +711,8 @@ namespace brep
// Note that GitHub always posts a message to their GUI saying "You have
// successfully requested <check_run_name> be rerun", regardless of what
// HTTP status code we respond with. However we do return error status codes
- // when appropriate in case they start handling them someday.
+ // when there is no better option (like failing the conclusion) in case they
+ // start handling them someday.
//
bool ci_github::
handle_check_run_rerequest (const gh_check_run_event& cr,
@@ -663,37 +722,12 @@ namespace brep
l3 ([&]{trace << "check_run event { " << cr << " }";});
- // Get a new installation access token.
- //
- auto get_iat = [this, &trace, &error, &cr] ()
- -> optional<gh_installation_access_token>
- {
- optional<string> jwt (generate_jwt (trace, error));
- if (!jwt)
- return nullopt;
-
- optional<gh_installation_access_token> iat (
- obtain_installation_access_token (cr.installation.id,
- move (*jwt),
- error));
-
- if (iat)
- l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
-
- return iat;
- };
-
// The overall plan is as follows:
//
// 1. Load service data.
//
- // 2. If the tenant is archived, then fail (re-create) conclusion with
- // appropriate diagnostics.
- //
- // @@ TMP A PR might get blocked indefinitely by this. If the user
- // re-runs a single CR the "re-run all check runs" option disappears
- // from the UI. So the single re-run will keep failing but there
- // will be no way to start a new CI from scratch.
+ // 2. If the tenant is archived, then fail (re-create) both the check run
+ // and the conclusion with appropriate diagnostics.
//
// 3. If the check run is in the queued state, then do nothing.
//
@@ -711,12 +745,9 @@ namespace brep
//
// c. Clear the completed flag if true.
//
- // d. "Return" the service data to be used after the call.
- //
- // @@ TMP Not currently returning anything.
- //
// 6. If the result of rebuild() indicates the tenant is archived, then
- // fail (update) the conclusion check run with appropriate diagnostics.
+ // fail (update) both the check run and conclusion with appropriate
+ // diagnostics.
//
// 7. If original state is queued (no rebuild was scheduled), then fail
// (update) both the check run and the conclusion.
@@ -725,13 +756,32 @@ namespace brep
// practice we have to re-create as new check runs in order to replace the
// existing ones because GitHub does not allow transitioning out of the
// built state.
+
+ // Get a new installation access token.
//
+ auto get_iat = [this, &trace, &error, &cr] ()
+ -> optional<gh_installation_access_token>
+ {
+ optional<string> jwt (generate_jwt (trace, error));
+ if (!jwt)
+ return nullopt;
+
+ optional<gh_installation_access_token> iat (
+ obtain_installation_access_token (cr.installation.id,
+ move (*jwt),
+ error));
+
+ if (iat)
+ l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
+
+ return iat;
+ };
const string& repo_node_id (cr.repository.node_id);
const string& head_sha (cr.check_run.check_suite.head_sha);
- // The build and conclusion check runs. They are sent to GitHub in a
- // single request (unless something goes wrong) so store them together
+ // Prepare the build and conclusion check runs. They are sent to GitHub in
+ // a single request (unless something goes wrong) so store them together
// from the outset.
//
vector<check_run> check_runs (2);
@@ -740,8 +790,8 @@ namespace brep
ccr.name = conclusion_check_run_name;
- // Load the service data, failing the conclusion check run if the tenant
- // has been archived.
+ // Load the service data, failing the check runs if the tenant has been
+ // archived.
//
service_data sd;
{
@@ -754,15 +804,36 @@ namespace brep
{
if (p->second) // Tenant is archived
{
- // Fail (re-create) the conclusion check run.
+ // Fail (re-create) the check runs.
//
optional<gh_installation_access_token> iat (get_iat ());
if (!iat)
throw server_error ();
gq_built_result br (
- make_built_result (result_status::error, warning_success,
- "Unable to rebuild: tenant has been archived"));
+ make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has "
+ "been archived"));
+
+ // Try to update the conclusion check run even if the first update
+ // fails.
+ //
+ bool f (false); // Failed.
+
+ if (gq_create_check_run (error, bcr, iat->token,
+ repo_node_id, head_sha,
+ cr.check_run.details_url,
+ build_state::built, br))
+ {
+ l3 ([&]{trace << "created check_run { " << bcr << " }";});
+ }
+ else
+ {
+ error << "check_run " << cr.check_run.node_id
+ << ": unable to re-create check run";
+ f = true;
+ }
if (gq_create_check_run (error, ccr, iat->token,
repo_node_id, head_sha,
@@ -773,10 +844,17 @@ namespace brep
}
else
{
- fail << "check_run " << cr.check_run.node_id
- << ": unable to re-create conclusion check run";
+ error << "check_run " << cr.check_run.node_id
+ << ": unable to re-create conclusion check run";
+ f = true;
}
+ // Fail the handler if either of the check runs could not be
+ // updated.
+ //
+ if (f)
+ throw server_error ();
+
return true;
}
@@ -795,10 +873,9 @@ namespace brep
{
// No such tenant.
//
- error << "check run " << cr.check_run.node_id
- << " re-requested but tenant_service with id " << sid
- << " does not exist";
- return true;
+ fail << "check run " << cr.check_run.node_id
+ << " re-requested but tenant_service with id " << sid
+ << " does not exist";
}
}
@@ -809,7 +886,7 @@ namespace brep
if (system_clock::now () > sd.installation_access.expires_at)
{
- if (new_iat = get_iat ())
+ if ((new_iat = get_iat ()))
iat = &*new_iat;
else
throw server_error ();
@@ -821,11 +898,10 @@ namespace brep
//
if (cr.check_run.name == conclusion_check_run_name)
{
- // Must exist if it can be re-requested.
- //
- assert (sd.conclusion_node_id);
+ l3 ([&]{trace << "re-requested conclusion check_run";});
- l3 ([&]{trace << "ignoring re-requested conclusion check_run";});
+ if (!sd.conclusion_node_id)
+ fail << "no conclusion node id for check run " << cr.check_run.node_id;
gq_built_result br (
make_built_result (result_status::error, warning_success,
@@ -864,44 +940,50 @@ namespace brep
<< ": failed to extract build id from details_url";
}
- // Initialize `bcr` with the check run from the service data.
+ // Initialize the check run (`bcr`) with state from the service data.
//
{
// Search for the check run in the service data.
//
- bool found (false);
-
- for (const check_run& scr: sd.check_runs)
- {
- if (scr.node_id && *scr.node_id == cr.check_run.node_id)
- {
- found = true;
-
- // Do nothing if the build is already queued.
- //
- if (scr.state == build_state::queued)
- {
- l3 ([&]{trace << "ignoring already-queued check run";});
- return true;
- }
+ // Note that we look by name in case node id got replaced by a racing
+ // re-request (in which case we ignore this request).
+ //
+ auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (),
+ [&cr] (const check_run& scr)
+ {
+ return scr.name == cr.check_run.name;
+ }));
- bcr.name = scr.name;
- bcr.build_id = scr.build_id;
- bcr.state = scr.state;
+ if (i == sd.check_runs.end ())
+ fail << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "re-requested but does not exist in service data";
- break;
- }
+ // Do nothing if node ids don't match.
+ //
+ if (i->node_id && *i->node_id != cr.check_run.node_id)
+ {
+ l3 ([&]{trace << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "node id has changed in service data";});
+ return true;
}
- if (!found)
+ // Do nothing if the build is already queued.
+ //
+ if (i->state == build_state::queued)
{
- fail << "check_run " << cr.check_run.node_id
- << ": re-requested but does not exist in service data";
+ l3 ([&]{trace << "ignoring already-queued check run";});
+ return true;
}
+
+ bcr.name = i->name;
+ bcr.build_id = i->build_id;
+ bcr.state = i->state;
}
// Transition the build and conclusion check runs out of the built state
- // by re-creating them.
+ // (or any other state) by re-creating them.
//
bcr.state = build_state::queued;
bcr.state_synced = false;
@@ -925,51 +1007,74 @@ namespace brep
<< ": unable to re-create build and conclusion check runs";
}
- // Request the rebuild.
+ // Request the rebuild and update service data.
+ //
+ bool race (false);
- // Function called by rebuild() to update the service data (but only if
- // the build is actually restarted).
+ // Callback function called by rebuild() to update the service data (but
+ // only if the build is actually restarted).
//
- auto update_sd = [&error, &new_iat,
+ auto update_sd = [&error, &new_iat, &race,
&cr, &bcr, &ccr] (const tenant_service& ts,
build_state) -> optional<string>
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to transaction
+ // being aborted) and so should not move out of its captures.
+
+ race = false; // Reset.
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
{
- // NOTE: this lambda may be called repeatedly (e.g., due to transaction
- // being aborted) and so should not move out of its captures.
+ error << "failed to parse service data: " << e;
+ return nullopt;
+ }
- service_data sd;
- try
- {
- sd = service_data (*ts.data);
- }
- catch (const invalid_argument& e)
- {
- error << "failed to parse service data: " << e;
- return nullopt;
- }
+ // Note that we again look by name in case node id got replaced by a
+ // racing re-request. In this case, however, it's impossible to decide
+ // who won that race, so let's fail the check suite to be on the safe
+ // side (in a sense, similar to the rebuild() returning queued below).
+ //
+ auto i (find_if (
+ sd.check_runs.begin (), sd.check_runs.end (),
+ [&cr] (const check_run& scr)
+ {
+ return scr.name == cr.check_run.name;
+ }));
- for (check_run& scr: sd.check_runs)
- {
- if (scr.node_id && *scr.node_id == cr.check_run.node_id)
- {
- scr = bcr; // Update with new node_id, state, state_synced.
- sd.conclusion_node_id = ccr.node_id;
+ if (i == sd.check_runs.end ())
+ {
+ error << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "re-requested but does not exist in service data";
+ return nullopt;
+ }
- sd.completed = false;
+ if (i->node_id && *i->node_id != cr.check_run.node_id)
+ {
+ // Keep the old conclusion node id to make sure any further state
+ // transitions are ignored. A bit of a hack.
+ //
+ race = true;
+ return nullopt;
+ }
- // Save the IAT if we created a new one.
- //
- if (new_iat)
- sd.installation_access = *new_iat;
+ *i = bcr; // Update with new node_id, state, state_synced.
- return sd.json ();
- }
- }
+ sd.conclusion_node_id = ccr.node_id;
+ sd.completed = false;
- assert (false); // Check run must exist because we found it earlier.
+ // Save the IAT if we created a new one.
+ //
+ if (new_iat)
+ sd.installation_access = *new_iat;
- return nullopt;
- };
+ return sd.json ();
+ };
optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd));
@@ -978,417 +1083,88 @@ namespace brep
// conclusion check run. Otherwise the build has been successfully
// re-enqueued so do nothing further.
//
- if (!bs || *bs == build_state::queued)
- {
- // @@ TMP Diverging from the plan here to fail both CRs in the
- // already-archived case as well. Seems better to not leave the
- // build CR in the queued state.
- //
- gq_built_result bbr; // Built result for the build check run.
- gq_built_result cbr; // Built result for the conclusion check run.
-
- if (!bs) // Archived
- {
- // The build has expired since we loaded the service data. Most likely
- // the tenant has been archived.
- //
- bbr = make_built_result (
- result_status::error, warning_success,
- "Unable to rebuild: tenant has been archived or no such build");
-
- cbr = bbr;
- }
- else // *bs == build_state::queued
- {
- // This build has been re-enqueued since we first loaded the service
- // data. This could happen if the user clicked "re-run" multiple times
- // and another handler won the rebuild() race.
- //
- // However the winner of the check runs race cannot be determined.
- //
- // Best case the other handler won the check runs race as well and
- // thus everything will proceed normally. Our check runs will be
- // invisible and disregarded.
- //
- // Worst case we won the check runs race and the other handler's check
- // runs -- the ones that will be updated by the build_*()
- // notifications -- are no longer visible, leaving things quite
- // broken.
- //
- // Either way, we fail our check runs. In the best case scenario it
- // will have no effect; in the worst case scenario it lets the user
- // know something has gone wrong.
- //
- // @@ TMP In an attempt to recover, the user might end up clicking
- // re-run a few times but nothing will change in the UI because the
- // new check runs will only be created once the re-enqueued build
- // has entered the building state.
- //
- bbr = make_built_result (
- result_status::error, warning_success,
- "Unable to rebuild at this time, please try again in a few minutes");
+ if (!race && bs && *bs != build_state::queued)
+ return true;
- cbr = make_built_result (result_status::error, warning_success,
- "Failed to rebuild check run(s)");
- }
+ gq_built_result br; // Built result for both check runs.
- // True if we successfully updated both of the check runs.
+ if (race || bs) // Race or re-enqueued.
+ {
+ // The re-enqueued case: this build has been re-enqueued since we first
+ // loaded the service data. This could happen if the user clicked
+ // "re-run" multiple times and another handler won the rebuild() race.
//
- bool bu (true); // Both updated
-
- // Fail the build check run.
+ // However the winner of the check runs race cannot be determined.
//
- if (gq_update_check_run (error, bcr, iat->token,
- repo_node_id, *bcr.node_id,
- nullopt /* details_url */,
- build_state::built, move (bbr)))
- {
- l3 ([&]{trace << "updated check_run { " << bcr << " }";});
- }
- else
- {
- bu = false;
-
- error << "check run " << cr.check_run.node_id
- << ": unable to update (replacement) check run "
- << *bcr.node_id;
- }
-
- // Fail the conclusion check run.
+ // Best case the other handler won the check runs race as well and
+ // thus everything will proceed normally. Our check runs will be
+ // invisible and disregarded.
//
- if (gq_update_check_run (error, ccr, iat->token,
- repo_node_id, *ccr.node_id,
- nullopt /* details_url */,
- build_state::built, move (cbr)))
- {
- l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
- }
- else
- {
- bu = false;
-
- error << "check run " << cr.check_run.node_id
- << ": unable to update conclusion check run " << *ccr.node_id;
- }
-
- // Fail the handler if either of the check runs could not be updated.
+ // Worst case we won the check runs race and the other handler's check
+ // runs -- the ones that will be updated by the build_*() notifications
+ // -- are no longer visible, leaving things quite broken.
//
- if (!bu)
- throw server_error ();
-
- return true;
+ // Either way, we fail our check runs. In the best case scenario it
+ // will have no effect; in the worst case scenario it lets the user
+ // know something has gone wrong.
+ //
+ br = make_built_result (result_status::error, warning_success,
+ "Unable to rebuild, try again");
}
-
- // The build has successfully been re-enqueued.
-
- return true;
- }
-
- // @@ TMP
- //
-#if 0
- bool ci_github::
- handle_check_run_rerequest (const gh_check_run_event& cr,
- bool warning_success)
- {
- HANDLER_DIAG;
-
- l3 ([&]{trace << "check_run event { " << cr << " }";});
-
- // Fail if this is the conclusion check run.
- //
- if (cr.check_run.name == conclusion_check_run_name)
+ else // Archived.
{
- // @@ Fail conclusion check run with appropriate message and reurn
- // true.
-
- l3 ([&]{trace << "ignoring conclusion check_run";});
-
- // 422 Unprocessable Content: The request was well-formed (i.e.,
- // syntactically correct) but could not be processed.
+ // The build has expired since we loaded the service data. Most likely
+ // the tenant has been archived.
//
- throw invalid_request (422, "Conclusion check run cannot be rebuilt");
+ br = make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has been archived");
}
- // Get a new installation access token.
+ // Try to update the conclusion check run even if the first update fails.
//
- auto get_iat = [this, &trace, &error, &cr] ()
- -> optional<gh_installation_access_token>
- {
- optional<string> jwt (generate_jwt (trace, error));
- if (!jwt)
- return nullopt;
-
- optional<gh_installation_access_token> iat (
- obtain_installation_access_token (cr.installation.id,
- move (*jwt),
- error));
-
- if (iat)
- l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
+ bool f (false); // Failed.
- return iat;
- };
-
- // Create a new conclusion check run, replacing the existing one.
+ // Fail the build check run.
//
- // Return the check run on success or nullopt on failure.
- //
- auto create_conclusion_cr =
- [&cr, &error, warning_success] (const gh_installation_access_token& iat,
- build_state bs,
- optional<result_status> rs = nullopt,
- optional<string> msg = nullopt)
- -> optional<check_run>
+ if (gq_update_check_run (error, bcr, iat->token,
+ repo_node_id, *bcr.node_id,
+ nullopt /* details_url */,
+ build_state::built, br))
{
- optional<gq_built_result> br;
- if (rs)
- {
- assert (msg);
-
- br = make_built_result (*rs, warning_success, move (*msg));
- }
-
- check_run r;
- r.name = conclusion_check_run_name;
-
- if (gq_create_check_run (error, r, iat.token,
- rni, hs,
- nullopt /* details_url */,
- bs, move (br)))
- {
- return r;
- }
- else
- return nullopt;
- };
-
- // The overall plan is as follows:
- //
- // 1. Call the rebuild() function to attempt to schedule a rebuild. Pass
- // the update function that does the following (if called):
- //
- // a. Update the check run being rebuilt (may also not exist).
- //
- // b. Clear the completed flag if true.
- //
- // c. "Return" the service data to be used after the call.
- //
- // 2. If the result of rebuild() indicates the tenant is archived, fail
- // the conclusion check run with appropriate diagnostics.
- //
- // 3. If original state is queued, then no rebuild was scheduled and we do
- // nothing.
- //
- // 4. Otherwise (the original state is building or built):
- //
- // a. Change the check run state to queued.
- //
- // b. Change the conclusion check run to building (do unconditionally
- // to mitigate races).
- //
- // Note that while conceptually we are updating existing check runs, in
- // practice we have to create new check runs to replace the existing ones
- // because GitHub does not allow transitioning out of the built state.
- //
- // This results in a new node id for each check run but we can't save them
- // to the service data after the rebuild() call. As a workaround, when
- // updating the service data we 1) clear the re-requested check run's node
- // id and set the state_synced flag to true to signal to build_building()
- // and build_built() that it needs to create a new check run; and 2) clear
- // the conclusion check run's node id to cause build_built() to create a
- // new conclusion check run. And these two check runs' node ids will be
- // saved to the service data.
-
- // Parse the check_run's details_url to extract build id.
- //
- // While this is a bit hackish, there doesn't seem to be a better way
- // (like associating custom data with a check run). Note that the GitHub
- // UI only allows rebuilding completed check runs, so the details URL
- // should be there.
- //
- optional<build_id> bid (parse_details_url (cr.check_run.details_url));
- if (!bid)
+ l3 ([&]{trace << "updated check_run { " << bcr << " }";});
+ }
+ else
{
- fail << "check run " << cr.check_run.node_id
- << ": failed to extract build id from details_url";
+ error << "check run " << cr.check_run.node_id
+ << ": unable to update (replacement) check run "
+ << *bcr.node_id;
+ f = true;
}
- // The IAT retrieved from the service data.
- //
- optional<gh_installation_access_token> iat;
-
- // True if the check run exists in the service data.
+ // Fail the conclusion check run.
//
- bool cr_found (false);
-
- // Update the state of the check run in the service data. Return (via
- // captured references) the IAT and whether the check run was found.
- //
- // Called by rebuild(), but only if the build is actually restarted.
- //
- auto update_sd = [&iat,
- &cr_found,
- &error,
- &cr] (const tenant_service& ts, build_state)
- -> optional<string>
- {
- // NOTE: this lambda may be called repeatedly (e.g., due to transaction
- // being aborted) and so should not move out of its captures.
-
- service_data sd;
- try
- {
- sd = service_data (*ts.data);
- }
- catch (const invalid_argument& e)
- {
- error << "failed to parse service data: " << e;
- return nullptr;
- }
-
- if (!iat)
- iat = sd.installation_access;
-
- // If the re-requested check run is found, update it in the service
- // data.
- //
- const string& nid (cr.check_run.node_id);
-
- for (check_run& cr: sd.check_runs)
- {
- if (cr.node_id && *cr.node_id == nid)
- {
- cr_found = true;
- cr.state = build_state::queued;
- sd.completed = false;
-
- // Clear the check run node ids and set state_synced to true to
- // cause build_building() and/or build_built() to create new check
- // runs (see the plan above for details).
- //
- cr.node_id = nullopt;
- cr.state_synced = true;
- sd.conclusion_node_id = nullopt;
-
- return sd.json ();
- }
- }
-
- return nullopt;
- };
-
- optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd));
-
- if (!bs)
- {
- // Build has expired (most probably the tenant has been archived).
- //
- // Update the conclusion check run to notify the user (but have to
- // replace it with a new one because we don't know the existing one's
- // node id).
- //
- optional<gh_installation_access_token> iat (get_iat ());
- if (!iat)
- throw server_error ();
-
- if (optional<check_run> ccr = create_conclusion_cr (
- *iat,
- build_state::built,
- result_status::error,
- "Unable to rebuild: tenant has been archived or no such build"))
- {
- l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";});
- }
- else
- {
- // Log the error and return failure to GitHub which will presumably
- // indicate this in its GUI.
- //
- fail << "check run " << cr.check_run.node_id
- << ": unable to create conclusion check run";
- }
- }
- else if (*bs == build_state::queued)
+ if (gq_update_check_run (error, ccr, iat->token,
+ repo_node_id, *ccr.node_id,
+ nullopt /* details_url */,
+ build_state::built, move (br)))
{
- // The build was already queued so nothing to be done. This might happen
- // if the user clicked "re-run" multiple times before we managed to
- // update the check run.
+ l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
}
else
{
- // The build has been requeued.
- //
- assert (*bs == build_state::building || *bs == build_state::built);
-
- if (!cr_found)
- {
- // Respond with an error otherwise GitHub will post a message in its
- // GUI saying "you have successfully requested a rebuild of ..."
- //
- fail << "check_run " << cr.check_run.node_id
- << ": build restarted but check run does not exist "
- << "in service data";
- }
-
- // Get a new IAT if the one from the service data has expired.
- //
- assert (iat.has_value ());
-
- if (system_clock::now () > iat->expires_at)
- {
- iat = get_iat ();
- if (!iat)
- throw server_error ();
- }
-
- // Update (by replacing) the re-requested and conclusion check runs to
- // queued and building, respectively.
- //
- // If either fails we can only log the error but build_building() and/or
- // build_built() should correct the situation (see above for details).
- //
-
- // Update re-requested check run.
- //
- {
- check_run ncr; // New check run.
- ncr.name = cr.check_run.name;
-
- if (gq_create_check_run (error,
- ncr,
- iat->token,
- cr.repository.node_id,
- cr.check_run.check_suite.head_sha,
- cr.check_run.details_url,
- build_state::queued))
- {
- l3 ([&]{trace << "created check_run { " << ncr << " }";});
- }
- else
- {
- error << "check_run " << cr.check_run.node_id
- << ": unable to create (to update) check run in queued state";
- }
- }
-
- // Update conclusion check run.
- //
- if (optional<check_run> ccr =
- create_conclusion_cr (*iat, build_state::building))
- {
- l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";});
- }
- else
- {
- error << "check_run " << cr.check_run.node_id
- << ": unable to create (to update) conclusion check run";
- }
+ error << "check run " << cr.check_run.node_id
+ << ": unable to update conclusion check run " << *ccr.node_id;
+ f = true;
}
+ // Fail the handler if either of the check runs could not be updated.
+ //
+ if (f)
+ throw server_error ();
+
return true;
}
-#endif
// Miscellaneous pull request facts
//
@@ -1408,30 +1184,6 @@ namespace brep
// gets updated with the head commit's SHA and check_suite.pull_requests[]
// will contain all PRs with this branch as head.
//
- // Remaining TODOs
- //
- // - @@ TODO? PR base branch changed (to a different branch)
- //
- // => pull_request(edited)
- //
- // - PR closed @@ TODO
- //
- // Also received if base branch is deleted. (And presumably same for head
- // branch.)
- //
- // => pull_request(closed)
- //
- // Cancel CI?
- //
- // - PR merged @@ TODO
- //
- // => pull_request(merged)
- //
- // => check_suite(PR_base)
- //
- // Probably wouldn't want to CI the base again because the PR CI would've
- // done the equivalent already.
- //
bool ci_github::
handle_pull_request (gh_pull_request_event pr, bool warning_success)
{
@@ -1456,12 +1208,6 @@ namespace brep
l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
- // Note that similar to the branch push case above, while it would have
- // been nice to cancel the previous CI job once the PR head moves (the
- // "synchronize" event), due to the head sharing problem the previous CI
- // job might actually still be relevant (in both local and remote PR
- // cases).
-
// Distinguish between local and remote PRs by comparing the head and base
// repositories' paths.
//
@@ -1470,6 +1216,48 @@ namespace brep
? service_data::local
: service_data::remote);
+ // Note that similar to the branch push case above, while it would have
+ // been nice to cancel the previous CI job once the PR head moves (the
+ // "synchronize" event), due to the head sharing problem the previous CI
+ // job might actually still be relevant (in both local and remote PR
+ // cases). So we only do it for the remote PRs and only if the head is not
+ // shared (via tenant reference counting).
+ //
+ if (kind == service_data::remote && pr.action == "synchronize")
+ {
+ if (pr.before)
+ {
+ // Service id that will uniquely identify the CI tenant.
+ //
+ string sid (pr.repository.node_id + ':' + *pr.before);
+
+ if (optional<tenant_service> ts = cancel (error, warn,
+ verb_ ? &trace : nullptr,
+ *build_db_, retry_,
+ "ci-github", sid,
+ true /* ref_count */))
+ {
+ l3 ([&]{trace << "pull request " << pr.pull_request.node_id
+ << ": attempted to cancel CI of previous head commit"
+ << " (ref_count: " << ts->ref_count << ')';});
+ }
+ else
+ {
+ // It's possible that there was no CI for the previous commit for
+ // various reasons (e.g., CI was not enabled).
+ //
+ l3 ([&]{trace << "pull request " << pr.pull_request.node_id
+ << ": failed to cancel CI of previous head commit "
+ << "with tenant_service id " << sid;});
+ }
+ }
+ else
+ {
+ error << "pull request " << pr.pull_request.node_id
+ << ": before commit is missing in synchronize event";
+ }
+ }
+
// Note: for remote PRs the check_sha will be set later, in
// build_unloaded_pre_check(), to test merge commit id.
//
@@ -1535,6 +1323,8 @@ namespace brep
build_unloaded (tenant_service&& ts,
const diag_epilogue& log_writer) const noexcept
{
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
service_data sd;
@@ -1558,6 +1348,8 @@ namespace brep
service_data&& sd,
const diag_epilogue& log_writer) const noexcept
{
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
// We get here for PRs only (but both local and remote). The overall
@@ -1626,7 +1418,7 @@ namespace brep
// Service id that will uniquely identify the CI tenant.
//
- string sid (sd.repository_node_id + ":" + sd.report_sha);
+ string sid (sd.repository_node_id + ':' + sd.report_sha);
// Create an unloaded CI tenant, doing nothing if one already exists
// (which could've been created by a head branch push or another PR
@@ -1701,6 +1493,8 @@ namespace brep
service_data&& sd,
const diag_epilogue& log_writer) const noexcept
{
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
// Load the tenant, which is essentially the same for both branch push and
@@ -1986,6 +1780,8 @@ namespace brep
const build_queued_hints& hs,
const diag_epilogue& log_writer) const noexcept
{
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
service_data sd;
@@ -2154,6 +1950,8 @@ namespace brep
const build& b,
const diag_epilogue& log_writer) const noexcept
{
+ // NOTE: this function is noexcept and should not throw.
+
NOTIFICATION_DIAG (log_writer);
service_data sd;
@@ -2300,11 +2098,13 @@ namespace brep
const build& b,
const diag_epilogue& log_writer) const noexcept
{
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
// @@ TODO Include service_data::event_node_id and perhaps ts.id in
// diagnostics? E.g. when failing to update check runs we print the
// build ID only.
- //
- NOTIFICATION_DIAG (log_writer);
service_data sd;
try
@@ -2713,22 +2513,22 @@ namespace brep
url u (details_url);
- if (!u.query || !u.path || u.path->size () <= 1)
- return nullopt;
-
build_id r;
// Extract the tenant from the URL path.
//
// Example path: @d2586f57-21dc-40b7-beb2-6517ad7917dd
//
- r.package.tenant = u.path->substr (1);
-
- if (r.package.tenant.empty ())
+ if (!u.path || u.path->size () != 37 || (*u.path)[0] != '@')
return nullopt;
+ r.package.tenant = u.path->substr (1);
+
// Extract the rest of the build_id members from the URL query.
//
+ if (!u.query)
+ return nullopt;
+
bool pn (false), pv (false), tg (false), tc (false), pc (false),
th (false);
@@ -2747,21 +2547,25 @@ namespace brep
++vp; // Skip '='
- const char* ve (ep ? ep : vp + strlen (vp)); // Value end pointer.
+ const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end.
// Get the value as-is or URL-decode it.
//
- auto getval = [vp, ve] () { return string (vp, ve); };
+ auto rawval = [vp, ve] () { return string (vp, ve); };
auto decval = [vp, ve] () { return mime_url_decode (vp, ve); };
auto make_version = [] (string&& v)
- { return canonical_version (brep::version (move (v))); };
+ {
+ return canonical_version (brep::version (move (v)));
+ };
auto c = [&n] (bool& b, const char* s)
- { return n == s ? (b = true) : false; };
+ {
+ return n == s ? (b = true) : false;
+ };
if (c (pn, "builds")) r.package.name = package_name (decval ());
- else if (c (pv, "pv")) r.package.version = make_version (getval ());
+ else if (c (pv, "pv")) r.package.version = make_version (rawval ());
else if (c (tg, "tg")) r.target = target_triplet (decval ());
else if (c (tc, "tc")) r.target_config_name = decval ();
else if (c (pc, "pc")) r.package_config_name = decval ();
@@ -2769,7 +2573,7 @@ namespace brep
{
// Toolchain name and version. E.g. "public-0.17.0"
- string v (getval ());
+ string v (rawval ());
// Note: parsing code based on mod/mod-builds.cxx.
//
@@ -2781,7 +2585,7 @@ namespace brep
r.toolchain_version = make_version (v.substr (p + 1));
}
- qp = ep ? ep + 1 : nullptr;
+ qp = ep != nullptr ? ep + 1 : nullptr;
}
if (!pn || !pv || !tg || !tc || !pc || !th)
@@ -2789,7 +2593,7 @@ namespace brep
return r;
}
- catch (const invalid_argument&)
+ catch (const invalid_argument&) // Invalid url, brep::version, etc.
{
return nullopt;
}
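[Annotation: as a worked example of the stricter parsing above, a details URL of the following shape (host and parameter values hypothetical; the toolchain value follows the "public-0.17.0" example from the comment above):

https://host/@d2586f57-21dc-40b7-beb2-6517ad7917dd?builds=libhello&pv=1.2.3&tg=x86_64-linux-gnu&tc=linux_gcc&pc=default&th=public-0.17.0

would decode into the returned build_id roughly as follows (note that `builds`, `tg`, `tc`, and `pc` are URL-decoded while `pv` and `th` are taken as-is):]

// r.package.tenant      = "d2586f57-21dc-40b7-beb2-6517ad7917dd"
// r.package.name        = "libhello"
// r.package.version     = 1.2.3 (canonical)
// r.target              = x86_64-linux-gnu
// r.target_config_name  = "linux_gcc"
// r.package_config_name = "default"
// r.toolchain_name      = "public"
// r.toolchain_version   = 0.17.0 (canonical)

[The path check requires exactly '@' plus a 36-character tenant id, and all six query parameters must be present for a build_id to be returned; anything else yields nullopt.]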