aboutsummaryrefslogtreecommitdiff
path: root/mod/mod-ci-github-gq.cxx
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2025-02-10 11:34:44 +0200
committerFrancois Kritzinger <francois@codesynthesis.com>2025-02-13 13:51:21 +0200
commitd07f49cab9e46d5dc8084a9d482a62b0a2b28093 (patch)
tree10e22e5c416df6d762a6f0f9ad22d66d6995e92c /mod/mod-ci-github-gq.cxx
parent4abf6fa163c388296c0ed3a45eca267c377f1e73 (diff)
ci-github: Handle HTTP 502 (bad gateway) when creating check runs
Do so by fetching latest check runs from GitHub and failing if they're not all there.
Diffstat (limited to 'mod/mod-ci-github-gq.cxx')
-rw-r--r--mod/mod-ci-github-gq.cxx357
1 files changed, 310 insertions, 47 deletions
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx
index 7abd709..99c04f4 100644
--- a/mod/mod-ci-github-gq.cxx
+++ b/mod/mod-ci-github-gq.cxx
@@ -19,6 +19,7 @@ namespace brep
static const string& gq_name (const string&);
static string gq_name (string&&);
static string gq_str (const string&);
+ static string gq_int (uint64_t);
static string gq_bool (bool);
static const string& gq_enum (const string&);
static string gq_enum (string&&);
@@ -187,7 +188,7 @@ namespace brep
// }
//
static vector<gh_check_run>
- gq_parse_response_check_runs (json::parser& p)
+ gq_parse_mutate_check_runs_response (json::parser& p)
{
using event = json::event;
@@ -223,20 +224,198 @@ namespace brep
return r;
}
- // Send a GraphQL mutation request `rq` that creates or updates one or more
- // check runs. The requested build state is taken from each check_run
- // object. Update the check runs in `crs` with the new data (state, node ID
- // if unset, and state_synced). Return false and issue diagnostics if the
- // request failed.
+ // Serialize a query that fetches the most recent check runs on a commit.
//
+ static string
+ gq_query_get_check_runs (const string& ri, // Repository id
+ const string& ci, // Commit id
+ uint64_t ai, // App id
+ size_t cn) // Check run count
+ {
+
+ ostringstream os;
+
+ os << "query {" << '\n';
+
+ // Get the repository node.
+ //
+ os << "node(id: " << gq_str (ri) << ") {" << '\n'
+ << "... on Repository {" << '\n';
+
+ // Get the commit object.
+ //
+ os << " object(oid: " << gq_str (ci) << ") {" << '\n'
+ << " ... on Commit {" << '\n';
+
+ // Get the check suites on the commit, filtering by our app id. (Note that
+ // as a result there should never be more than one check suite; see
+ // below.)
+ //
+ os << " checkSuites(first: 1" << '\n'
+ << " filterBy: {appId: " << gq_int (ai) << "}) {" << '\n'
+ << " edges { node {" << '\n';
+
+ // Get the check suite's last N check runs (last:).
+ //
+ // Filter by App id because apparently an App can create check runs in
+ // another App's check suite.
+ //
+ // Also ask for the latest check runs only (checkType: LATEST) otherwise
+ // we could receive multiple check runs with the same name. Although this
+ // appears to be the default it's not documented anywhere so best make it
+ // explicit.
+ //
+ // Note that the selection set (fields to be returned) must match that of
+ // the check run mutations (create/update) generated by
+ // gq_mutation_{create,update}_check_runs().
+ //
+ os << " checkRuns(last: " << gq_int (cn) << '\n'
+ << " filterBy: {appId: " << gq_int (ai) << '\n'
+ << " checkType: LATEST}) {" << '\n'
+ << " edges { node { node_id: id name status } }" << '\n'
+ << " }" /* checkRuns */ << '\n'
+ << " } }" /* node, edges */ << '\n'
+ << " }" /* checkSuites */ << '\n'
+ << " }" /* ... on Commit */ << '\n'
+ << " }" /* object */ << '\n'
+ << "}" /* ... on Repository */ << '\n'
+ << "}" /* node */ << '\n';
+
+ os << '}' /* query */ << '\n';
+
+ return os.str ();
+ }
+
+ // Parse a response to a "get check runs for repository/commit" GraphQL
+ // query as constructed by gq_query_get_check_runs().
+ //
+ // Note that there might be other check suites on this commit but they will
+ // all have been created by other apps (GitHub never creates more than one
+ // check suite per app). Therefore our query filters by app id and as a
+ // result there should never be more than one check suite in the response.
+ //
+ // Throw invalid_json_input.
+ //
+ // Example response (only the part we need to parse here):
+ //
+ // {
+ // "node": {
+ // "object":{
+ // "checkSuites":{
+ // "edges":[
+ // {"node":{
+ // "checkRuns":{
+ // "edges":[
+ // {"node":{"node_id":"CR_kwDOLc8CoM8AAAAImvJPfw",
+ // "name":"check_run0",
+ // "status":"QUEUED"}},
+ // {"node":{"node_id":"CR_kwDOLc8CoM8AAAAImvJP_Q",
+ // "name":"check_run1",
+ // "status":"QUEUED"}}
+ // ]
+ // }
+ // }
+ // }
+ // ]
+ // }
+ // }
+ // }
+ // }
+ //
+ static vector<gh_check_run>
+ gq_parse_get_check_runs_response (json::parser& p)
+ {
+ using event = json::event;
+
+ vector<gh_check_run> r;
+
+ gq_parse_response (p, [&r] (json::parser& p)
+ {
+ p.next_expect (event::begin_object); // Outermost {
+
+ p.next_expect_member_object ("node"); // Repository node
+ p.next_expect_member_object ("object"); // Commit
+ p.next_expect_member_object ("checkSuites");
+ p.next_expect_member_array ("edges"); // Check suites array
+ p.next_expect (event::begin_object); // Check suite outer {
+ p.next_expect_member_object ("node");
+ p.next_expect_member_object ("checkRuns");
+ p.next_expect_member_array ("edges"); // Check runs array
+
+ // Parse the check run elements of the `edges` array. E.g.:
+ //
+ // {
+ // "node":{
+ // "node_id":"CR_kwDOLc8CoM8AAAAIobBFlA",
+ // "name":"CONCLUSION",
+ // "status":"IN_PROGRESS"
+ // }
+ // }
+ //
+ while (p.next_expect (event::begin_object, event::end_array))
+ {
+ p.next_expect_name ("node");
+ r.emplace_back (p); // Parse check run: { members... }
+ p.next_expect (event::end_object);
+ }
+
+ p.next_expect (event::end_object); // checkRuns
+ p.next_expect (event::end_object); // Check suite node
+ p.next_expect (event::end_object); // Check suite outer }
+ p.next_expect (event::end_array); // Check suites edges
+ p.next_expect (event::end_object); // checkSuites
+ p.next_expect (event::end_object); // Commit
+ p.next_expect (event::end_object); // Repository node
+
+ p.next_expect (event::end_object); // Outermost }
+ });
+
+ return r;
+ }
+
+ // Serialize a GraphQL operation (query/mutation) into a GraphQL request.
+ //
+ // This is essentially a JSON object with a "query" string member containing
+ // the GraphQL operation. For example:
+ //
+ // { "query": "mutation { cr0:createCheckRun(... }" }
+ //
+ static string
+ gq_serialize_request (const string& o)
+ {
+ string b;
+ json::buffer_serializer s (b);
+
+ s.begin_object ();
+ s.member ("query", o);
+ s.end_object ();
+
+ return b;
+ }
+
+ // Send a GraphQL mutation request `rq` that creates (`create_data` is
+ // present) or updates (`create_data` is absent) one or more check runs.
+ // The requested build state is taken from each check_run object. Update
+ // the check runs in `crs` with the new data (state, node ID if unset, and
+ // state_synced). Return false and issue diagnostics if the request failed.
+ //
+ struct gq_create_data
+ {
+ reference_wrapper<const string> repository_id;
+ reference_wrapper<const string> head_sha;
+ uint64_t app_id;
+ };
+
static bool
gq_mutate_check_runs (const basic_mark& error,
vector<check_run>& crs,
const string& iat,
- string rq)
+ string rq,
+ const optional<gq_create_data>& create_data)
{
vector<gh_check_run> rcrs;
+ const char* what (nullptr);
try
{
// Response type which parses a GraphQL response containing multiple
@@ -247,16 +426,93 @@ namespace brep
vector<gh_check_run> check_runs; // Received check runs.
resp (json::parser& p)
- : check_runs (gq_parse_response_check_runs (p)) {}
+ : check_runs (gq_parse_mutate_check_runs_response (p)) {}
resp () = default;
} rs;
+ what = create_data ? "create" : "update";
uint16_t sc (github_post (rs,
"graphql", // API Endpoint.
strings {"Authorization: Bearer " + iat},
move (rq)));
+ // Turns out it's not uncommon to not get a reply from GitHub if the
+ // number of check runs being created in build_queued() is large. The
+ // symptom is a 502 (Bad gateway) reply from GitHub, the theory being
+ // that their load balancer drops the connection if the request is not
+ // handled within a certain time. Note that in this case the check runs
+ // are still created on GitHub, we just don't get the reply (and thus
+ // their node ids). So we try to re-query that information.
+ //
+ // @@ TODO Update comment to say not all CRs are always created and
+ // describe recovery process.
+ //
+ optional<uint16_t> sc1;
+ if (sc == 502 && create_data)
+ {
+ what = "re-query";
+
+ // GraphQL query which fetches the most recently-created check runs.
+ //
+ string rq (gq_serialize_request (
+ gq_query_get_check_runs (create_data->repository_id,
+ create_data->head_sha,
+ create_data->app_id,
+ crs.size ())));
+
+ // Type that parses the result of the above GraphQL query.
+ //
+ struct resp
+ {
+ vector<gh_check_run> check_runs; // Received check runs.
+
+ resp (json::parser& p)
+ : check_runs (gq_parse_get_check_runs_response (p)) {}
+
+ resp () = default;
+ } rs1;
+
+ sc1 = github_post (rs1,
+ "graphql", // API Endpoint.
+ strings {"Authorization: Bearer " + iat},
+ move (rq));
+
+ if (*sc1 == 200)
+ {
+ size_t n (rs1.check_runs.size ());
+
+ if (n == crs.size ())
+ {
+ // It's possible GitHub did not create all the check runs we have
+ // requested, in which case it may return some unrelated check runs
+ // (for example, from before re-request). So we verify we got the
+ // expected ones.
+ //
+ size_t i (0);
+ for (; i != n; ++i)
+ {
+ const check_run& cr (crs[i]);
+ const gh_check_run& gcr (rs1.check_runs[i]);
+
+ if (cr.name != gcr.name ||
+ cr.state != gh_from_status (gcr.status))
+ break;
+ }
+
+ if (i == n)
+ {
+ rs.check_runs = move (rs1.check_runs);
+
+ // Reduce to as-if the create request succeeded.
+ //
+ what = "create";
+ sc = 200;
+ }
+ }
+ }
+ }
+
if (sc == 200)
{
rcrs = move (rs.check_runs);
@@ -298,57 +554,52 @@ namespace brep
error << "unexpected number of check_run objects in response";
}
else
- error << "failed to mutate check runs: error HTTP response status "
- << sc;
+ {
+ diag_record dr (error);
+
+ dr << "failed to " << what << " check runs: error HTTP response status "
+ << sc;
+
+ if (sc1)
+ {
+ if (*sc1 != 200)
+ dr << error << "failed to re-query check runs: error HTTP "
+ << "response status " << *sc1;
+ else
+ dr << error << "unexpected number of check_run objects in "
+ << "re-query response";
+ }
+ }
}
catch (const json::invalid_json_input& e) // struct resp (via github_post())
{
// Note: e.name is the GitHub API endpoint.
//
- error << "malformed JSON in response from " << e.name << ", line: "
- << e.line << ", column: " << e.column << ", byte offset: "
- << e.position << ", error: " << e;
+ error << "malformed JSON in " << what << " response from " << e.name
+ << ", line: " << e.line << ", column: " << e.column
+ << ", byte offset: " << e.position
+ << ", error: " << e;
}
catch (const invalid_argument& e) // github_post()
{
- error << "malformed header(s) in response: " << e;
+ error << "malformed header(s) in " << what << " response: " << e;
}
catch (const system_error& e) // github_post()
{
- error << "unable to mutate check runs (errno=" << e.code () << "): "
- << e.what ();
+ error << "unable to " << what << " check runs (errno=" << e.code ()
+ << "): " << e.what ();
}
- catch (const runtime_error& e) // gq_parse_response_check_runs()
+ catch (const runtime_error& e) // gq_parse_{mutate,get}_check_runs_response()
{
// GitHub response contained error(s) (could be ours or theirs at this
// point).
//
- error << "unable to mutate check runs: " << e;
+ error << "unable to " << what << " check runs: " << e;
}
return false;
}
- // Serialize a GraphQL operation (query/mutation) into a GraphQL request.
- //
- // This is essentially a JSON object with a "query" string member containing
- // the GraphQL operation. For example:
- //
- // { "query": "mutation { cr0:createCheckRun(... }" }
- //
- static string
- gq_serialize_request (const string& o)
- {
- string b;
- json::buffer_serializer s (b);
-
- s.begin_object ();
- s.member ("query", o);
- s.end_object ();
-
- return b;
- }
-
// Serialize `createCheckRun` mutations for one or more builds to GraphQL.
//
// The check run parameters (names, build states, details_urls, etc.) are
@@ -496,7 +747,6 @@ namespace brep
return os.str ();
}
-
// Serialize an `updateCheckRun` mutation for one build to GraphQL.
//
// The `br` argument is required if the check run status is completed
@@ -571,7 +821,8 @@ namespace brep
vector<check_run>& crs,
const string& iat,
const string& rid,
- const string& hs)
+ const string& hs,
+ uint64_t ai)
{
// No support for result_status so state cannot be built.
//
@@ -583,7 +834,11 @@ namespace brep
string rq (
gq_serialize_request (gq_mutation_create_check_runs (rid, hs, crs)));
- return gq_mutate_check_runs (error, crs, iat, move (rq));
+ return gq_mutate_check_runs (error,
+ crs,
+ iat,
+ move (rq),
+ gq_create_data {rid, hs, ai});
}
bool
@@ -592,6 +847,7 @@ namespace brep
const string& iat,
const string& rid,
const string& hs,
+ uint64_t ai,
const optional<string>& du,
build_state st,
string ti, string su)
@@ -613,7 +869,11 @@ namespace brep
vector<check_run> crs {move (cr)};
crs[0].state = st;
- bool r (gq_mutate_check_runs (error, crs, iat, move (rq)));
+ bool r (gq_mutate_check_runs (error,
+ crs,
+ iat,
+ move (rq),
+ gq_create_data {rid, hs, ai}));
cr = move (crs[0]);
@@ -626,6 +886,7 @@ namespace brep
const string& iat,
const string& rid,
const string& hs,
+ uint64_t ai,
const optional<string>& du,
gq_built_result br)
{
@@ -642,7 +903,11 @@ namespace brep
vector<check_run> crs {move (cr)};
crs[0].state = build_state::built;
- bool r (gq_mutate_check_runs (error, crs, iat, move (rq)));
+ bool r (gq_mutate_check_runs (error,
+ crs,
+ iat,
+ move (rq),
+ gq_create_data {rid, hs, ai}));
cr = move (crs[0]);
@@ -681,7 +946,7 @@ namespace brep
vector<check_run> crs {move (cr)};
crs[0].state = st;
- bool r (gq_mutate_check_runs (error, crs, iat, move (rq)));
+ bool r (gq_mutate_check_runs (error, crs, iat, move (rq), nullopt));
cr = move (crs[0]);
@@ -708,7 +973,7 @@ namespace brep
vector<check_run> crs {move (cr)};
crs[0].state = build_state::built;
- bool r (gq_mutate_check_runs (error, crs, iat, move (rq)));
+ bool r (gq_mutate_check_runs (error, crs, iat, move (rq), nullopt));
cr = move (crs[0]);
@@ -944,7 +1209,6 @@ namespace brep
// Serialize an int to GraphQL.
//
-#if 0
static string
gq_int (uint64_t v)
{
@@ -953,7 +1217,6 @@ namespace brep
s.value (v);
return b;
}
-#endif
// Serialize a boolean to GraphQL.
//