aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2025-02-13 10:39:54 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2025-02-13 10:39:54 +0200
commitbbc09592b0f8e95fd96dbc39172d9f6f2464f917 (patch)
tree1d9e63ab7c9fbeb155cc2ef918d1ac6a0d34ff54
parent532de380a91b7dd63cef836d8e3ea6d514ec185b (diff)
Sketch: batching
-rw-r--r--mod/mod-ci-github-gq.cxx69
-rw-r--r--mod/mod-ci-github-gq.hxx3
-rw-r--r--mod/mod-ci-github-service-data.hxx4
3 files changed, 50 insertions, 26 deletions
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx
index 99c04f4..6f40cd6 100644
--- a/mod/mod-ci-github-gq.cxx
+++ b/mod/mod-ci-github-gq.cxx
@@ -408,12 +408,13 @@ namespace brep
static bool
gq_mutate_check_runs (const basic_mark& error,
- vector<check_run>& crs,
+ check_runs::iterator crs_b,
+ check_runs::iterator crs_e,
const string& iat,
string rq,
const optional<gq_create_data>& create_data)
{
- vector<gh_check_run> rcrs;
+ size_t crs_n (crs_e - crs_b);
const char* what (nullptr);
try
@@ -441,12 +442,10 @@ namespace brep
// number of check runs being created in build_queued() is large. The
// symptom is a 502 (Bad gateway) reply from GitHub and the theory being
// that their load balancer drops the connection if the request is not
- // handled within a certain time. Note that in this case the check runs
- // are still created on GitHub, we just don't get the reply (and thus
- // their node ids). So we try to re-query that information.
- //
- // @@ TODO Update comment to say not all CRs are always created and
- // describe recovery process.
+ // handled within a certain time. Note that if the number of check runs
+ // is under 100, they seem to still be created on GitHub, we just don't
+ // get the reply (and thus their node ids). So we try to re-query that
+ // information.
//
optional<uint16_t> sc1;
if (sc == 502 && create_data)
@@ -459,7 +458,7 @@ namespace brep
gq_query_get_check_runs (create_data->repository_id,
create_data->head_sha,
create_data->app_id,
- crs.size ())));
+ crs_n)));
// Type that parses the result of the above GraphQL query.
//
@@ -480,9 +479,7 @@ namespace brep
if (*sc1 == 200)
{
- size_t n (rs1.check_runs.size ());
-
- if (n == crs.size ())
+ if (rs1.check_runs.size () == n)
{
// It's possible GitHub did not create all the checkruns we have
// requested. In which case it may return some unrelated checkruns
@@ -492,7 +489,7 @@ namespace brep
size_t i (0);
for (; i != n; ++i)
{
- const check_run& cr (crs[i]);
+ const check_run& cr (*(crs_b + i));
const gh_check_run& gcr (rs1.check_runs[i]);
if (cr.name != gcr.name ||
@@ -515,17 +512,19 @@ namespace brep
if (sc == 200)
{
- rcrs = move (rs.check_runs);
+ vector<gh_check_run>& rcrs (rs.check_runs);
- if (rcrs.size () == crs.size ())
+ if (rcrs.size () == n)
{
- for (size_t i (0); i != rcrs.size (); ++i)
+ for (size_t i (0); i != n; ++i)
{
+ check_run& cr (*(crs_b + i));
+
// Validate the check run in the response against the build.
//
const gh_check_run& rcr (rcrs[i]); // Received check run.
- build_state st (crs[i].state); // Requested state.
+ build_state st (cr.state); // Requested state.
build_state rst (gh_from_status (rcr.status)); // Received state.
// Note that GitHub won't allow us to change a built check run to
@@ -539,8 +538,6 @@ namespace brep
return false; // Fail because something is clearly very wrong.
}
- check_run& cr (crs[i]);
-
if (!cr.node_id)
cr.node_id = move (rcr.node_id);
@@ -831,14 +828,38 @@ namespace brep
assert (cr.state != build_state::built);
#endif
- string rq (
- gq_serialize_request (gq_mutation_create_check_runs (rid, hs, crs)));
+ // Trying to create a large number of check runs at once does not work.
+ // There are two failure modes:
+ //
+ // 1. Between about 40 - 60 we may get 502 (bad gateway) but the check
+ // runs are still created on GitHub. We handle this case be re-quering
+ // the check runs (see gq_mutate_check_runs() for details).
+ //
+ // 2. Above about 60 GitHub may not create all the check runs (while still
+ // responding with 502). We handle this here by batching the creation.
+ //
+ size_t n (crs.size ());
+ size_t b (n / 60 + (n % 60 != 0 ? 1 : 0));
+ size_t bn (n / b);
+
+ auto i (crs.begin ());
+ for (size_t j (0); j != b; )
+ {
+ auto e (++j != b ? (i + bn): crs.end ());
- return gq_mutate_check_runs (error,
- crs,
+ string rq (
+ gq_serialize_request (
+ gq_mutation_create_check_runs (rid, hs, i, e)));
+
+ if (!gq_mutate_check_runs (error,
+ i, e,
iat,
move (rq),
- gq_create_data {rid, hs, ai});
+ gq_create_data {rid, hs, ai}))
+ return false;
+ }
+
+ return true;
}
bool
diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx
index 1214944..d66d0fb 100644
--- a/mod/mod-ci-github-gq.hxx
+++ b/mod/mod-ci-github-gq.hxx
@@ -22,7 +22,8 @@ namespace brep
// Create a new check run on GitHub for each build with the build state,
// name, details_url, and output taken from each check_run object. Update
// `check_runs` with the new data (node id and state_synced). Return false
- // and issue diagnostics if the request failed.
+ // and issue diagnostics if the request failed. Note that in this case some
+ // elements in check_runs may still have been updated (due to batching).
//
// Throw invalid_argument if the passed data is invalid, missing, or
// inconsistent.
diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx
index 7ac39dd..9aa512a 100644
--- a/mod/mod-ci-github-service-data.hxx
+++ b/mod/mod-ci-github-service-data.hxx
@@ -55,6 +55,8 @@ namespace brep
}
};
+ using check_runs = vector<check_run>;
+
// We have two kinds of service data that correspond to the following two
// typical scenarios (until/unless we add support for merge queues):
//
@@ -123,7 +125,7 @@ namespace brep
//
string report_sha;
- vector<check_run> check_runs;
+ brep::check_runs check_runs;
// Flag indicating that all the elements in check_runs are built and this
// check suite is completed.