diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2025-02-13 10:39:54 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2025-02-13 10:39:54 +0200 |
commit | bbc09592b0f8e95fd96dbc39172d9f6f2464f917 (patch) | |
tree | 1d9e63ab7c9fbeb155cc2ef918d1ac6a0d34ff54 | |
parent | 532de380a91b7dd63cef836d8e3ea6d514ec185b (diff) |
Sketch: batching
-rw-r--r-- | mod/mod-ci-github-gq.cxx | 69 | ||||
-rw-r--r-- | mod/mod-ci-github-gq.hxx | 3 | ||||
-rw-r--r-- | mod/mod-ci-github-service-data.hxx | 4 |
3 files changed, 50 insertions, 26 deletions
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx index 99c04f4..6f40cd6 100644 --- a/mod/mod-ci-github-gq.cxx +++ b/mod/mod-ci-github-gq.cxx @@ -408,12 +408,13 @@ namespace brep static bool gq_mutate_check_runs (const basic_mark& error, - vector<check_run>& crs, + check_runs::iterator crs_b, + check_runs::iterator crs_e, const string& iat, string rq, const optional<gq_create_data>& create_data) { - vector<gh_check_run> rcrs; + size_t crs_n (crs_e - crs_b); const char* what (nullptr); try @@ -441,12 +442,10 @@ namespace brep // number of check runs being created in build_queued() is large. The // symptom is a 502 (Bad gateway) reply from GitHub and the theory being // that their load balancer drops the connection if the request is not - // handled within a certain time. Note that in this case the check runs - // are still created on GitHub, we just don't get the reply (and thus - // their node ids). So we try to re-query that information. - // - // @@ TODO Update comment to say not all CRs are always created and - // describe recovery process. + // handled within a certain time. Note that if the number of check runs + // is under 100, they seem to still be created on GitHub, we just don't + // get the reply (and thus their node ids). So we try to re-query that + // information. // optional<uint16_t> sc1; if (sc == 502 && create_data) @@ -459,7 +458,7 @@ namespace brep gq_query_get_check_runs (create_data->repository_id, create_data->head_sha, create_data->app_id, - crs.size ()))); + crs_n))); // Type that parses the result of the above GraphQL query. // @@ -480,9 +479,7 @@ namespace brep if (*sc1 == 200) { - size_t n (rs1.check_runs.size ()); - - if (n == crs.size ()) + if (rs1.check_runs.size () == n) { // It's possible GitHub did not create all the checkruns we have // requested. In which case it may return some unrelated checkruns @@ -492,7 +489,7 @@ namespace brep size_t i (0); for (; i != n; ++i) { - const check_run& cr (crs[i]); + const check_run& cr (*(crs_b + i)); const gh_check_run& gcr (rs1.check_runs[i]); if (cr.name != gcr.name || @@ -515,17 +512,19 @@ namespace brep if (sc == 200) { - rcrs = move (rs.check_runs); + vector<gh_check_run>& rcrs (rs.check_runs); - if (rcrs.size () == crs.size ()) + if (rcrs.size () == n) { - for (size_t i (0); i != rcrs.size (); ++i) + for (size_t i (0); i != n; ++i) { + check_run& cr (*(crs_b + i)); + // Validate the check run in the response against the build. // const gh_check_run& rcr (rcrs[i]); // Received check run. - build_state st (crs[i].state); // Requested state. + build_state st (cr.state); // Requested state. build_state rst (gh_from_status (rcr.status)); // Received state. // Note that GitHub won't allow us to change a built check run to @@ -539,8 +538,6 @@ namespace brep return false; // Fail because something is clearly very wrong. } - check_run& cr (crs[i]); - if (!cr.node_id) cr.node_id = move (rcr.node_id); @@ -831,14 +828,38 @@ namespace brep assert (cr.state != build_state::built); #endif - string rq ( - gq_serialize_request (gq_mutation_create_check_runs (rid, hs, crs))); + // Trying to create a large number of check runs at once does not work. + // There are two failure modes: + // + // 1. Between about 40 - 60 we may get 502 (bad gateway) but the check + // runs are still created on GitHub. We handle this case be re-quering + // the check runs (see gq_mutate_check_runs() for details). + // + // 2. Above about 60 GitHub may not create all the check runs (while still + // responding with 502). We handle this here by batching the creation. + // + size_t n (crs.size ()); + size_t b (n / 60 + (n % 60 != 0 ? 1 : 0)); + size_t bn (n / b); + + auto i (crs.begin ()); + for (size_t j (0); j != b; ) + { + auto e (++j != b ? (i + bn): crs.end ()); - return gq_mutate_check_runs (error, - crs, + string rq ( + gq_serialize_request ( + gq_mutation_create_check_runs (rid, hs, i, e))); + + if (!gq_mutate_check_runs (error, + i, e, iat, move (rq), - gq_create_data {rid, hs, ai}); + gq_create_data {rid, hs, ai})) + return false; + } + + return true; } bool diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx index 1214944..d66d0fb 100644 --- a/mod/mod-ci-github-gq.hxx +++ b/mod/mod-ci-github-gq.hxx @@ -22,7 +22,8 @@ namespace brep // Create a new check run on GitHub for each build with the build state, // name, details_url, and output taken from each check_run object. Update // `check_runs` with the new data (node id and state_synced). Return false - // and issue diagnostics if the request failed. + // and issue diagnostics if the request failed. Note that in this case some + // elements in check_runs may still have been updated (due to batching). // // Throw invalid_argument if the passed data is invalid, missing, or // inconsistent. diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx index 7ac39dd..9aa512a 100644 --- a/mod/mod-ci-github-service-data.hxx +++ b/mod/mod-ci-github-service-data.hxx @@ -55,6 +55,8 @@ namespace brep } }; + using check_runs = vector<check_run>; + // We have two kinds of service data that correspond to the following two // typical scenarios (until/unless we add support for merge queues): // @@ -123,7 +125,7 @@ namespace brep // string report_sha; - vector<check_run> check_runs; + brep::check_runs check_runs; // Flag indicating that all the elements in check_runs are built and this // check suite is completed. |