diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2025-02-13 10:39:54 +0200 |
---|---|---|
committer | Francois Kritzinger <francois@codesynthesis.com> | 2025-02-13 14:03:11 +0200 |
commit | 8d25b6e3a07c0879c483a185ba5a2990640e585f (patch) | |
tree | 1af4eb39a33271d2182acf43476f9e1d65a9d31a | |
parent | 71b7cda94d211eb5b2aa3641eb44cdd8502d0a7a (diff) |
ci-github: Create check runs in batches
Doing too many at a time increases the odds of getting a HTTP 502 (bad
gateway) response from GitHub and it also decreases the odds of all check runs
getting created if we do end up getting a 502 response.
-rw-r--r-- | mod/mod-ci-github-gq.cxx | 102 | ||||
-rw-r--r-- | mod/mod-ci-github-gq.hxx | 5 | ||||
-rw-r--r-- | mod/mod-ci-github-service-data.hxx | 4 | ||||
-rw-r--r-- | mod/mod-ci-github.cxx | 4 |
4 files changed, 73 insertions, 42 deletions
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx index 4ac7168..763842c 100644 --- a/mod/mod-ci-github-gq.cxx +++ b/mod/mod-ci-github-gq.cxx @@ -408,12 +408,13 @@ namespace brep static bool gq_mutate_check_runs (const basic_mark& error, - vector<check_run>& crs, + check_runs::iterator crs_b, + check_runs::iterator crs_e, const string& iat, string rq, const optional<gq_create_data>& create_data) { - vector<gh_check_run> rcrs; + size_t crs_n (crs_e - crs_b); const char* what (nullptr); try @@ -441,12 +442,10 @@ namespace brep // number of check runs being created in build_queued() is large. The // symptom is a 502 (Bad gateway) reply from GitHub and the theory being // that their load balancer drops the connection if the request is not - // handled within a certain time. Note that in this case the check runs - // are still created on GitHub, we just don't get the reply (and thus - // their node ids). So we try to re-query that information. - // - // @@ TODO Update comment to say not all CRs are always created and - // describe recovery process. + // handled within a certain time. Note that if the number of check runs + // is under 100, they seem to still be created on GitHub, we just don't + // get the reply (and thus their node ids). So we try to re-query that + // information. // optional<uint16_t> sc1; if (sc == 502 && create_data) @@ -480,9 +479,7 @@ namespace brep if (*sc1 == 200) { - size_t n (rs1.check_runs.size ()); - - if (n == crs.size ()) + if (rs1.check_runs.size () == crs_n) { // It's possible GitHub did not create all the checkruns we have // requested. In which case it may return some unrelated checkruns @@ -490,9 +487,9 @@ namespace brep // expected ones. // size_t i (0); - for (; i != n; ++i) + for (; i != crs_n; ++i) { - const check_run& cr (crs[i]); + const check_run& cr (*(crs_b + i)); const gh_check_run& gcr (rs1.check_runs[i]); if (cr.name != gcr.name || @@ -500,7 +497,7 @@ namespace brep break; } - if (i == n) + if (i == crs_n) { rs.check_runs = move (rs1.check_runs); @@ -515,17 +512,19 @@ namespace brep if (sc == 200) { - rcrs = move (rs.check_runs); + vector<gh_check_run>& rcrs (rs.check_runs); - if (rcrs.size () == crs.size ()) + if (rcrs.size () == crs_n) { - for (size_t i (0); i != rcrs.size (); ++i) + for (size_t i (0); i != crs_n; ++i) { + check_run& cr (*(crs_b + i)); + // Validate the check run in the response against the build. // const gh_check_run& rcr (rcrs[i]); // Received check run. - build_state st (crs[i].state); // Requested state. + build_state st (cr.state); // Requested state. build_state rst (gh_from_status (rcr.status)); // Received state. // Note that GitHub won't allow us to change a built check run to @@ -539,8 +538,6 @@ namespace brep return false; // Fail because something is clearly very wrong. } - check_run& cr (crs[i]); - if (!cr.node_id) cr.node_id = move (rcr.node_id); @@ -617,7 +614,8 @@ namespace brep static string gq_mutation_create_check_runs (const string& ri, // Repository ID const string& hs, // Head SHA - const vector<check_run>& crs) + brep::check_runs::iterator crs_b, + brep::check_runs::iterator crs_e) { ostringstream os; @@ -625,9 +623,9 @@ namespace brep // Serialize a `createCheckRun` for each build. // - for (size_t i (0); i != crs.size (); ++i) + for (brep::check_runs::iterator crs_i (crs_b); crs_i != crs_e; ++crs_i) { - const check_run& cr (crs[i]); + const check_run& cr (*crs_i); assert (cr.state != build_state::built); // Not supported. @@ -638,7 +636,7 @@ namespace brep (!cr.description->title.empty () && !cr.description->summary.empty ())); - string al ("cr" + to_string (i)); // Field alias. + string al ("cr" + to_string (crs_i - crs_b)); // Field alias. os << gq_name (al) << ":createCheckRun(input: {" << '\n' << " name: " << gq_str (cr.name) << '\n' @@ -818,7 +816,7 @@ namespace brep bool gq_create_check_runs (const basic_mark& error, - vector<check_run>& crs, + brep::check_runs& crs, const string& iat, uint64_t ai, const string& rid, @@ -831,15 +829,37 @@ namespace brep assert (cr.state != build_state::built); #endif - string rq ( - gq_serialize_request (gq_mutation_create_check_runs (rid, hs, crs))); + // Trying to create a large number of check runs at once does not work. + // There are two failure modes: + // + // 1. Between about 40 - 60 we may get 502 (bad gateway) but the check + // runs are still created on GitHub. We handle this case be re-quering + // the check runs (see gq_mutate_check_runs() for details). + // + // 2. Above about 60 GitHub may not create all the check runs (while still + // responding with 502). We handle this here by batching the creation. + // + size_t n (crs.size ()); + size_t b (n / 60 + (n % 60 != 0 ? 1 : 0)); + size_t bn (n / b); + + auto i (crs.begin ()); + for (size_t j (0); j != b; ) + { + auto e (++j != b ? (i + bn): crs.end ()); + + string rq ( + gq_serialize_request ( + gq_mutation_create_check_runs (rid, hs, i, e))); - return gq_mutate_check_runs (error, - crs, + if (!gq_mutate_check_runs (error, + i, e, iat, move (rq), gq_create_data {ai, rid, hs})) return false; + + i += bn; } return true; @@ -870,11 +890,11 @@ namespace brep move (ti), move (su), nullopt /* conclusion */))); - vector<check_run> crs {move (cr)}; + brep::check_runs crs {move (cr)}; crs[0].state = st; bool r (gq_mutate_check_runs (error, - crs, + crs.begin (), crs.end (), iat, move (rq), gq_create_data {ai, rid, hs})); @@ -904,11 +924,11 @@ namespace brep move (br.title), move (br.summary), move (br.conclusion)))); - vector<check_run> crs {move (cr)}; + brep::check_runs crs {move (cr)}; crs[0].state = build_state::built; bool r (gq_mutate_check_runs (error, - crs, + crs.begin (), crs.end (), iat, move (rq), gq_create_data {ai, rid, hs})); @@ -947,10 +967,14 @@ namespace brep move (ti), move (su), nullopt /* conclusion */))); - vector<check_run> crs {move (cr)}; + brep::check_runs crs {move (cr)}; crs[0].state = st; - bool r (gq_mutate_check_runs (error, crs, iat, move (rq), nullopt)); + bool r (gq_mutate_check_runs (error, + crs.begin (), crs.end (), + iat, + move (rq), + nullopt)); cr = move (crs[0]); @@ -974,10 +998,14 @@ namespace brep move (br.title), move (br.summary), move (br.conclusion)))); - vector<check_run> crs {move (cr)}; + brep::check_runs crs {move (cr)}; crs[0].state = build_state::built; - bool r (gq_mutate_check_runs (error, crs, iat, move (rq), nullopt)); + bool r (gq_mutate_check_runs (error, + crs.begin (), crs.end (), + iat, + move (rq), + nullopt)); cr = move (crs[0]); diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx index a333a54..5fc75aa 100644 --- a/mod/mod-ci-github-gq.hxx +++ b/mod/mod-ci-github-gq.hxx @@ -22,7 +22,8 @@ namespace brep // Create a new check run on GitHub for each build with the build state, // name, details_url, and output taken from each check_run object. Update // `check_runs` with the new data (node id and state_synced). Return false - // and issue diagnostics if the request failed. + // and issue diagnostics if the request failed. Note that in this case some + // elements in check_runs may still have been updated (due to batching). // // Throw invalid_argument if the passed data is invalid, missing, or // inconsistent. @@ -34,7 +35,7 @@ namespace brep // bool gq_create_check_runs (const basic_mark& error, - vector<check_run>& check_runs, + brep::check_runs& check_runs, const string& installation_access_token, uint64_t app_id, const string& repository_id, diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx index 7ac39dd..9aa512a 100644 --- a/mod/mod-ci-github-service-data.hxx +++ b/mod/mod-ci-github-service-data.hxx @@ -55,6 +55,8 @@ namespace brep } }; + using check_runs = vector<check_run>; + // We have two kinds of service data that correspond to the following two // typical scenarios (until/unless we add support for merge queues): // @@ -123,7 +125,7 @@ namespace brep // string report_sha; - vector<check_run> check_runs; + brep::check_runs check_runs; // Flag indicating that all the elements in check_runs are built and this // check suite is completed. diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx index 5570beb..8242fc0 100644 --- a/mod/mod-ci-github.cxx +++ b/mod/mod-ci-github.cxx @@ -1309,7 +1309,7 @@ namespace brep // a single request (unless something goes wrong) so store them together // from the outset. // - vector<check_run> check_runs (2); + brep::check_runs check_runs (2); check_run& bcr (check_runs[0]); // Build check run check_run& ccr (check_runs[1]); // Conclusion check run @@ -2328,7 +2328,7 @@ namespace brep // The builds for which we will be creating check runs. // vector<reference_wrapper<const build>> bs; - vector<check_run> crs; // Parallel to bs. + brep::check_runs crs; // Parallel to bs. // Exclude the builds for which we won't be creating check runs. // |