diff options
-rw-r--r-- | INSTALL-GITHUB-DEV | 2 | ||||
-rw-r--r-- | etc/brep-module.conf | 13 | ||||
-rw-r--r-- | etc/private/install/brep-module.conf | 18 | ||||
-rw-r--r-- | mod/ci-common.cxx | 9 | ||||
-rw-r--r-- | mod/ci-common.hxx | 12 | ||||
-rw-r--r-- | mod/database-module.cxx | 20 | ||||
-rw-r--r-- | mod/database-module.hxx | 14 | ||||
-rw-r--r-- | mod/mod-build-force.cxx | 5 | ||||
-rw-r--r-- | mod/mod-build-result.cxx | 9 | ||||
-rw-r--r-- | mod/mod-build-task.cxx | 26 | ||||
-rw-r--r-- | mod/mod-ci-github-gh.cxx | 178 | ||||
-rw-r--r-- | mod/mod-ci-github-gh.hxx | 131 | ||||
-rw-r--r-- | mod/mod-ci-github-gq.cxx | 74 | ||||
-rw-r--r-- | mod/mod-ci-github-gq.hxx | 11 | ||||
-rw-r--r-- | mod/mod-ci-github-service-data.cxx | 86 | ||||
-rw-r--r-- | mod/mod-ci-github-service-data.hxx | 21 | ||||
-rw-r--r-- | mod/mod-ci-github.cxx | 1066 | ||||
-rw-r--r-- | mod/mod-ci-github.hxx | 47 | ||||
-rw-r--r-- | mod/mod-ci.cxx | 29 | ||||
-rw-r--r-- | mod/mod-ci.hxx | 24 | ||||
-rw-r--r-- | mod/module.cli | 18 | ||||
-rw-r--r-- | mod/tenant-service.hxx | 32 |
22 files changed, 1146 insertions, 699 deletions
diff --git a/INSTALL-GITHUB-DEV b/INSTALL-GITHUB-DEV index 45f4b9b..245b095 100644 --- a/INSTALL-GITHUB-DEV +++ b/INSTALL-GITHUB-DEV @@ -1,3 +1,5 @@ +@@ TODO Explain multiple apps in INSTALL-GITHUB-DEV. + This document explains how to get GitHub webhooks (a notification that an event such as a push has occurred on a repository) delivered to a locally-running instance of brep (currently to initiate a CI job). diff --git a/etc/brep-module.conf b/etc/brep-module.conf index 80f7cbf..92b3182 100644 --- a/etc/brep-module.conf +++ b/etc/brep-module.conf @@ -453,21 +453,16 @@ menu About=?about # ci-handler-timeout -# The GitHub App ID. Found in the app's settings on GitHub. -# -# ci-github-app-id - - # The GitHub App's configured webhook secret. If not set, then the GitHub CI -# service is disabled. +# service is disabled. Note: make sure to choose a strong (random) secret. # # ci-github-app-webhook-secret -# The private key used during GitHub API authentication. Created in the GitHub -# App's settings. +# The private key used during GitHub API authentication for the specified +# GitHub App ID. Both vales are found in the GitHub App's settings. # -# ci-github-app-private-key +# ci-github-app-id-private-key <id>=<path> # The number of seconds a JWT (authentication token) should be valid for. The diff --git a/etc/private/install/brep-module.conf b/etc/private/install/brep-module.conf index 0c7f065..680352a 100644 --- a/etc/private/install/brep-module.conf +++ b/etc/private/install/brep-module.conf @@ -461,6 +461,24 @@ submit-handler-timeout 120 # ci-handler-timeout +# The GitHub App's configured webhook secret. If not set, then the GitHub CI +# service is disabled. Note: make sure to choose a strong (random) secret. +# +# ci-github-app-webhook-secret + + +# The private key used during GitHub API authentication for the specified +# GitHub App ID. Both vales are found in the GitHub App's settings. +# +# ci-github-app-id-private-key <id>=<path> + + +# The number of seconds a JWT (authentication token) should be valid for. The +# maximum allowed by GitHub is 10 minutes. +# +# ci-github-jwt-validity-period 600 + + # The directory to save upload data to for the specified upload type. If # unspecified, the build artifacts upload functionality will be disabled for # this type. diff --git a/mod/ci-common.cxx b/mod/ci-common.cxx index cba421b..e720914 100644 --- a/mod/ci-common.cxx +++ b/mod/ci-common.cxx @@ -955,7 +955,8 @@ namespace brep rebuild (odb::core::database& db, size_t retry, const build_id& id, - function<optional<string> (const tenant_service&, + function<optional<string> (const string& tenant_id, + const tenant_service&, build_state)> uf) const { using namespace odb::core; @@ -1002,7 +1003,7 @@ namespace brep tenant_service& ts (*t->service); - if (optional<string> data = uf (ts, s)) + if (optional<string> data = uf (t->id, ts, s)) { ts.data = move (*data); db.update (t); @@ -1030,7 +1031,7 @@ namespace brep return s; } - optional<pair<tenant_service, bool>> ci_start:: + optional<ci_start::tenant_data> ci_start:: find (odb::core::database& db, const string& type, const string& id) const @@ -1052,6 +1053,6 @@ namespace brep if (t == nullptr || !t->service) return nullopt; - return pair<tenant_service, bool> (move (*t->service), t->archived); + return tenant_data {move (t->id), move (*t->service), t->archived}; } } diff --git a/mod/ci-common.hxx b/mod/ci-common.hxx index b32d397..a38ac54 100644 --- a/mod/ci-common.hxx +++ b/mod/ci-common.hxx @@ -242,7 +242,8 @@ namespace brep rebuild (odb::core::database&, size_t retry, const build_id&, - function<optional<string> (const tenant_service&, + function<optional<string> (const string& tenant_id, + const tenant_service&, build_state)> = nullptr) const; // Find the tenant given the tenant service type and id and return the @@ -251,7 +252,14 @@ namespace brep // // Note: should be called out of the database transaction. // - optional<pair<tenant_service, bool /*archived*/>> + struct tenant_data + { + string tenant_id; + tenant_service service; + bool archived; + }; + + optional<tenant_data> find (odb::core::database&, const string& type, const string& id) const; diff --git a/mod/database-module.cxx b/mod/database-module.cxx index bce8c93..629e393 100644 --- a/mod/database-module.cxx +++ b/mod/database-module.cxx @@ -79,8 +79,10 @@ namespace brep optional<string> database_module:: update_tenant_service_state ( const connection_ptr& conn, - const string& tid, - const function<optional<string> (const tenant_service&)>& f) + const string& type, + const string& id, + const function<optional<string> (const string& tenant_id, + const tenant_service&)>& f) { assert (f != nullptr); // Shouldn't be called otherwise. @@ -96,13 +98,21 @@ namespace brep { transaction tr (conn->begin ()); - shared_ptr<build_tenant> t (build_db_->find<build_tenant> (tid)); + using query = query<build_tenant>; - if (t != nullptr && t->service) + shared_ptr<build_tenant> t ( + build_db_->query_one<build_tenant> (query::service.id == id && + query::service.type == type)); + + if (t != nullptr) { + // Shouldn't be here otherwise. + // + assert (t->service); + tenant_service& s (*t->service); - if (optional<string> data = f (s)) + if (optional<string> data = f (t->id, s)) { s.data = move (*data); build_db_->update (t); diff --git a/mod/database-module.hxx b/mod/database-module.hxx index 298afbf..76f13d4 100644 --- a/mod/database-module.hxx +++ b/mod/database-module.hxx @@ -61,16 +61,18 @@ namespace brep // and nullopt otherwise. // // Specifically, start the database transaction, query the service state, - // and call the callback-returned function on this state. If this call - // returns the data string (rather than nullopt), then update the service - // state with this data and persist the change. Repeat all the above steps - // on the recoverable database failures (deadlocks, etc). + // and, if present, call the callback-returned function on this state. If + // this call returns the data string (rather than nullopt), then update + // the service state with this data and persist the change. Repeat all the + // above steps on the recoverable database failures (deadlocks, etc). // optional<string> update_tenant_service_state ( const odb::core::connection_ptr&, - const string& tid, - const function<optional<string> (const tenant_service&)>&); + const string& type, + const string& id, + const function<optional<string> (const string& tenant_id, + const tenant_service&)>&); protected: size_t retry_ = 0; // Max of all retries. diff --git a/mod/mod-build-force.cxx b/mod/mod-build-force.cxx index 8666889..d37674f 100644 --- a/mod/mod-build-force.cxx +++ b/mod/mod-build-force.cxx @@ -314,14 +314,15 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, build_state::building, qhs, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, qbs.back ().tenant, f); + update_tenant_service_state (conn, ss.type, ss.id, f); } } diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx index bc44bd2..cc058b5 100644 --- a/mod/mod-build-result.cxx +++ b/mod/mod-build-result.cxx @@ -545,14 +545,15 @@ handle (request& rq, response&) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, build_state::building, qhs, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, qbs.back ().tenant, f); + update_tenant_service_state (conn, ss.type, ss.id, f); } } @@ -572,10 +573,10 @@ handle (request& rq, response&) // conn.reset (); - if (auto f = tsb->build_built (ss, b, log_writer_)) + if (auto f = tsb->build_built (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, b.tenant, f); + update_tenant_service_state (conn, ss.type, ss.id, f); } } diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index 2ae1237..c8b1bb2 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -499,10 +499,14 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsu->build_unloaded (move (*t->service), log_writer_)) + tenant_service& ts (*t->service); + string type (ts.type); + string id (ts.id); + + if (auto f = tsu->build_unloaded (t->id, move (ts), log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, t->id, f); + update_tenant_service_state (conn, type, id, f); } } } @@ -2350,7 +2354,8 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, nullopt /* initial_state */, qhs, @@ -2359,7 +2364,7 @@ handle (request& rq, response& rs) conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, qbs.back ().tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2382,7 +2387,8 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, initial_state, qhs, @@ -2391,7 +2397,7 @@ handle (request& rq, response& rs) conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, qbs.back ().tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2418,12 +2424,12 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsb->build_building (ss, b, log_writer_)) + if (auto f = tsb->build_building (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, b.tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2546,12 +2552,12 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsb->build_built (ss, b, log_writer_)) + if (auto f = tsb->build_built (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, b.tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } diff --git a/mod/mod-ci-github-gh.cxx b/mod/mod-ci-github-gh.cxx index 6ab93d7..021ff6b 100644 --- a/mod/mod-ci-github-gh.cxx +++ b/mod/mod-ci-github-gh.cxx @@ -7,6 +7,15 @@ namespace brep { + [[noreturn]] static void + throw_json (const json::parser& p, const string& m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + m); + } + // Return the GitHub check run status corresponding to a build_state. // string @@ -102,10 +111,7 @@ namespace brep [[noreturn]] static void missing_member (const json::parser& p, const char* o, const char* m) { - throw json::invalid_json_input ( - p.input_name, - p.line (), p.column (), p.position (), - o + string (" object is missing member '") + m + '\''); + throw_json (p, o + string (" object is missing member '") + m + '\''); } using event = json::event; @@ -154,6 +160,93 @@ namespace brep return os; } + // gh_check_suite_ex + // + gh_check_suite_ex:: + gh_check_suite_ex (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), hb (false), hs (false), cc (false), co (false), + ap (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (hb, "head_branch")) + { + string* v (p.next_expect_string_null ()); + if (v != nullptr) + head_branch = *v; + } + else if (c (hs, "head_sha")) head_sha = p.next_expect_string (); + else if (c (cc, "latest_check_runs_count")) + check_runs_count = p.next_expect_number <size_t> (); + else if (c (co, "conclusion")) + { + string* v (p.next_expect_string_null ()); + if (v != nullptr) + conclusion = *v; + } + else if (c (ap, "app")) + { + p.next_expect (event::begin_object); + + bool ai (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + if (c (ai, "id")) + { + // Note: unlike the check_run webhook's app.id, the check_suite + // one can be null. It's unclear under what circumstances, but it + // shouldn't happen unless something is broken. + // + string* v (p.next_expect_number_null ()); + + if (v == nullptr) + throw_json (p, "check_suite.app.id is null"); + + app_id = *v; + } + else p.next_expect_value_skip (); + } + + if (!ai) missing_member (p, "gh_check_suite_ex.app", "id"); + } + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_check_suite_ex", "node_id"); + if (!hb) missing_member (p, "gh_check_suite_ex", "head_branch"); + if (!hs) missing_member (p, "gh_check_suite_ex", "head_sha"); + if (!cc) missing_member (p, "gh_check_suite_ex", "latest_check_runs_count"); + if (!co) missing_member (p, "gh_check_suite_ex", "conclusion"); + if (!ap) missing_member (p, "gh_check_suite_ex", "app"); + } + + ostream& + operator<< (ostream& os, const gh_check_suite_ex& cs) + { + os << "node_id: " << cs.node_id + << ", head_branch: " << (cs.head_branch ? *cs.head_branch : "null") + << ", head_sha: " << cs.head_sha + << ", latest_check_runs_count: " << cs.check_runs_count + << ", conclusion: " << (cs.conclusion ? *cs.conclusion : "null") + << ", app_id: " << cs.app_id; + + return os; + } + // gh_check_run // gh_check_run:: @@ -190,7 +283,8 @@ namespace brep { p.next_expect (event::begin_object); - bool ni (false), nm (false), st (false), du (false), cs (false); + bool ni (false), nm (false), st (false), du (false), cs (false), + ap (false); // Skip unknown/uninteresting members. // @@ -206,14 +300,31 @@ namespace brep else if (c (st, "status")) status = p.next_expect_string (); else if (c (du, "details_url")) details_url = p.next_expect_string (); else if (c (cs, "check_suite")) check_suite = gh_check_suite (p); + else if (c (ap, "app")) + { + p.next_expect (event::begin_object); + + bool ai (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + if (c (ai, "id")) app_id = p.next_expect_number (); + else p.next_expect_value_skip (); + } + + if (!ai) missing_member (p, "gh_check_run_ex.app", "id"); + } else p.next_expect_value_skip (); } - if (!ni) missing_member (p, "gh_check_run", "node_id"); - if (!nm) missing_member (p, "gh_check_run", "name"); - if (!st) missing_member (p, "gh_check_run", "status"); - if (!du) missing_member (p, "gh_check_run", "details_url"); - if (!cs) missing_member (p, "gh_check_run", "check_suite"); + if (!ni) missing_member (p, "gh_check_run_ex", "node_id"); + if (!nm) missing_member (p, "gh_check_run_ex", "name"); + if (!st) missing_member (p, "gh_check_run_ex", "status"); + if (!du) missing_member (p, "gh_check_run_ex", "details_url"); + if (!cs) missing_member (p, "gh_check_run_ex", "check_suite"); + if (!ap) missing_member (p, "gh_check_run_ex", "app"); } @@ -232,7 +343,8 @@ namespace brep { os << static_cast<const gh_check_run&> (cr) << ", details_url: " << cr.details_url - << ", check_suite: { " << cr.check_suite << " }"; + << ", check_suite: { " << cr.check_suite << " }" + << ", app_id: " << cr.app_id; return os; } @@ -338,7 +450,8 @@ namespace brep << "path: " << pr.head_path << ", ref: " << pr.head_ref << ", sha: " << pr.head_sha - << " }"; + << " }" + << ", app_id: " << pr.app_id; return os; } @@ -350,7 +463,7 @@ namespace brep { p.next_expect (event::begin_object); - bool ni (false), nm (false), fn (false), cu (false); + bool ni (false), fn (false), cu (false); // Skip unknown/uninteresting members. // @@ -362,14 +475,12 @@ namespace brep }; if (c (ni, "node_id")) node_id = p.next_expect_string (); - else if (c (nm, "name")) name = p.next_expect_string (); else if (c (fn, "full_name")) path = p.next_expect_string (); else if (c (cu, "clone_url")) clone_url = p.next_expect_string (); else p.next_expect_value_skip (); } if (!ni) missing_member (p, "gh_repository", "node_id"); - if (!nm) missing_member (p, "gh_repository", "name"); if (!fn) missing_member (p, "gh_repository", "full_name"); if (!cu) missing_member (p, "gh_repository", "clone_url"); } @@ -378,7 +489,6 @@ namespace brep operator<< (ostream& os, const gh_repository& rep) { os << "node_id: " << rep.node_id - << ", name: " << rep.name << ", path: " << rep.path << ", clone_url: " << rep.clone_url; @@ -403,7 +513,7 @@ namespace brep return p.name () == s ? (v = true) : false; }; - if (c (i, "id")) id = p.next_expect_number<uint64_t> (); + if (c (i, "id")) id = p.next_expect_number (); else p.next_expect_value_skip (); } @@ -437,7 +547,7 @@ namespace brep }; if (c (ac, "action")) action = p.next_expect_string (); - else if (c (cs, "check_suite")) check_suite = gh_check_suite (p); + else if (c (cs, "check_suite")) check_suite = gh_check_suite_ex (p); else if (c (rp, "repository")) repository = gh_repository (p); else if (c (in, "installation")) installation = gh_installation (p); else p.next_expect_value_skip (); @@ -478,10 +588,6 @@ namespace brep return p.name () == s ? (v = true) : false; }; - // Pass true to gh_check_run() to indicate that the we're parsing a - // webhook event or REST API response (in which case more fields are - // expected to be present than in a GraphQL response). - // if (c (ac, "action")) action = p.next_expect_string (); else if (c (cs, "check_run")) check_run = gh_check_run_ex (p); else if (c (rp, "repository")) repository = gh_repository (p); @@ -577,7 +683,29 @@ namespace brep }; if (c (tk, "token")) token = p.next_expect_string (); - else if (c (ea, "expires_at")) expires_at = gh_from_iso8601 (p.next_expect_string ()); + else if (c (ea, "expires_at")) + { + string v (p.next_expect_string ()); + + try + { + expires_at = gh_from_iso8601 (v); + } + catch (const invalid_argument& e) + { + throw_json (p, + "invalid IAT expires_at value '" + v + + "': " + e.what ()); + } + catch (const system_error& e) + { + // Translate for simplicity. + // + throw_json (p, + "unable to convert IAT expires_at value '" + v + + "': " + e.what ()); + } + } else p.next_expect_value_skip (); } @@ -612,6 +740,8 @@ namespace brep timestamp gh_from_iso8601 (const string& s) { - return butl::from_string (s.c_str (), "%Y-%m-%dT%TZ", false /* local */); + return butl::from_string (s.c_str (), + "%Y-%m-%dT%TZ", + false /* local */); } } diff --git a/mod/mod-ci-github-gh.hxx b/mod/mod-ci-github-gh.hxx index 6ede697..ab6dbaa 100644 --- a/mod/mod-ci-github-gh.hxx +++ b/mod/mod-ci-github-gh.hxx @@ -21,16 +21,15 @@ namespace butl namespace brep { - // @@@ Check if any data members are unused (once the dust settles). - using build_queued_hints = tenant_service_build_queued::build_queued_hints; // GitHub request/response types (all start with gh_). // // Note that the GitHub REST and GraphQL APIs use different id types and - // values. In the REST API they are usually integers (but sometimes - // strings!) whereas in GraphQL they are always strings (note: - // base64-encoded and opaque, not just the REST id value as a string). + // values. In the REST API they are usually integers (but check the API + // reference for the object in question) whereas in GraphQL they are always + // strings (note: base64-encoded and opaque, not just the REST id value as a + // string). // // In both APIs the id field is called `id`, but REST responses and webhook // events also contain the corresponding GraphQL object's id in the @@ -45,7 +44,7 @@ namespace brep // namespace json = butl::json; - // The "check_suite" object within a check_suite webhook event request. + // The check_suite member of a check_run webhook event (gh_check_run_event). // struct gh_check_suite { @@ -59,6 +58,26 @@ namespace brep gh_check_suite () = default; }; + // The check_suite member of a check_suite webhook event + // (gh_check_suite_event). + // + struct gh_check_suite_ex: gh_check_suite + { + size_t check_runs_count; + optional<string> conclusion; + + string app_id; + + explicit + gh_check_suite_ex (json::parser&); + + gh_check_suite_ex () = default; + }; + + // The check_run object returned in response to GraphQL requests + // (e.g. create or update check run). Note that we specifiy the set of + // members to return in the GraphQL request. + // struct gh_check_run { string node_id; @@ -71,66 +90,58 @@ namespace brep gh_check_run () = default; }; + // The check_run member of a check_run webhook event (gh_check_run_event). + // struct gh_check_run_ex: gh_check_run { string details_url; gh_check_suite check_suite; + string app_id; + explicit gh_check_run_ex (json::parser&); gh_check_run_ex () = default; }; + // The pull_request member of a pull_request webhook event + // (gh_pull_request_event). + // struct gh_pull_request { string node_id; unsigned int number; - // @@ TMP The unused base/head members may be useful for trace output when - // we receive the pull_request webhook. - string base_path; // Repository path (<org>/<repo>) under github.com. - string base_ref; // @@ TODO Remove if remains unused. - string base_sha; // @@ TODO Remove if remains unused. + string base_ref; + string base_sha; string head_path; - string head_ref; // @@ TODO Remove if remains unused. + string head_ref; string head_sha; + // Note: not received from GitHub but set from the app-id webhook query + // parameter instead. + // + // For some reason, unlike the check_suite and check_run webhooks, the + // pull_request webhook does not contain the app id. For the sake of + // simplicity we emulate check_suite and check_run by storing the app-id + // webhook query parameter here. + // + string app_id; + explicit gh_pull_request (json::parser&); gh_pull_request () = default; }; - // Return the GitHub check run status corresponding to a build_state. + // The repository member of various webhook events. // - string - gh_to_status (build_state st); - - // Return the build_state corresponding to a GitHub check run status - // string. Throw invalid_argument if the passed status was invalid. - // - build_state - gh_from_status (const string&); - - // If warning_success is true, then map result_status::warning to SUCCESS - // and to FAILURE otherwise. - // - string - gh_to_conclusion (result_status, bool warning_success); - - // Create a check_run name from a build. If the second argument is not - // NULL, return an abbreviated id if possible. - // - string - gh_check_run_name (const build&, const build_queued_hints* = nullptr); - struct gh_repository { string node_id; - string name; string path; // Repository path (<org>/<repo>) under github.com. string clone_url; @@ -140,9 +151,11 @@ namespace brep gh_repository () = default; }; + // The installation member of various webhook events. + // struct gh_installation { - uint64_t id; // Note: used for installation access token (REST API). + string id; // Note: used for installation access token (REST API). explicit gh_installation (json::parser&); @@ -150,12 +163,12 @@ namespace brep gh_installation () = default; }; - // The check_suite webhook event request. + // The check_suite webhook event. // struct gh_check_suite_event { string action; - gh_check_suite check_suite; + gh_check_suite_ex check_suite; gh_repository repository; gh_installation installation; @@ -165,6 +178,8 @@ namespace brep gh_check_suite_event () = default; }; + // The check_run webhook event. + // struct gh_check_run_event { string action; @@ -178,6 +193,8 @@ namespace brep gh_check_run_event () = default; }; + // The pull_request webhook event. + // struct gh_pull_request_event { string action; @@ -198,6 +215,9 @@ namespace brep gh_pull_request_event () = default; }; + // Installation access token (IAT) returned when we authenticate as a GitHub + // app installation. + // struct gh_installation_access_token { string token; @@ -211,9 +231,41 @@ namespace brep gh_installation_access_token () = default; }; + // Return the GitHub check run status corresponding to a build_state. + // + string + gh_to_status (build_state); + + // Return the build_state corresponding to a GitHub check run status + // string. Throw invalid_argument if the passed status was invalid. + // + build_state + gh_from_status (const string&); + + // If warning_success is true, then map result_status::warning to `SUCCESS` + // and to `FAILURE` otherwise. + // + // Throw invalid_argument in case of unsupported result_status value + // (currently skip, interrupt). + // + string + gh_to_conclusion (result_status, bool warning_success); + + // Create a check_run name from a build. If the second argument is not + // NULL, return an abbreviated id if possible. + // + string + gh_check_run_name (const build&, const build_queued_hints* = nullptr); + + // Throw system_error if the conversion fails due to underlying operating + // system errors. + // string gh_to_iso8601 (timestamp); + // Throw invalid_argument if the conversion fails due to the invalid + // argument and system_error if due to underlying operating system errors. + // timestamp gh_from_iso8601 (const string&); @@ -221,6 +273,9 @@ namespace brep operator<< (ostream&, const gh_check_suite&); ostream& + operator<< (ostream&, const gh_check_suite_ex&); + + ostream& operator<< (ostream&, const gh_check_run&); ostream& diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx index fa701a8..774eeed 100644 --- a/mod/mod-ci-github-gq.cxx +++ b/mod/mod-ci-github-gq.cxx @@ -17,9 +17,11 @@ namespace brep // bottom). // static const string& gq_name (const string&); + static string gq_name (string&&); static string gq_str (const string&); static string gq_bool (bool); static const string& gq_enum (const string&); + static string gq_enum (string&&); [[noreturn]] static void throw_json (json::parser& p, const string& m) @@ -163,6 +165,8 @@ namespace brep // Parse a response to a check_run GraphQL mutation such as `createCheckRun` // or `updateCheckRun`. // + // Throw invalid_json_input. + // // Example response (only the part we need to parse here): // // { @@ -229,7 +233,7 @@ namespace brep gq_mutate_check_runs (const basic_mark& error, vector<check_run>& crs, const string& iat, - string rq) noexcept + string rq) { vector<gh_check_run> rcrs; @@ -271,11 +275,6 @@ namespace brep // Note that GitHub won't allow us to change a built check run to // any other state (but all other transitions are allowed). // - // @@ Are we handling the case where the resulting state (built) - // differs from what we expect? - // - // @@@ Does built-to-built transition updates status? - // if (rst != st && rst != build_state::built) { error << "unexpected check_run status: received '" << rcr.status @@ -302,7 +301,7 @@ namespace brep error << "failed to mutate check runs: error HTTP response status " << sc; } - catch (const json::invalid_json_input& e) + catch (const json::invalid_json_input& e) // struct resp (via github_post()) { // Note: e.name is the GitHub API endpoint. // @@ -310,16 +309,16 @@ namespace brep << e.line << ", column: " << e.column << ", byte offset: " << e.position << ", error: " << e; } - catch (const invalid_argument& e) + catch (const invalid_argument& e) // github_post() { error << "malformed header(s) in response: " << e; } - catch (const system_error& e) + catch (const system_error& e) // github_post() { error << "unable to mutate check runs (errno=" << e.code () << "): " << e.what (); } - catch (const runtime_error& e) // From gq_parse_response_check_runs(). + catch (const runtime_error& e) // gq_parse_response_check_runs() { // GitHub response contained error(s) (could be ours or theirs at this // point). @@ -361,6 +360,9 @@ namespace brep // The details URL argument (`du`) can be empty for queued but not for the // other states. // + // Throw invalid_argument if any of the observed check run members are not + // valid GraphQL values (string, enum, etc). + // static string gq_mutation_create_check_runs (const string& ri, // Repository ID const string& hs, // Head SHA @@ -422,6 +424,9 @@ namespace brep // The details URL argument (`du`) can be empty for queued but not for the // other states. // + // Throw invalid_argument if any of the arguments or observed check run + // members are not valid GraphQL values (string, enum, etc). + // static string gq_mutation_create_check_run (const string& ri, // Repository ID const string& hs, // Head SHA @@ -484,6 +489,9 @@ namespace brep // because GitHub does not allow updating a check run to completed without a // conclusion. // + // Throw invalid_argument if any of the arguments are invalid values (of + // GraphQL types or otherwise). + // static string gq_mutation_update_check_run (const string& ri, // Repository ID. const string& ni, // Node ID. @@ -505,8 +513,19 @@ namespace brep << " status: " << gq_enum (st); if (sa) { - os << '\n'; - os << " startedAt: " << gq_str (gh_to_iso8601 (*sa)); + try + { + os << '\n'; + os << " startedAt: " << gq_str (gh_to_iso8601 (*sa)); + } + catch (const system_error& e) + { + // Translate for simplicity. + // + throw invalid_argument ("unable to convert started_at value " + + to_string (system_clock::to_time_t (*sa)) + + ": " + e.what ()); + } } if (du) { @@ -634,6 +653,8 @@ namespace brep // Serialize a GraphQL query that fetches a pull request from GitHub. // + // Throw invalid_argument if the node id is not a valid GraphQL string. + // static string gq_query_pr_mergeability (const string& nid) { @@ -658,6 +679,8 @@ namespace brep const string& iat, const string& nid) { + // Let invalid_argument from gq_query_pr_mergeability() propagate. + // string rq (gq_serialize_request (gq_query_pr_mergeability (nid))); try @@ -760,7 +783,7 @@ namespace brep error << "failed to fetch pull request: error HTTP response status " << sc; } - catch (const json::invalid_json_input& e) + catch (const json::invalid_json_input& e) // struct resp (via github_post()) { // Note: e.name is the GitHub API endpoint. // @@ -768,16 +791,16 @@ namespace brep << e.line << ", column: " << e.column << ", byte offset: " << e.position << ", error: " << e; } - catch (const invalid_argument& e) + catch (const invalid_argument& e) // github_post() { error << "malformed header(s) in response: " << e; } - catch (const system_error& e) + catch (const system_error& e) // github_post() { error << "unable to fetch pull request (errno=" << e.code () << "): " << e.what (); } - catch (const runtime_error& e) // From response type's parsing constructor. + catch (const runtime_error& e) // struct resp { // GitHub response contained error(s) (could be ours or theirs at this // point). @@ -804,8 +827,6 @@ namespace brep // // Return the name or throw invalid_argument if it is invalid. // - // @@ TODO: dangerous API. - // static const string& gq_name (const string& v) { @@ -824,6 +845,13 @@ namespace brep return v; } + static string + gq_name (string&& v) + { + gq_name (v); + return move (v); + } + // Serialize a string to GraphQL. // // Return the serialized string or throw invalid_argument if the string is @@ -878,8 +906,6 @@ namespace brep // // Return the enum value or throw invalid_argument if it is invalid. // - // @@ TODO: dangerous API. - // static const string& gq_enum (const string& v) { @@ -888,4 +914,12 @@ namespace brep return gq_name (v); } + + static string + gq_enum (string&& v) + { + gq_enum (v); + return move (v); + } + } diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx index 7c564d7..50950d4 100644 --- a/mod/mod-ci-github-gq.hxx +++ b/mod/mod-ci-github-gq.hxx @@ -24,6 +24,9 @@ namespace brep // `check_runs` with the new data (node id and state_synced). Return false // and issue diagnostics if the request failed. // + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. + // // Note that creating a check_run named `foo` will effectively replace any // existing check_runs with that name. They will still exist on the GitHub // servers but GitHub will only consider the latest one (for display in the @@ -40,6 +43,9 @@ namespace brep // data (node id, state, and state_synced). Return false and issue // diagnostics if the request failed. // + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. + // // If the details_url is absent GitHub will use the app's homepage. // // The gq_built_result is required if the build_state is built because @@ -66,6 +72,9 @@ namespace brep // Update a check run on GitHub. Update `cr` with the new data (state and // state_synced). Return false and issue diagnostics if the request failed. // + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. + // // Note that GitHub allows any state transitions except from built (but // built to built is allowed). The latter case is signalled by setting the // check_run state_synced member to false and the state member to built. @@ -99,6 +108,8 @@ namespace brep // Issue diagnostics and return absent if the request failed (which means it // will be treated by the caller as still being generated). // + // Throw invalid_argument if the node id is invalid. + // // Note that the first request causes GitHub to start preparing the test // merge commit. // diff --git a/mod/mod-ci-github-service-data.cxx b/mod/mod-ci-github-service-data.cxx index 3fbb0eb..9f66a6c 100644 --- a/mod/mod-ci-github-service-data.cxx +++ b/mod/mod-ci-github-service-data.cxx @@ -10,6 +10,15 @@ namespace brep { using event = json::event; + [[noreturn]] static void + throw_json (json::parser& p, const string& m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + m); + } + service_data:: service_data (const string& json) { @@ -32,11 +41,7 @@ namespace brep if (v == "local") kind = local; else if (v == "remote") kind = remote; else - { - throw json::invalid_json_input ( - p.input_name, p.line (), p.column (), p.position (), - "invalid service data kind: '" + v + '\''); - } + throw_json (p, "invalid service data kind: '" + v + '\''); } pre_check = p.next_expect_member_boolean<bool> ("pre_check"); @@ -44,16 +49,13 @@ namespace brep warning_success = p.next_expect_member_boolean<bool> ("warning_success"); - // Installation access token. + // Installation access token (IAT). // - p.next_expect_member_object ("installation_access"); - installation_access.token = p.next_expect_member_string ("token"); - installation_access.expires_at = - gh_from_iso8601 (p.next_expect_member_string ("expires_at")); - p.next_expect (event::end_object); + p.next_expect_name ("installation_access"); + installation_access = gh_installation_access_token (p); - installation_id = - p.next_expect_member_number<uint64_t> ("installation_id"); + app_id = p.next_expect_member_string ("app_id"); + installation_id = p.next_expect_member_string ("installation_id"); repository_node_id = p.next_expect_member_string ("repository_node_id"); repository_clone_url = p.next_expect_member_string ("repository_clone_url"); @@ -82,7 +84,16 @@ namespace brep nid = *v; } - build_state s (to_build_state (p.next_expect_member_string ("state"))); + build_state s; + try + { + s = to_build_state (p.next_expect_member_string ("state")); + } + catch (const invalid_argument& e) + { + throw_json (p, e.what ()); + } + bool ss (p.next_expect_member_boolean<bool> ("state_synced")); optional<result_status> rs; @@ -90,7 +101,14 @@ namespace brep string* v (p.next_expect_member_string_null ("status")); if (v != nullptr) { - rs = bbot::to_result_status (*v); + try + { + rs = bbot::to_result_status (*v); + } + catch (const invalid_argument& e) + { + throw_json (p, e.what ()); + } assert (s == build_state::built); } } @@ -117,7 +135,8 @@ namespace brep service_data (bool ws, string iat_tok, timestamp iat_ea, - uint64_t iid, + string aid, + string iid, string rid, string rcu, kind_type k, @@ -128,7 +147,8 @@ namespace brep : kind (k), pre_check (pc), re_request (rr), warning_success (ws), installation_access (move (iat_tok), iat_ea), - installation_id (iid), + app_id (move (aid)), + installation_id (move (iid)), repository_node_id (move (rid)), repository_clone_url (move (rcu)), check_sha (move (cs)), @@ -143,7 +163,8 @@ namespace brep service_data (bool ws, string iat_tok, timestamp iat_ea, - uint64_t iid, + string aid, + string iid, string rid, string rcu, kind_type k, @@ -156,7 +177,8 @@ namespace brep : kind (k), pre_check (pc), re_request (rr), warning_success (ws), installation_access (move (iat_tok), iat_ea), - installation_id (iid), + app_id (move (aid)), + installation_id (move (iid)), repository_node_id (move (rid)), repository_clone_url (move (rcu)), pr_node_id (move (pid)), @@ -189,13 +211,33 @@ namespace brep s.member ("warning_success", warning_success); - // Installation access token. + // Installation access token (IAT). // s.member_begin_object ("installation_access"); s.member ("token", installation_access.token); - s.member ("expires_at", gh_to_iso8601 (installation_access.expires_at)); + + // IAT expires_at timestamp. + // + { + string v; + try + { + v = gh_to_iso8601 (installation_access.expires_at); + } + catch (const system_error& e) + { + // Translate for simplicity. + // + throw invalid_argument ("unable to convert IAT expires_at value " + + to_string (system_clock::to_time_t ( + installation_access.expires_at))); + } + s.member ("expires_at", move (v)); + } + s.end_object (); + s.member ("app_id", app_id); s.member ("installation_id", installation_id); s.member ("repository_node_id", repository_node_id); s.member ("repository_clone_url", repository_clone_url); @@ -235,7 +277,7 @@ namespace brep if (cr.status) { assert (cr.state == build_state::built); - s.value (to_string (*cr.status)); + s.value (to_string (*cr.status)); // Doesn't throw. } else s.value (nullptr); diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx index cabd19a..50bb49d 100644 --- a/mod/mod-ci-github-service-data.hxx +++ b/mod/mod-ci-github-service-data.hxx @@ -11,8 +11,6 @@ namespace brep { - // @@@ Check if any data members are unused (once the dust settles). - // Service data associated with the tenant (corresponds to GH check suite). // // It is always a top-level JSON object and the first member is always the @@ -91,7 +89,8 @@ namespace brep // gh_installation_access_token installation_access; - uint64_t installation_id; + string app_id; + string installation_id; string repository_node_id; // GitHub-internal opaque repository id. @@ -99,7 +98,7 @@ namespace brep // The following two are only used for pull requests. // - // @@ TODO/LATER: maybe put them in a struct? + // @@ TODO/LATER: maybe put them in a struct, if more members? // optional<string> pr_node_id; optional<uint32_t> pr_number; @@ -139,6 +138,9 @@ namespace brep // // Throw invalid_argument if the schema version is not supported. // + // Throw invalid_argument (invalid_json_input) in case of malformed JSON + // or any invalid values. + // explicit service_data (const string& json); @@ -150,7 +152,8 @@ namespace brep service_data (bool warning_success, string iat_token, timestamp iat_expires_at, - uint64_t installation_id, + string app_id, + string installation_id, string repository_node_id, string repository_clone_url, kind_type kind, @@ -164,7 +167,8 @@ namespace brep service_data (bool warning_success, string iat_token, timestamp iat_expires_at, - uint64_t installation_id, + string app_id, + string installation_id, string repository_node_id, string repository_clone_url, kind_type kind, @@ -179,6 +183,11 @@ namespace brep // Serialize to JSON. // + // Throw invalid_argument if any values are invalid. + // + // May also throw invalid_json_output but that would be a programming + // error. + // string json () const; }; diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx index 119968d..5bcec98 100644 --- a/mod/mod-ci-github.cxx +++ b/mod/mod-ci-github.cxx @@ -19,26 +19,6 @@ #include <stdexcept> -// @@ Remaining TODOs -// -// - Rerequested checks -// -// - check_suite (action: rerequested): received when user re-runs all -// checks. -// -// - check_run (action: rerequested): received when user re-runs a -// specific check or all failed checks. -// -// @@ TMP I have confirmed that the above is accurate. -// -// Will need to extract a few more fields from check_runs, but the layout -// is very similar to that of check_suite. -// -// - Choose strong webhook secret (when deploying). -// -// - Check that delivery UUID has not been received before (replay attack). -// - // Resources: // // Creating an App: @@ -81,6 +61,8 @@ namespace brep void ci_github:: init (scanner& s) { + HANDLER_DIAG; + { shared_ptr<tenant_service_base> ts ( dynamic_pointer_cast<tenant_service_base> (shared_from_this ())); @@ -98,6 +80,9 @@ namespace brep if (options_->build_config_specified () && options_->ci_github_app_webhook_secret_specified ()) { + if (!options_->ci_github_app_id_private_key_specified ()) + fail << "no app id/private key mappings configured"; + ci_start::init (make_shared<options::ci_start> (*options_)); database_module::init (*options_, options_->build_db_retry ()); @@ -241,33 +226,48 @@ namespace brep fail << "unable to compute request HMAC: " << e; } - // Process the `warning` webhook request query parameter. + // Process the `app-id` and `warning` webhook request query parameters. // + string app_id; bool warning_success; { const name_values& rps (rq.parameters (1024, true /* url_only */)); - auto i (find_if (rps.begin (), rps.end (), - [] (auto&& rp) {return rp.name == "warning";})); + bool ai (false), wa (false); - if (i == rps.end ()) - throw invalid_request (400, - "missing 'warning' webhook query parameter"); + auto badreq = [] (const string& m) + { + throw invalid_request (400, m); + }; - if (!i->value) - throw invalid_request ( - 400, "missing 'warning' webhook query parameter value"); + for (const name_value& rp: rps) + { + if (rp.name == "app-id") + { + if (!rp.value) + badreq ("missing 'app-id' webhook query parameter value"); - const string& v (*i->value); + ai = true; + app_id = *rp.value; + } + else if (rp.name == "warning") + { + if (!rp.value) + badreq ("missing 'warning' webhook query parameter value"); - if (v == "success") warning_success = true; - else if (v == "failure") warning_success = false; - else - { - throw invalid_request ( - 400, - "invalid 'warning' webhook query parameter value: '" + v + '\''); + wa = true; + const string& v (*rp.value); + + if (v == "success") warning_success = true; + else if (v == "failure") warning_success = false; + else + badreq ("invalid 'warning' webhook query parameter value: '" + v + + '\''); + } } + + if (!ai) badreq ("missing 'app-id' webhook query parameter"); + if (!wa) badreq ("missing 'warning' webhook query parameter"); } // There is a webhook event (specified in the x-github-event header) and @@ -280,9 +280,6 @@ namespace brep // is that we want be "notified" of new actions at which point we can // decide whether to ignore them or to handle. // - // @@ There is also check_run even (re-requested by user, either - // individual check run or all the failed check runs). - // if (event == "check_suite") { gh_check_suite_event cs; @@ -302,6 +299,12 @@ namespace brep throw invalid_request (400, move (m)); } + if (cs.check_suite.app_id != app_id) + { + fail << "webhook check_suite app.id " << cs.check_suite.app_id + << " does not match app-id query parameter " << app_id; + } + if (cs.action == "requested") { return handle_check_suite_request (move (cs), warning_success); @@ -316,13 +319,10 @@ namespace brep else if (cs.action == "completed") { // GitHub thinks that "all the check runs in this check suite have - // completed and a conclusion is available". Looks like this one we - // ignore? - // - // What if our bookkeeping says otherwise? But then we can't even - // access the service data easily here. @@ TODO: maybe/later. + // completed and a conclusion is available". Check with our own + // bookkeeping and log an error if there is a mismatch. // - return true; + return handle_check_suite_completed (move (cs), warning_success); } else { @@ -353,6 +353,12 @@ namespace brep throw invalid_request (400, move (m)); } + if (cr.check_run.app_id != app_id) + { + fail << "webhook check_run app.id " << cr.check_run.app_id + << " does not match app-id query parameter " << app_id; + } + if (cr.action == "rerequested") { // Someone manually requested to re-run a specific check run. @@ -398,7 +404,16 @@ namespace brep throw invalid_request (400, move (m)); } - if (pr.action == "opened" || pr.action == "synchronize") + // Store the app-id webhook query parameter in the gh_pull_request + // object (see gh_pull_request for an explanation). + // + // When we receive the other webhooks we do check that the app ids in + // the payload and query match but here we have to assume it is valid. + // + pr.pull_request.app_id = app_id; + + if (pr.action == "opened" || + pr.action == "synchronize") { // opened // A pull request was opened. @@ -408,18 +423,74 @@ namespace brep // new commits were pushed to the head branch. (Note that there is // no equivalent event for the base branch.) // - // Note that both cases are handled the same: we start a new CI + // Note that both cases are handled similarly: we start a new CI // request which will be reported on the new commit id. // return handle_pull_request (move (pr), warning_success); } - else + else if (pr.action == "edited") + { + // PR base branch changed (to a different branch) besides other + // irrelevant changes (title, body, etc). + // + // This is in a sense a special case of the base branch moving. In + // that case we don't do anything (due to the head sharing problem) + // relying instead on the branch protection rule. So it makes sense + // to do the same here. + // + return true; + } + else if (pr.action == "closed") + { + // PR has been closed (as merged or not; see merged member). Also + // apparently received if base branch is deleted (and the same + // for head branch). See also the reopened event below. + // + // While it may seem natural to cancel the CI for the closed PR, it + // might actually be useful to have a completed CI record. GitHub + // doesn't prevent us from publishing CI results for the closed PR + // (even if both base and head branches were deleted). And if such a + // PR is reopened, the CI results remain. + // + return true; + } + else if (pr.action == "reopened") { - // Ignore the remaining actions by sending a 200 response with empty - // body. + // Previously closed PR has been reopened. + // + // Since we don't cancel the CI for a closed PR, there is nothing + // to do if it is reopened. + // + return true; + } + else if (pr.action == "assigned" || + pr.action == "auto_merge_disabled" || + pr.action == "auto_merge_enabled" || + pr.action == "converted_to_draft" || + pr.action == "demilestoned" || + pr.action == "dequeued" || + pr.action == "enqueued" || + pr.action == "labeled" || + pr.action == "locked" || + pr.action == "milestoned" || + pr.action == "ready_for_review" || + pr.action == "review_request_removed" || + pr.action == "review_requested" || + pr.action == "unassigned" || + pr.action == "unlabeled" || + pr.action == "unlocked") + { + // These have no relation to CI. // - // @@ Ignore known but log unknown, as in check_suite above? + return true; + } + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. // + error << "unknown action '" << pr.action << "' in pull_request event"; + return true; } } @@ -488,7 +559,7 @@ namespace brep // let's obtain it to flush out any permission issues early. Also, it is // valid for an hour so we will most likely make use of it. // - optional<string> jwt (generate_jwt (trace, error)); + optional<string> jwt (generate_jwt (cs.check_suite.app_id, trace, error)); if (!jwt) throw server_error (); @@ -501,10 +572,6 @@ namespace brep l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); - // @@ What happens if we call this functions with an already existing - // node_id (e.g., replay attack). See the UUID header above. - // - // While it would have been nice to cancel CIs of PRs with this branch as // base not to waste resources, there are complications: Firstly, we can // only do this for remote PRs (since local PRs will most likely share the @@ -542,10 +609,9 @@ namespace brep { kind = service_data::remote; - if (optional<pair<tenant_service, bool>> p = - find (*build_db_, "ci-github", sid)) + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) { - tenant_service& ts (p->first); + tenant_service& ts (d->service); try { @@ -577,6 +643,7 @@ namespace brep service_data sd (warning_success, iat->token, iat->expires_at, + cs.check_suite.app_id, cs.installation.id, move (cs.repository.node_id), move (cs.repository.clone_url), @@ -634,8 +701,128 @@ namespace brep return true; } + bool ci_github:: + handle_check_suite_completed (gh_check_suite_event cs, bool warning_success) + { + // The plans is as follows: + // + // 1. Load the service data. + // + // 2. Verify it is completed. + // + // 3. Verify the check run counts match. + // + // 4. Verify (like in build_built()) that all the check runs are + // completed. + // + // 5. Verify the result matches what GitHub thinks it is. + + HANDLER_DIAG; + + l3 ([&]{trace << "check_suite event { " << cs << " }";}); + + // Service id that uniquely identifies the CI tenant. + // + string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha); + + // The common log entry subject. + // + string sub ("check suite " + cs.check_suite.node_id + '/' + sid); + + // Load the service data. + // + service_data sd; + + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) + { + try + { + sd = service_data (*d->service.data); + } + catch (const invalid_argument& e) + { + fail << "failed to parse service data: " << e; + } + } + else + { + error << sub << ": tenant_service does not exist"; + return true; + } + + // Verify the completed flag and the number of check runs. + // + if (!sd.completed) + { + error << sub << " service data complete flag is false"; + return true; + } + + // Received count will be one higher because we don't store the conclusion + // check run. + // + size_t check_runs_count (sd.check_runs.size () + 1); + + if (check_runs_count == 1) + { + error << sub << ": no check runs in service data"; + return true; + } + + if (cs.check_suite.check_runs_count != check_runs_count) + { + error << sub << ": check runs count " << cs.check_suite.check_runs_count + << " does not match service data count " << check_runs_count; + return true; + } + + // Verify that all the check runs are built and compute the summary + // conclusion. + // + result_status conclusion (result_status::success); + + for (const check_run& cr: sd.check_runs) + { + if (cr.state == build_state::built) + { + assert (cr.status.has_value ()); + conclusion |= *cr.status; + } + else + { + error << sub << ": unbuilt check run in service data"; + return true; + } + } + + // Verify the conclusion. + // + if (!cs.check_suite.conclusion) + { + error << sub << ": absent conclusion in completed check suite"; + return true; + } + + // Note that the case mismatch is due to GraphQL (gh_conclusion()) + // requiring uppercase conclusion values while the received webhook values + // are lower case. + // + string gh_conclusion (gh_to_conclusion (conclusion, warning_success)); + + if (icasecmp (*cs.check_suite.conclusion, gh_conclusion) != 0) + { + error << sub << ": conclusion " << *cs.check_suite.conclusion + << " does not match service data conclusion " << gh_conclusion; + return true; + } + + return true; + } + // Create a gq_built_result. // + // Throw invalid_argument in case of invalid result_status. + // static gq_built_result make_built_result (result_status rs, bool warning_success, string message) { @@ -705,7 +892,7 @@ namespace brep auto get_iat = [this, &trace, &error, &cr] () -> optional<gh_installation_access_token> { - optional<string> jwt (generate_jwt (trace, error)); + optional<string> jwt (generate_jwt (cr.check_run.app_id, trace, error)); if (!jwt) return nullopt; @@ -737,15 +924,15 @@ namespace brep // archived. // service_data sd; + string tenant_id; { // Service id that uniquely identifies the CI tenant. // string sid (repo_node_id + ':' + head_sha); - if (optional<pair<tenant_service, bool>> p = - find (*build_db_, "ci-github", sid)) + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) { - if (p->second) // Tenant is archived + if (d->archived) // Tenant is archived { // Fail (re-create) the check runs. // @@ -801,7 +988,7 @@ namespace brep return true; } - tenant_service& ts (p->first); + tenant_service& ts (d->service); try { @@ -811,6 +998,8 @@ namespace brep { fail << "failed to parse service data: " << e; } + + tenant_id = d->tenant_id; } else { @@ -958,7 +1147,9 @@ namespace brep // only if the build is actually restarted). // auto update_sd = [&error, &new_iat, &race, - &cr, &bcr, &ccr] (const tenant_service& ts, + tenant_id = move (tenant_id), + &cr, &bcr, &ccr] (const string& ti, + const tenant_service& ts, build_state) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction @@ -966,6 +1157,16 @@ namespace brep race = false; // Reset. + if (tenant_id != ti) + { + // The tenant got replaced since we loaded it but we managed to + // trigger a rebuild in the new tenant. Who knows whose check runs are + // visible, so let's fail ours similar to the cases below. + // + race = true; + return nullopt; + } + service_data sd; try { @@ -1109,312 +1310,6 @@ namespace brep return true; } - // @@ TMP - // -#if 0 - bool ci_github:: - handle_check_run_rerequest (const gh_check_run_event& cr, - bool warning_success) - { - HANDLER_DIAG; - - l3 ([&]{trace << "check_run event { " << cr << " }";}); - - // Fail if this is the conclusion check run. - // - if (cr.check_run.name == conclusion_check_run_name) - { - // @@ Fail conclusion check run with appropriate message and reurn - // true. - - l3 ([&]{trace << "ignoring conclusion check_run";}); - - // 422 Unprocessable Content: The request was well-formed (i.e., - // syntactically correct) but could not be processed. - // - throw invalid_request (422, "Conclusion check run cannot be rebuilt"); - } - - // Get a new installation access token. - // - auto get_iat = [this, &trace, &error, &cr] () - -> optional<gh_installation_access_token> - { - optional<string> jwt (generate_jwt (trace, error)); - if (!jwt) - return nullopt; - - optional<gh_installation_access_token> iat ( - obtain_installation_access_token (cr.installation.id, - move (*jwt), - error)); - - if (iat) - l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); - - return iat; - }; - - // Create a new conclusion check run, replacing the existing one. - // - // Return the check run on success or nullopt on failure. - // - auto create_conclusion_cr = - [&cr, &error, warning_success] (const gh_installation_access_token& iat, - build_state bs, - optional<result_status> rs = nullopt, - optional<string> msg = nullopt) - -> optional<check_run> - { - optional<gq_built_result> br; - if (rs) - { - assert (msg); - - br = make_built_result (*rs, warning_success, move (*msg)); - } - - check_run r; - r.name = conclusion_check_run_name; - - if (gq_create_check_run (error, r, iat.token, - rni, hs, - nullopt /* details_url */, - bs, move (br))) - { - return r; - } - else - return nullopt; - }; - - // The overall plan is as follows: - // - // 1. Call the rebuild() function to attempt to schedule a rebuild. Pass - // the update function that does the following (if called): - // - // a. Update the check run being rebuilt (may also not exist). - // - // b. Clear the completed flag if true. - // - // c. "Return" the service data to be used after the call. - // - // 2. If the result of rebuild() indicates the tenant is archived, fail - // the conclusion check run with appropriate diagnostics. - // - // 3. If original state is queued, then no rebuild was scheduled and we do - // nothing. - // - // 4. Otherwise (the original state is building or built): - // - // a. Change the check run state to queued. - // - // b. Change the conclusion check run to building (do unconditionally - // to mitigate races). - // - // Note that while conceptually we are updating existing check runs, in - // practice we have to create new check runs to replace the existing ones - // because GitHub does not allow transitioning out of the built state. - // - // This results in a new node id for each check run but we can't save them - // to the service data after the rebuild() call. As a workaround, when - // updating the service data we 1) clear the re-requested check run's node - // id and set the state_synced flag to true to signal to build_building() - // and build_built() that it needs to create a new check run; and 2) clear - // the conclusion check run's node id to cause build_built() to create a - // new conclusion check run. And these two check runs' node ids will be - // saved to the service data. - - // Parse the check_run's details_url to extract build id. - // - // While this is a bit hackish, there doesn't seem to be a better way - // (like associating custom data with a check run). Note that the GitHub - // UI only allows rebuilding completed check runs, so the details URL - // should be there. - // - optional<build_id> bid (parse_details_url (cr.check_run.details_url)); - if (!bid) - { - fail << "check run " << cr.check_run.node_id - << ": failed to extract build id from details_url"; - } - - // The IAT retrieved from the service data. - // - optional<gh_installation_access_token> iat; - - // True if the check run exists in the service data. - // - bool cr_found (false); - - // Update the state of the check run in the service data. Return (via - // captured references) the IAT and whether the check run was found. - // - // Called by rebuild(), but only if the build is actually restarted. - // - auto update_sd = [&iat, - &cr_found, - &error, - &cr] (const tenant_service& ts, build_state) - -> optional<string> - { - // NOTE: this lambda may be called repeatedly (e.g., due to transaction - // being aborted) and so should not move out of its captures. - - service_data sd; - try - { - sd = service_data (*ts.data); - } - catch (const invalid_argument& e) - { - error << "failed to parse service data: " << e; - return nullptr; - } - - if (!iat) - iat = sd.installation_access; - - // If the re-requested check run is found, update it in the service - // data. - // - const string& nid (cr.check_run.node_id); - - for (check_run& cr: sd.check_runs) - { - if (cr.node_id && *cr.node_id == nid) - { - cr_found = true; - cr.state = build_state::queued; - sd.completed = false; - - // Clear the check run node ids and set state_synced to true to - // cause build_building() and/or build_built() to create new check - // runs (see the plan above for details). - // - cr.node_id = nullopt; - cr.state_synced = true; - sd.conclusion_node_id = nullopt; - - return sd.json (); - } - } - - return nullopt; - }; - - optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd)); - - if (!bs) - { - // Build has expired (most probably the tenant has been archived). - // - // Update the conclusion check run to notify the user (but have to - // replace it with a new one because we don't know the existing one's - // node id). - // - optional<gh_installation_access_token> iat (get_iat ()); - if (!iat) - throw server_error (); - - if (optional<check_run> ccr = create_conclusion_cr ( - *iat, - build_state::built, - result_status::error, - "Unable to rebuild: tenant has been archived or no such build")) - { - l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";}); - } - else - { - // Log the error and return failure to GitHub which will presumably - // indicate this in its GUI. - // - fail << "check run " << cr.check_run.node_id - << ": unable to create conclusion check run"; - } - } - else if (*bs == build_state::queued) - { - // The build was already queued so nothing to be done. This might happen - // if the user clicked "re-run" multiple times before we managed to - // update the check run. - } - else - { - // The build has been requeued. - // - assert (*bs == build_state::building || *bs == build_state::built); - - if (!cr_found) - { - // Respond with an error otherwise GitHub will post a message in its - // GUI saying "you have successfully requested a rebuild of ..." - // - fail << "check_run " << cr.check_run.node_id - << ": build restarted but check run does not exist " - << "in service data"; - } - - // Get a new IAT if the one from the service data has expired. - // - assert (iat.has_value ()); - - if (system_clock::now () > iat->expires_at) - { - iat = get_iat (); - if (!iat) - throw server_error (); - } - - // Update (by replacing) the re-requested and conclusion check runs to - // queued and building, respectively. - // - // If either fails we can only log the error but build_building() and/or - // build_built() should correct the situation (see above for details). - // - - // Update re-requested check run. - // - { - check_run ncr; // New check run. - ncr.name = cr.check_run.name; - - if (gq_create_check_run (error, - ncr, - iat->token, - cr.repository.node_id, - cr.check_run.check_suite.head_sha, - cr.check_run.details_url, - build_state::queued)) - { - l3 ([&]{trace << "created check_run { " << ncr << " }";}); - } - else - { - error << "check_run " << cr.check_run.node_id - << ": unable to create (to update) check run in queued state"; - } - } - - // Update conclusion check run. - // - if (optional<check_run> ccr = - create_conclusion_cr (*iat, build_state::building)) - { - l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";}); - } - else - { - error << "check_run " << cr.check_run.node_id - << ": unable to create (to update) conclusion check run"; - } - } - - return true; - } -#endif - // Miscellaneous pull request facts // // - Although some of the GitHub documentation makes it sound like they @@ -1433,30 +1328,6 @@ namespace brep // gets updated with the head commit's SHA and check_suite.pull_requests[] // will contain all PRs with this branch as head. // - // Remaining TODOs - // - // - @@ TODO? PR base branch changed (to a different branch) - // - // => pull_request(edited) - // - // - PR closed @@ TODO - // - // Also received if base branch is deleted. (And presumably same for head - // branch.) - // - // => pull_request(closed) - // - // Cancel CI? - // - // - PR merged @@ TODO - // - // => pull_request(merged) - // - // => check_suite(PR_base) - // - // Probably wouldn't want to CI the base again because the PR CI would've - // done the equivalent already. - // bool ci_github:: handle_pull_request (gh_pull_request_event pr, bool warning_success) { @@ -1468,7 +1339,7 @@ namespace brep // let's obtain it to flush out any permission issues early. Also, it is // valid for an hour so we will most likely make use of it. // - optional<string> jwt (generate_jwt (trace, error)); + optional<string> jwt (generate_jwt (pr.pull_request.app_id, trace, error)); if (!jwt) throw server_error (); @@ -1549,6 +1420,7 @@ namespace brep service_data sd (warning_success, move (iat->token), iat->expires_at, + pr.pull_request.app_id, pr.installation.id, move (pr.repository.node_id), move (pr.repository.clone_url), @@ -1592,10 +1464,13 @@ namespace brep return true; } - function<optional<string> (const tenant_service&)> ci_github:: - build_unloaded (tenant_service&& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded (const string& ti, + tenant_service&& ts, const diag_epilogue& log_writer) const noexcept { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -1610,15 +1485,24 @@ namespace brep } return sd.pre_check - ? build_unloaded_pre_check (move (ts), move (sd), log_writer) - : build_unloaded_load (move (ts), move (sd), log_writer); + ? build_unloaded_pre_check (move (ts), move (sd), log_writer) + : build_unloaded_load (ti, move (ts), move (sd), log_writer); } - function<optional<string> (const tenant_service&)> ci_github:: + function<optional<string> (const string&, const tenant_service&)> ci_github:: build_unloaded_pre_check (tenant_service&& ts, service_data&& sd, const diag_epilogue& log_writer) const noexcept + try { + // NOTE: this function is noexcept and should not throw. + // + // In a few places where invalid_argument is unlikely to be thrown and/or + // would indicate that things are seriously broken we let it propagate to + // the function catch block where the pre-check tenant will be canceled + // (otherwise we could end up in an infinite loop, e.g., because the + // problematic arguments won't change). + NOTIFICATION_DIAG (log_writer); // We get here for PRs only (but both local and remote). The overall @@ -1644,6 +1528,8 @@ namespace brep // Request PR pre-check info (triggering the generation of the test merge // commit on the GitHub's side). // + // Let unlikely invalid_argument propagate (see above). + // optional<gq_pr_pre_check_info> pc ( gq_fetch_pull_request_pre_check_info (error, sd.installation_access.token, @@ -1703,38 +1589,50 @@ namespace brep // notifications until (1) we load the tenant, (2) we cancel it, or (3) // it gets archived after some timeout. // - if (auto pr = create (error, warn, verb_ ? &trace : nullptr, - *build_db_, retry_, - tenant_service (sid, "ci-github", sd.json ()), - chrono::seconds (30) /* interval */, - chrono::seconds (0) /* delay */, - duplicate_tenant_mode::ignore)) - { - if (pr->second == duplicate_tenant_result::ignored) + try + { + if (auto pr = create (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + tenant_service (sid, "ci-github", sd.json ()), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */, + duplicate_tenant_mode::ignore)) { - // This PR is sharing a head commit with something else. - // - // If this is a local PR then it's probably the branch push, which - // is expected, so do nothing. - // - // If this is a remote PR then it could be anything (branch push, - // local PR, or another remote PR) which in turn means the CI result - // may end up being for head, not merge commit. There is nothing we - // can do about it on our side (the user can enable the head-behind- - // base protection on their side). - // - if (sd.kind == service_data::remote) + if (pr->second == duplicate_tenant_result::ignored) { - l3 ([&]{trace << "remote pull request " << *sd.pr_node_id - << ": CI tenant already exists for " << sid;}); + // This PR is sharing a head commit with something else. + // + // If this is a local PR then it's probably the branch push, which + // is expected, so do nothing. + // + // If this is a remote PR then it could be anything (branch push, + // local PR, or another remote PR) which in turn means the CI + // result may end up being for head, not merge commit. There is + // nothing we can do about it on our side (the user can enable the + // head-behind- base protection on their side). + // + if (sd.kind == service_data::remote) + { + l3 ([&]{trace << "remote pull request " << *sd.pr_node_id + << ": CI tenant already exists for " << sid;}); + } } } + else + { + error << "pull request " << *sd.pr_node_id + << ": failed to create unloaded CI tenant " + << "with tenant_service id " << sid; + + // Fall through to cancel. + } } - else + catch (const runtime_error& e) // Database retries exhausted. { error << "pull request " << *sd.pr_node_id - << ": unable to create unloaded CI tenant " - << "with tenant_service id " << sid; + << ": failed to create unloaded CI tenant " + << "with tenant_service id " << sid + << ": " << e.what (); // Fall through to cancel. } @@ -1742,26 +1640,70 @@ namespace brep // Cancel the pre-check tenant. // - if (!cancel (error, warn, verb_ ? &trace : nullptr, - *build_db_, retry_, - ts.type, - ts.id)) + try + { + if (!cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + ts.type, + ts.id)) + { + // Should never happen (no such tenant). + // + error << "pull request " << *sd.pr_node_id + << ": failed to cancel pre-check tenant with tenant_service id " + << ts.id; + } + } + catch (const runtime_error& e) // Database retries exhausted. { - // Should never happen (no such tenant). - // error << "pull request " << *sd.pr_node_id << ": failed to cancel pre-check tenant with tenant_service id " - << ts.id; + << ts.id << ": " << e.what (); + } + + return nullptr; + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + error << "pull request " << *sd.pr_node_id + << ": unhandled exception: " << e.what (); + + // Cancel the pre-check tenant otherwise we could end up in an infinite + // loop (see top of function). + // + try + { + if (cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + ts.type, + ts.id)) + l3 ([&]{trace << "canceled pre-check tenant " << ts.id;}); + } + catch (const runtime_error& e) // Database retries exhausted. + { + l3 ([&]{trace << "failed to cancel pre-check tenant " << ts.id << ": " + << e.what ();}); } return nullptr; } - function<optional<string> (const tenant_service&)> ci_github:: - build_unloaded_load (tenant_service&& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded_load (const string& tenant_id, + tenant_service&& ts, service_data&& sd, const diag_epilogue& log_writer) const noexcept + try { + // NOTE: this function is noexcept and should not throw. + // + // In a few places where invalid_argument is unlikely to be thrown and/or + // would indicate that things are seriously broken we let it propagate to + // the function catch block where the tenant will be canceled (otherwise + // we could end up in an infinite loop, e.g., because the problematic + // arguments won't change). + NOTIFICATION_DIAG (log_writer); // Load the tenant, which is essentially the same for both branch push and @@ -1782,7 +1724,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (optional<string> jwt = generate_jwt (trace, error)) + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) { new_iat = obtain_installation_access_token (sd.installation_id, move (*jwt), @@ -1807,6 +1749,8 @@ namespace brep check_run cr; cr.name = move (name); + // Let unlikely invalid_argument propagate (see above). + // if (gq_create_check_run (error, cr, iat->token, @@ -1833,12 +1777,16 @@ namespace brep { assert (!node_id.empty ()); + // Let unlikely invalid_argument propagate (see above). + // gq_built_result br ( make_built_result (rs, sd.warning_success, move (summary))); check_run cr; cr.name = name; // For display purposes only. + // Let unlikely invalid_argument propagate (see above). + // if (gq_update_check_run (error, cr, iat->token, @@ -1865,16 +1813,24 @@ namespace brep // string conclusion_node_id; // Conclusion check run node ID. - if (auto cr = create_synthetic_cr (conclusion_check_run_name)) + if (!sd.conclusion_node_id) { - l3 ([&]{trace << "created check_run { " << *cr << " }";}); + if (auto cr = create_synthetic_cr (conclusion_check_run_name)) + { + l3 ([&]{trace << "created check_run { " << *cr << " }";}); - conclusion_node_id = move (*cr->node_id); + conclusion_node_id = move (*cr->node_id); + } } + const string& effective_conclusion_node_id ( + sd.conclusion_node_id + ? *sd.conclusion_node_id + : conclusion_node_id); + // Load the CI tenant if the conclusion check run was created. // - if (!conclusion_node_id.empty ()) + if (!effective_conclusion_node_id.empty ()) { string ru; // Repository URL. @@ -1891,46 +1847,65 @@ namespace brep else ru = sd.repository_clone_url + '#' + sd.check_sha; + // Let unlikely invalid_argument propagate (see above). + // repository_location rl (move (ru), repository_type::git); - optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr, - *build_db_, retry_, - move (ts), - move (rl))); - - if (!r || r->status != 200) + try { - if (auto cr = update_synthetic_cr (conclusion_node_id, - conclusion_check_run_name, - result_status::error, - to_check_run_summary (r))) - { - l3 ([&]{trace << "updated check_run { " << *cr << " }";}); - } - else + optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + move (ts), + move (rl))); + + if (!r || r->status != 200) { - // Nothing really we can do in this case since we will not receive - // any further notifications. Log the error as a last resort. + // Let unlikely invalid_argument propagate (see above). + // + if (auto cr = update_synthetic_cr (effective_conclusion_node_id, + conclusion_check_run_name, + result_status::error, + to_check_run_summary (r))) + { + l3 ([&]{trace << "updated check_run { " << *cr << " }";}); + } + else + { + // Nothing really we can do in this case since we will not receive + // any further notifications. Log the error as a last resort. - error << "failed to load CI tenant " << ts.id - << " and unable to update conclusion"; + error << "failed to load CI tenant " << ts.id + << " and unable to update conclusion"; + } + + return nullptr; // No need to update service data in this case. } + } + catch (const runtime_error& e) // Database retries exhausted. + { + error << "failed to load CI tenant " << ts.id << ": " << e.what (); - return nullptr; // No need to update service data in this case. + // Fall through to retry on next call. } } - else if (!new_iat) - return nullptr; // Nothing to save (but retry on next call). + + if (!new_iat && conclusion_node_id.empty ()) + return nullptr; // Nothing to save (but potentially retry on next call). return [&error, + tenant_id, iat = move (new_iat), cni = move (conclusion_node_id)] - (const tenant_service& ts) -> optional<string> + (const string& ti, + const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to // transaction being aborted) and so should not move out of its // captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -1951,6 +1926,28 @@ namespace brep return sd.json (); }; } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + error << "CI tenant " << ts.id << ": unhandled exception: " << e.what (); + + // Cancel the tenant otherwise we could end up in an infinite loop (see + // top of function). + // + try + { + if (cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, ts.type, ts.id)) + l3 ([&]{trace << "canceled CI tenant " << ts.id;}); + } + catch (const runtime_error& e) // Database retries exhausted. + { + l3 ([&]{trace << "failed to cancel CI tenant " << ts.id + << ": " << e.what ();}); + } + + return nullptr; + } // Build state change notifications (see tenant-services.hxx for // background). Mapping our state transitions to GitHub pose multiple @@ -1961,9 +1958,9 @@ namespace brep // them when notifying GitHub. The first is not important (we expect the // state to go back to building shortly). The second should normally not // happen and would mean that a completed check suite may go back on its - // conclusion (which would be pretty confusing for the user). @@@ This - // can/will happen on check run rebuild. Distinguish between internal - // and external rebuilds? + // conclusion (which would be pretty confusing for the user). Note that + // the ->queued state transition of a check run rebuild triggered by + // us is handled directly in handle_check_run_rerequest(). // // So, for GitHub notifications, we only have the following linear // transition sequence: @@ -2040,13 +2037,17 @@ namespace brep // if we have node_id, then we update, otherwise, we create (potentially // overriding the check run created previously). // - function<optional<string> (const tenant_service&)> ci_github:: - build_queued (const tenant_service& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_queued (const string& tenant_id, + const tenant_service& ts, const vector<build>& builds, optional<build_state> istate, const build_queued_hints& hs, const diag_epilogue& log_writer) const noexcept + try { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -2126,7 +2127,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (optional<string> jwt = generate_jwt (trace, error)) + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) { new_iat = obtain_installation_access_token (sd.installation_id, move (*jwt), @@ -2146,6 +2147,8 @@ namespace brep { // Create a check_run for each build as a single request. // + // Let unlikely invalid_argument propagate. + // if (gq_create_check_runs (error, crs, iat->token, @@ -2161,15 +2164,20 @@ namespace brep } } - return [bs = move (bs), + return [tenant_id, + bs = move (bs), iat = move (new_iat), crs = move (crs), error = move (error), - warn = move (warn)] (const tenant_service& ts) -> optional<string> + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -2209,12 +2217,24 @@ namespace brep return sd.json (); }; } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + error << "CI tenant " << ts.id << ": unhandled exception: " << e.what (); + + return nullptr; + } - function<optional<string> (const tenant_service&)> ci_github:: - build_building (const tenant_service& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_building (const string& tenant_id, + const tenant_service& ts, const build& b, const diag_epilogue& log_writer) const noexcept + try { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -2272,7 +2292,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (optional<string> jwt = generate_jwt (trace, error)) + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) { new_iat = obtain_installation_access_token (sd.installation_id, move (*jwt), @@ -2290,6 +2310,8 @@ namespace brep // if (iat != nullptr) { + // Let unlikely invalid_argument propagate. + // if (gq_update_check_run (error, *cr, iat->token, @@ -2313,14 +2335,19 @@ namespace brep } } - return [iat = move (new_iat), + return [tenant_id, + iat = move (new_iat), cr = move (*cr), error = move (error), - warn = move (warn)] (const tenant_service& ts) -> optional<string> + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -2355,18 +2382,31 @@ namespace brep return sd.json (); }; } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + string bid (gh_check_run_name (b)); // Full build id. + + error << "check run " << bid << ": unhandled exception: " << e.what(); + + return nullptr; + } - function<optional<string> (const tenant_service&)> ci_github:: - build_built (const tenant_service& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_built (const string& tenant_id, + const tenant_service& ts, const build& b, const diag_epilogue& log_writer) const noexcept + try { - // @@ TODO Include service_data::event_node_id and perhaps ts.id in - // diagnostics? E.g. when failing to update check runs we print the - // build ID only. - // + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); + // @@ TODO Include ts.id in diagnostics? Check run build ids alone seem + // kind of meaningless. Log lines get pretty long this way however. + service_data sd; try { @@ -2458,7 +2498,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (optional<string> jwt = generate_jwt (trace, error)) + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) { new_iat = obtain_installation_access_token (sd.installation_id, move (*jwt), @@ -2485,6 +2525,11 @@ namespace brep { using namespace web::xhtml; + // Note: let all serialization exceptions propagate. The XML + // serialization code can throw bad_alloc or xml::serialization in + // case of I/O failures, but we're serializing to a string stream so + // both exceptions are unlikely. + // ostringstream os; xml::serializer s (os, "check_run_summary"); @@ -2580,7 +2625,8 @@ namespace brep if (cr.node_id) { - // Update existing check run to built. + // Update existing check run to built. Let unlikely invalid_argument + // propagate. // if (gq_update_check_run (error, cr, @@ -2597,7 +2643,7 @@ namespace brep } else { - // Create new check run. + // Create new check run. Let unlikely invalid_argument propagate. // // Note that we don't have build hints so will be creating this check // run with the full build id as name. In the unlikely event that an @@ -2645,6 +2691,8 @@ namespace brep cr.node_id = *sd.conclusion_node_id; cr.name = conclusion_check_run_name; + // Let unlikely invalid_argument propagate. + // if (gq_update_check_run (error, cr, iat->token, @@ -2671,15 +2719,20 @@ namespace brep } } - return [iat = move (new_iat), + return [tenant_id, + iat = move (new_iat), cr = move (cr), completed = completed, error = move (error), - warn = move (warn)] (const tenant_service& ts) -> optional<string> + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -2749,6 +2802,16 @@ namespace brep return sd.json (); }; } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + string bid (gh_check_run_name (b)); // Full build id. + + error << "check run " << bid << ": unhandled exception: " << e.what(); + + return nullptr; + } string ci_github:: details_url (const build& b) const @@ -2860,19 +2923,32 @@ namespace brep } optional<string> ci_github:: - generate_jwt (const basic_mark& trace, + generate_jwt (const string& app_id, + const basic_mark& trace, const basic_mark& error) const { string jwt; try { + // Look up the private key path for the app id and fail if not found. + // + const map<string, dir_path>& pks ( + options_->ci_github_app_id_private_key ()); + + auto pk (pks.find (app_id)); + if (pk == pks.end ()) + { + error << "unable to generate JWT: " + << "no private key configured for app id " << app_id; + return nullopt; + } + // Set token's "issued at" time 60 seconds in the past to combat clock // drift (as recommended by GitHub). // jwt = brep::generate_jwt ( *options_, - options_->ci_github_app_private_key (), - to_string (options_->ci_github_app_id ()), + pk->second, app_id, chrono::seconds (options_->ci_github_jwt_validity_period ()), chrono::seconds (60)); @@ -2928,7 +3004,7 @@ namespace brep // example. // optional<gh_installation_access_token> ci_github:: - obtain_installation_access_token (uint64_t iid, + obtain_installation_access_token (const string& iid, string jwt, const basic_mark& error) const { @@ -2937,7 +3013,7 @@ namespace brep { // API endpoint. // - string ep ("app/installations/" + to_string (iid) + "/access_tokens"); + string ep ("app/installations/" + iid + "/access_tokens"); uint16_t sc ( github_post (iat, ep, strings {"Authorization: Bearer " + jwt})); @@ -2963,6 +3039,8 @@ namespace brep // iat.expires_at -= chrono::minutes (5); } + // gh_installation_access_token (via github_post()) + // catch (const json::invalid_json_input& e) { // Note: e.name is the GitHub API endpoint. @@ -2972,12 +3050,12 @@ namespace brep << e.position << ", error: " << e; return nullopt; } - catch (const invalid_argument& e) + catch (const invalid_argument& e) // github_post() { error << "malformed header(s) in response: " << e; return nullopt; } - catch (const system_error& e) + catch (const system_error& e) // github_post() { error << "unable to get installation access token (errno=" << e.code () << "): " << e.what (); diff --git a/mod/mod-ci-github.hxx b/mod/mod-ci-github.hxx index b88d5e4..059801a 100644 --- a/mod/mod-ci-github.hxx +++ b/mod/mod-ci-github.hxx @@ -42,33 +42,40 @@ namespace brep virtual const cli::options& cli_options () const {return options::ci_github::description ();} - virtual function<optional<string> (const tenant_service&)> - build_unloaded (tenant_service&&, + virtual function<optional<string> (const string&, const tenant_service&)> + build_unloaded (const string& tenant_id, + tenant_service&&, const diag_epilogue& log_writer) const noexcept override; - function<optional<string> (const tenant_service&)> + function<optional<string> (const string&, const tenant_service&)> build_unloaded_pre_check (tenant_service&&, service_data&&, const diag_epilogue&) const noexcept; - function<optional<string> (const tenant_service&)> - build_unloaded_load (tenant_service&&, + function<optional<string> (const string&, const tenant_service&)> + build_unloaded_load (const string& tenant_id, + tenant_service&&, service_data&&, const diag_epilogue&) const noexcept; - virtual function<optional<string> (const tenant_service&)> - build_queued (const tenant_service&, + virtual function<optional<string> (const string&, const tenant_service&)> + build_queued (const string& tenant_id, + const tenant_service&, const vector<build>&, optional<build_state> initial_state, const build_queued_hints&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_building (const tenant_service&, const build&, + virtual function<optional<string> (const string&, const tenant_service&)> + build_building (const string& tenant_id, + const tenant_service&, + const build&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_built (const tenant_service&, const build&, + virtual function<optional<string> (const string&, const tenant_service&)> + build_built (const string& tenant_id, + const tenant_service&, + const build&, const diag_epilogue& log_writer) const noexcept override; private: @@ -83,6 +90,14 @@ namespace brep bool handle_check_suite_request (gh_check_suite_event, bool warning_success); + // Handle the check_suite event `completed` action. + // + // If warning_success is true, then map result_status::warning to SUCCESS + // and to FAILURE otherwise. + // + bool + handle_check_suite_completed (gh_check_suite_event, bool warning_success); + // Handle the check_run event `rerequested` action. // // If warning_success is true, then map result_status::warning to SUCCESS @@ -105,12 +120,16 @@ namespace brep details_url (const build&) const; optional<string> - generate_jwt (const basic_mark& trace, const basic_mark& error) const; + generate_jwt (const string& app_id, + const basic_mark& trace, + const basic_mark& error) const; - // Authenticate to GitHub as an app installation. + // Authenticate to GitHub as an app installation. Return the installation + // access token (IAT). Issue diagnostics and return nullopt if something + // goes wrong. // optional<gh_installation_access_token> - obtain_installation_access_token (uint64_t install_id, + obtain_installation_access_token (const string& install_id, string jwt, const basic_mark& error) const; diff --git a/mod/mod-ci.cxx b/mod/mod-ci.cxx index 52f4644..46fbf6a 100644 --- a/mod/mod-ci.cxx +++ b/mod/mod-ci.cxx @@ -422,8 +422,10 @@ handle (request& rq, response& rs) } #ifdef BREP_CI_TENANT_SERVICE -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_queued (const tenant_service&, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_queued (const string& /*tenant_id*/, + const tenant_service&, const vector<build>& bs, optional<build_state> initial_state, const build_queued_hints& hints, @@ -437,7 +439,8 @@ build_queued (const tenant_service&, << hints.single_package_version << ' ' << hints.single_package_config;}); - return [&bs, initial_state] (const tenant_service& ts) + return [&bs, initial_state] (const string& tenant_id, + const tenant_service& ts) { optional<string> r (ts.data); @@ -446,6 +449,7 @@ build_queued (const tenant_service&, string s ((!initial_state ? "queued " : "queued " + to_string (*initial_state) + ' ') + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + @@ -467,14 +471,18 @@ build_queued (const tenant_service&, }; } -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_building (const tenant_service&, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_building (const string& /*tenant_id*/, + const tenant_service&, const build& b, const diag_epilogue&) const noexcept { - return [&b] (const tenant_service& ts) + return [&b] (const string& tenant_id, + const tenant_service& ts) { string s ("building " + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + @@ -487,14 +495,17 @@ build_building (const tenant_service&, }; } -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_built (const tenant_service&, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_built (const string& /*tenant_id*/, + const tenant_service&, const build& b, const diag_epilogue&) const noexcept { - return [&b] (const tenant_service& ts) + return [&b] (const string& tenant_id, const tenant_service& ts) { string s ("built " + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + diff --git a/mod/mod-ci.hxx b/mod/mod-ci.hxx index e4a343c..132b5b0 100644 --- a/mod/mod-ci.hxx +++ b/mod/mod-ci.hxx @@ -71,26 +71,34 @@ namespace brep cli_options () const override {return options::ci::description ();} #ifdef BREP_CI_TENANT_SERVICE - virtual function<optional<string> (const tenant_service&)> - build_queued (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_queued (const string& tenant_id, + const tenant_service&, const vector<build>&, optional<build_state> initial_state, const build_queued_hints&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_building (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_building (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_built (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_built (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept override; #ifdef BREP_CI_TENANT_SERVICE_UNLOADED - virtual function<optional<string> (const tenant_service&)> - build_unloaded (tenant_service&&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_unloaded (const string& tenant_id, + tenant_service&&, const diag_epilogue& log_writer) const noexcept override; #endif #endif diff --git a/mod/module.cli b/mod/module.cli index 5799697..f0d5cdc 100644 --- a/mod/module.cli +++ b/mod/module.cli @@ -850,24 +850,20 @@ namespace brep // GitHub CI-specific options. // - size_t ci-github-app-id - { - "<id>", - "The GitHub App ID. Found in the app's settings on GitHub." - } - string ci-github-app-webhook-secret { "<secret>", "The GitHub App's configured webhook secret. If not set, then the - GitHub CI service is disabled." + GitHub CI service is disabled. Note: make sure to choose a strong + (random) secret." } - path ci-github-app-private-key + std::map<string, dir_path> ci-github-app-id-private-key { - "<path>", - "The private key used during GitHub API authentication. Created in - the GitHub App's settings." + "<id>=<path>", + "The private key used during GitHub API authentication for the + specified GitHub App ID. Both vales are found in the GitHub App's + settings." } uint16_t ci-github-jwt-validity-period = 600 diff --git a/mod/tenant-service.hxx b/mod/tenant-service.hxx index 8ba199a..5564a56 100644 --- a/mod/tenant-service.hxx +++ b/mod/tenant-service.hxx @@ -74,9 +74,11 @@ namespace brep // If the returned function is not NULL, it is called to update the // service data. It should return the new data or nullopt if no update is // necessary. Note: tenant_service::data passed to the callback and to the - // returned function may not be the same. Also, the returned function may - // be called multiple times (on transaction retries). Note that the passed - // log_writer is valid during the calls to the returned function. + // returned function may not be the same. Furthermore, tenant_ids may not + // be the same either, in case the tenant was replaced. Also, the returned + // function may be called multiple times (on transaction retries). Note + // that the passed log_writer is valid during the calls to the returned + // function. // // The passed initial_state indicates the logical initial state and is // either absent, `building` (interrupted), or `built` (rebuild). Note @@ -101,8 +103,10 @@ namespace brep bool single_package_config; }; - virtual function<optional<string> (const tenant_service&)> - build_queued (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_queued (const string& tenant_id, + const tenant_service&, const vector<build>&, optional<build_state> initial_state, const build_queued_hints&, @@ -112,8 +116,10 @@ namespace brep class tenant_service_build_building: public virtual tenant_service_base { public: - virtual function<optional<string> (const tenant_service&)> - build_building (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_building (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept = 0; }; @@ -121,8 +127,10 @@ namespace brep class tenant_service_build_built: public virtual tenant_service_base { public: - virtual function<optional<string> (const tenant_service&)> - build_built (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_built (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept = 0; }; @@ -140,8 +148,10 @@ namespace brep class tenant_service_build_unloaded: public virtual tenant_service_base { public: - virtual function<optional<string> (const tenant_service&)> - build_unloaded (tenant_service&&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_unloaded (const string& tenant_id, + tenant_service&&, const diag_epilogue& log_writer) const noexcept = 0; }; |