Diffstat (limited to 'mod')
 mod/ci-common.cxx                  |   75
 mod/ci-common.hxx                  |   24
 mod/database-module.cxx            |   20
 mod/database-module.hxx            |   14
 mod/mod-build-force.cxx            |    5
 mod/mod-build-result.cxx           |    9
 mod/mod-build-task.cxx             |   26
 mod/mod-ci-github-gh.cxx           |  182
 mod/mod-ci-github-gh.hxx           |  137
 mod/mod-ci-github-gq.cxx           |  250
 mod/mod-ci-github-gq.hxx           |   45
 mod/mod-ci-github-service-data.cxx |   90
 mod/mod-ci-github-service-data.hxx |   26
 mod/mod-ci-github.cxx              | 1436
 mod/mod-ci-github.hxx              |   49
 mod/mod-ci.cxx                     |   29
 mod/mod-ci.hxx                     |   24
 mod/module.cli                     |   18
 mod/tenant-service.hxx             |   32
 19 files changed, 1759 insertions(+), 732 deletions(-)
diff --git a/mod/ci-common.cxx b/mod/ci-common.cxx index d750b1b..e720914 100644 --- a/mod/ci-common.cxx +++ b/mod/ci-common.cxx @@ -553,7 +553,11 @@ namespace brep assert (!transaction::has_current ()); build_tenant t; + + // Set the reference count to 1 for the `created` result. + // duplicate_tenant_result r (duplicate_tenant_result::created); + service.ref_count = 1; for (string request_id;;) { @@ -584,14 +588,31 @@ namespace brep : duplicate_tenant_mode::ignore); } + // Shouldn't be here otherwise. + // + assert (t->service); + // Bail out in the ignore mode and cancel the tenant in the // replace mode. // if (mode == duplicate_tenant_mode::ignore) + { + // Increment the reference count for the `ignored` result. + // + ++(t->service->ref_count); + + db.update (t); + tr.commit (); + return make_pair (move (t->id), duplicate_tenant_result::ignored); + } assert (mode == duplicate_tenant_mode::replace); + // Preserve the current reference count for the `replaced` result. + // + service.ref_count = t->service->ref_count; + if (t->unloaded_timestamp) { db.erase (t); @@ -678,6 +699,7 @@ namespace brep // request_id = move (t.id); service = move (*t.service); + service.ref_count = 1; r = duplicate_tenant_result::created; } } @@ -788,7 +810,8 @@ namespace brep odb::core::database& db, size_t retry, const string& type, - const string& id) const + const string& id, + bool ref_count) const { using namespace odb::core; @@ -810,25 +833,44 @@ namespace brep if (t == nullptr) return nullopt; - r = move (t->service); + // Shouldn't be here otherwise. + // + assert (t->service && t->service->ref_count != 0); - if (t->unloaded_timestamp) + bool cancel (!ref_count || --(t->service->ref_count) == 0); + + if (cancel) { - db.erase (t); + // Move out the service state before it is dropped from the tenant. + // + r = move (t->service); + + if (t->unloaded_timestamp) + { + db.erase (t); + } + else + { + t->service = nullopt; + t->archived = true; + db.update (t); + } + + if (trace != nullptr) + *trace << "CI request " << t->id << " for service " << id << ' ' + << type << " is canceled"; } else { - t->service = nullopt; - t->archived = true; - db.update (t); + db.update (t); // Update the service reference count. + + // Move out the service state after the tenant is updated. + // + r = move (t->service); } tr.commit (); - if (trace != nullptr) - *trace << "CI request " << t->id << " for service " << id << ' ' - << type << " is canceled"; - // Bail out if we have successfully updated or erased the tenant // object. 
// @@ -913,7 +955,8 @@ namespace brep rebuild (odb::core::database& db, size_t retry, const build_id& id, - function<optional<string> (const tenant_service&, + function<optional<string> (const string& tenant_id, + const tenant_service&, build_state)> uf) const { using namespace odb::core; @@ -960,7 +1003,7 @@ namespace brep tenant_service& ts (*t->service); - if (optional<string> data = uf (ts, s)) + if (optional<string> data = uf (t->id, ts, s)) { ts.data = move (*data); db.update (t); @@ -988,7 +1031,7 @@ namespace brep return s; } - optional<pair<tenant_service, bool>> ci_start:: + optional<ci_start::tenant_data> ci_start:: find (odb::core::database& db, const string& type, const string& id) const @@ -1007,9 +1050,9 @@ namespace brep tr.commit (); - if (t == nullptr) + if (t == nullptr || !t->service) return nullopt; - return pair<tenant_service, bool> (move (t->service), t->archived); + return tenant_data {move (t->id), move (*t->service), t->archived}; } } diff --git a/mod/ci-common.hxx b/mod/ci-common.hxx index 36d5f0e..a38ac54 100644 --- a/mod/ci-common.hxx +++ b/mod/ci-common.hxx @@ -103,6 +103,10 @@ namespace brep // Finally note that only duplicate_tenant_mode::fail can be used if the // service id is empty. // + // The tenant reference count is set to 1 if the result is `created`, + // incremented if the result is `ignored`, and preserved if the result is + // `replaced`. + // // Repeat the attempts on the recoverable database failures (deadlocks, // etc) and throw runtime_error if no more retries left. // @@ -150,6 +154,11 @@ namespace brep // dropped. Note that the latter allow using unloaded tenants as a // relatively cheap asynchronous execution mechanism. // + // If ref_count is true, then decrement the tenant reference count and + // only cancel the CI request if it becomes 0. In this mode the caller can + // determine if the request was actually canceled by checking if the + // reference count in the returned service state is 0. + // // Repeat the attempts on the recoverable database failures (deadlocks, // etc) and throw runtime_error if no more retries left. // @@ -162,7 +171,8 @@ namespace brep odb::core::database&, size_t retry, const string& type, - const string& id) const; + const string& id, + bool ref_count = false) const; // Cancel previously created or started CI request. Return false if there // is no tenant for the specified tenant id. Note that the reason argument @@ -232,7 +242,8 @@ namespace brep rebuild (odb::core::database&, size_t retry, const build_id&, - function<optional<string> (const tenant_service&, + function<optional<string> (const string& tenant_id, + const tenant_service&, build_state)> = nullptr) const; // Find the tenant given the tenant service type and id and return the @@ -241,7 +252,14 @@ namespace brep // // Note: should be called out of the database transaction. 
// - optional<pair<tenant_service, bool /*archived*/>> + struct tenant_data + { + string tenant_id; + tenant_service service; + bool archived; + }; + + optional<tenant_data> find (odb::core::database&, const string& type, const string& id) const; diff --git a/mod/database-module.cxx b/mod/database-module.cxx index bce8c93..629e393 100644 --- a/mod/database-module.cxx +++ b/mod/database-module.cxx @@ -79,8 +79,10 @@ namespace brep optional<string> database_module:: update_tenant_service_state ( const connection_ptr& conn, - const string& tid, - const function<optional<string> (const tenant_service&)>& f) + const string& type, + const string& id, + const function<optional<string> (const string& tenant_id, + const tenant_service&)>& f) { assert (f != nullptr); // Shouldn't be called otherwise. @@ -96,13 +98,21 @@ namespace brep { transaction tr (conn->begin ()); - shared_ptr<build_tenant> t (build_db_->find<build_tenant> (tid)); + using query = query<build_tenant>; - if (t != nullptr && t->service) + shared_ptr<build_tenant> t ( + build_db_->query_one<build_tenant> (query::service.id == id && + query::service.type == type)); + + if (t != nullptr) { + // Shouldn't be here otherwise. + // + assert (t->service); + tenant_service& s (*t->service); - if (optional<string> data = f (s)) + if (optional<string> data = f (t->id, s)) { s.data = move (*data); build_db_->update (t); diff --git a/mod/database-module.hxx b/mod/database-module.hxx index 298afbf..76f13d4 100644 --- a/mod/database-module.hxx +++ b/mod/database-module.hxx @@ -61,16 +61,18 @@ namespace brep // and nullopt otherwise. // // Specifically, start the database transaction, query the service state, - // and call the callback-returned function on this state. If this call - // returns the data string (rather than nullopt), then update the service - // state with this data and persist the change. Repeat all the above steps - // on the recoverable database failures (deadlocks, etc). + // and, if present, call the callback-returned function on this state. If + // this call returns the data string (rather than nullopt), then update + // the service state with this data and persist the change. Repeat all the + // above steps on the recoverable database failures (deadlocks, etc). // optional<string> update_tenant_service_state ( const odb::core::connection_ptr&, - const string& tid, - const function<optional<string> (const tenant_service&)>&); + const string& type, + const string& id, + const function<optional<string> (const string& tenant_id, + const tenant_service&)>&); protected: size_t retry_ = 0; // Max of all retries. 
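The ci-common changes above introduce a per-tenant service reference count: create() sets it to 1 for the `created` result, increments it for `ignored`, and preserves it for `replaced`, while cancel() with ref_count=true decrements it and only actually cancels the CI request once the count reaches zero (the caller detects this by checking the count in the returned service state). The following is a minimal sketch of those counting semantics only; the tenant_service/duplicate_tenant_result names mirror the real ones, but the in-memory map, the missing database transaction/retry logic, and everything else here are simplified stand-ins for illustration.

```c++
#include <map>
#include <string>
#include <cstddef>
#include <cassert>
#include <optional>
#include <utility>

enum class duplicate_tenant_result {created, ignored, replaced};
enum class duplicate_tenant_mode {fail, ignore, replace};

struct tenant_service
{
  std::string id;
  std::string type;
  std::size_t ref_count = 0;
};

struct tenant_registry
{
  std::map<std::string, tenant_service> tenants; // Keyed by service id (simplification).

  duplicate_tenant_result
  create (tenant_service s, duplicate_tenant_mode mode)
  {
    auto i (tenants.find (s.id));

    if (i == tenants.end ())
    {
      s.ref_count = 1;                       // New tenant: count starts at 1.
      tenants.emplace (s.id, std::move (s));
      return duplicate_tenant_result::created;
    }

    if (mode == duplicate_tenant_mode::ignore)
    {
      ++i->second.ref_count;                 // Existing tenant: bump the count.
      return duplicate_tenant_result::ignored;
    }

    assert (mode == duplicate_tenant_mode::replace);

    s.ref_count = i->second.ref_count;       // Replacement: preserve the count.
    i->second = std::move (s);
    return duplicate_tenant_result::replaced;
  }

  // Return the final service state. With ref_count=true the request is
  // actually canceled only when the count drops to zero (which the caller
  // can observe in the returned state); with ref_count=false it is canceled
  // unconditionally.
  //
  std::optional<tenant_service>
  cancel (const std::string& id, bool ref_count)
  {
    auto i (tenants.find (id));
    if (i == tenants.end ())
      return std::nullopt;

    assert (i->second.ref_count != 0);

    if (!ref_count || --i->second.ref_count == 0)
    {
      tenant_service r (std::move (i->second));
      tenants.erase (i);
      return r;                              // Actually canceled.
    }

    return i->second;                        // Still referenced; not canceled.
  }
};
```

Note that in the real code the same pattern is wrapped in an ODB transaction with recoverable-failure retries, and the "cancel" path either erases the unloaded tenant or archives it, as shown in the ci-common.cxx hunks above.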
diff --git a/mod/mod-build-force.cxx b/mod/mod-build-force.cxx index 8666889..d37674f 100644 --- a/mod/mod-build-force.cxx +++ b/mod/mod-build-force.cxx @@ -314,14 +314,15 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, build_state::building, qhs, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, qbs.back ().tenant, f); + update_tenant_service_state (conn, ss.type, ss.id, f); } } diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx index bc44bd2..cc058b5 100644 --- a/mod/mod-build-result.cxx +++ b/mod/mod-build-result.cxx @@ -545,14 +545,15 @@ handle (request& rq, response&) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, build_state::building, qhs, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, qbs.back ().tenant, f); + update_tenant_service_state (conn, ss.type, ss.id, f); } } @@ -572,10 +573,10 @@ handle (request& rq, response&) // conn.reset (); - if (auto f = tsb->build_built (ss, b, log_writer_)) + if (auto f = tsb->build_built (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, b.tenant, f); + update_tenant_service_state (conn, ss.type, ss.id, f); } } diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index 2ae1237..c8b1bb2 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -499,10 +499,14 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsu->build_unloaded (move (*t->service), log_writer_)) + tenant_service& ts (*t->service); + string type (ts.type); + string id (ts.id); + + if (auto f = tsu->build_unloaded (t->id, move (ts), log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, t->id, f); + update_tenant_service_state (conn, type, id, f); } } } @@ -2350,7 +2354,8 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, nullopt /* initial_state */, qhs, @@ -2359,7 +2364,7 @@ handle (request& rq, response& rs) conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, qbs.back ().tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2382,7 +2387,8 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, initial_state, qhs, @@ -2391,7 +2397,7 @@ handle (request& rq, response& rs) conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, qbs.back ().tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2418,12 +2424,12 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsb->build_building (ss, b, log_writer_)) + if (auto f = tsb->build_building (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, b.tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2546,12 +2552,12 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsb->build_built (ss, b, log_writer_)) + if (auto f = tsb->build_built (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); if 
(optional<string> data = - update_tenant_service_state (conn, b.tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } diff --git a/mod/mod-ci-github-gh.cxx b/mod/mod-ci-github-gh.cxx index 6372ef0..021ff6b 100644 --- a/mod/mod-ci-github-gh.cxx +++ b/mod/mod-ci-github-gh.cxx @@ -7,6 +7,15 @@ namespace brep { + [[noreturn]] static void + throw_json (const json::parser& p, const string& m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + m); + } + // Return the GitHub check run status corresponding to a build_state. // string @@ -102,10 +111,7 @@ namespace brep [[noreturn]] static void missing_member (const json::parser& p, const char* o, const char* m) { - throw json::invalid_json_input ( - p.input_name, - p.line (), p.column (), p.position (), - o + string (" object is missing member '") + m + '\''); + throw_json (p, o + string (" object is missing member '") + m + '\''); } using event = json::event; @@ -154,6 +160,93 @@ namespace brep return os; } + // gh_check_suite_ex + // + gh_check_suite_ex:: + gh_check_suite_ex (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), hb (false), hs (false), cc (false), co (false), + ap (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (hb, "head_branch")) + { + string* v (p.next_expect_string_null ()); + if (v != nullptr) + head_branch = *v; + } + else if (c (hs, "head_sha")) head_sha = p.next_expect_string (); + else if (c (cc, "latest_check_runs_count")) + check_runs_count = p.next_expect_number <size_t> (); + else if (c (co, "conclusion")) + { + string* v (p.next_expect_string_null ()); + if (v != nullptr) + conclusion = *v; + } + else if (c (ap, "app")) + { + p.next_expect (event::begin_object); + + bool ai (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + if (c (ai, "id")) + { + // Note: unlike the check_run webhook's app.id, the check_suite + // one can be null. It's unclear under what circumstances, but it + // shouldn't happen unless something is broken. + // + string* v (p.next_expect_number_null ()); + + if (v == nullptr) + throw_json (p, "check_suite.app.id is null"); + + app_id = *v; + } + else p.next_expect_value_skip (); + } + + if (!ai) missing_member (p, "gh_check_suite_ex.app", "id"); + } + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_check_suite_ex", "node_id"); + if (!hb) missing_member (p, "gh_check_suite_ex", "head_branch"); + if (!hs) missing_member (p, "gh_check_suite_ex", "head_sha"); + if (!cc) missing_member (p, "gh_check_suite_ex", "latest_check_runs_count"); + if (!co) missing_member (p, "gh_check_suite_ex", "conclusion"); + if (!ap) missing_member (p, "gh_check_suite_ex", "app"); + } + + ostream& + operator<< (ostream& os, const gh_check_suite_ex& cs) + { + os << "node_id: " << cs.node_id + << ", head_branch: " << (cs.head_branch ? *cs.head_branch : "null") + << ", head_sha: " << cs.head_sha + << ", latest_check_runs_count: " << cs.check_runs_count + << ", conclusion: " << (cs.conclusion ? 
*cs.conclusion : "null") + << ", app_id: " << cs.app_id; + + return os; + } + // gh_check_run // gh_check_run:: @@ -190,7 +283,8 @@ namespace brep { p.next_expect (event::begin_object); - bool ni (false), nm (false), st (false), du (false), cs (false); + bool ni (false), nm (false), st (false), du (false), cs (false), + ap (false); // Skip unknown/uninteresting members. // @@ -206,14 +300,31 @@ namespace brep else if (c (st, "status")) status = p.next_expect_string (); else if (c (du, "details_url")) details_url = p.next_expect_string (); else if (c (cs, "check_suite")) check_suite = gh_check_suite (p); + else if (c (ap, "app")) + { + p.next_expect (event::begin_object); + + bool ai (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + if (c (ai, "id")) app_id = p.next_expect_number (); + else p.next_expect_value_skip (); + } + + if (!ai) missing_member (p, "gh_check_run_ex.app", "id"); + } else p.next_expect_value_skip (); } - if (!ni) missing_member (p, "gh_check_run", "node_id"); - if (!nm) missing_member (p, "gh_check_run", "name"); - if (!st) missing_member (p, "gh_check_run", "status"); - if (!du) missing_member (p, "gh_check_run", "details_url"); - if (!cs) missing_member (p, "gh_check_run", "check_suite"); + if (!ni) missing_member (p, "gh_check_run_ex", "node_id"); + if (!nm) missing_member (p, "gh_check_run_ex", "name"); + if (!st) missing_member (p, "gh_check_run_ex", "status"); + if (!du) missing_member (p, "gh_check_run_ex", "details_url"); + if (!cs) missing_member (p, "gh_check_run_ex", "check_suite"); + if (!ap) missing_member (p, "gh_check_run_ex", "app"); } @@ -232,7 +343,8 @@ namespace brep { os << static_cast<const gh_check_run&> (cr) << ", details_url: " << cr.details_url - << ", check_suite: { " << cr.check_suite << " }"; + << ", check_suite: { " << cr.check_suite << " }" + << ", app_id: " << cr.app_id; return os; } @@ -338,7 +450,8 @@ namespace brep << "path: " << pr.head_path << ", ref: " << pr.head_ref << ", sha: " << pr.head_sha - << " }"; + << " }" + << ", app_id: " << pr.app_id; return os; } @@ -350,7 +463,7 @@ namespace brep { p.next_expect (event::begin_object); - bool ni (false), nm (false), fn (false), cu (false); + bool ni (false), fn (false), cu (false); // Skip unknown/uninteresting members. // @@ -362,14 +475,12 @@ namespace brep }; if (c (ni, "node_id")) node_id = p.next_expect_string (); - else if (c (nm, "name")) name = p.next_expect_string (); else if (c (fn, "full_name")) path = p.next_expect_string (); else if (c (cu, "clone_url")) clone_url = p.next_expect_string (); else p.next_expect_value_skip (); } if (!ni) missing_member (p, "gh_repository", "node_id"); - if (!nm) missing_member (p, "gh_repository", "name"); if (!fn) missing_member (p, "gh_repository", "full_name"); if (!cu) missing_member (p, "gh_repository", "clone_url"); } @@ -378,7 +489,6 @@ namespace brep operator<< (ostream& os, const gh_repository& rep) { os << "node_id: " << rep.node_id - << ", name: " << rep.name << ", path: " << rep.path << ", clone_url: " << rep.clone_url; @@ -403,7 +513,7 @@ namespace brep return p.name () == s ? 
(v = true) : false; }; - if (c (i, "id")) id = p.next_expect_number<uint64_t> (); + if (c (i, "id")) id = p.next_expect_number (); else p.next_expect_value_skip (); } @@ -437,7 +547,7 @@ namespace brep }; if (c (ac, "action")) action = p.next_expect_string (); - else if (c (cs, "check_suite")) check_suite = gh_check_suite (p); + else if (c (cs, "check_suite")) check_suite = gh_check_suite_ex (p); else if (c (rp, "repository")) repository = gh_repository (p); else if (c (in, "installation")) installation = gh_installation (p); else p.next_expect_value_skip (); @@ -478,10 +588,6 @@ namespace brep return p.name () == s ? (v = true) : false; }; - // Pass true to gh_check_run() to indicate that the we're parsing a - // webhook event or REST API response (in which case more fields are - // expected to be present than in a GraphQL response). - // if (c (ac, "action")) action = p.next_expect_string (); else if (c (cs, "check_run")) check_run = gh_check_run_ex (p); else if (c (rp, "repository")) repository = gh_repository (p); @@ -513,7 +619,7 @@ namespace brep { p.next_expect (event::begin_object); - bool ac (false), pr (false), rp (false), in (false); + bool ac (false), pr (false), bf (false), rp (false), in (false); // Skip unknown/uninteresting members. // @@ -526,6 +632,7 @@ namespace brep if (c (ac, "action")) action = p.next_expect_string (); else if (c (pr, "pull_request")) pull_request = gh_pull_request (p); + else if (c (bf, "before")) before = p.next_expect_string (); else if (c (rp, "repository")) repository = gh_repository (p); else if (c (in, "installation")) installation = gh_installation (p); else p.next_expect_value_skip (); @@ -542,6 +649,7 @@ namespace brep { os << "action: " << pr.action; os << ", pull_request { " << pr.pull_request << " }"; + os << ", before: " << (pr.before ? *pr.before : "null"); os << ", repository { " << pr.repository << " }"; os << ", installation { " << pr.installation << " }"; @@ -575,7 +683,29 @@ namespace brep }; if (c (tk, "token")) token = p.next_expect_string (); - else if (c (ea, "expires_at")) expires_at = gh_from_iso8601 (p.next_expect_string ()); + else if (c (ea, "expires_at")) + { + string v (p.next_expect_string ()); + + try + { + expires_at = gh_from_iso8601 (v); + } + catch (const invalid_argument& e) + { + throw_json (p, + "invalid IAT expires_at value '" + v + + "': " + e.what ()); + } + catch (const system_error& e) + { + // Translate for simplicity. + // + throw_json (p, + "unable to convert IAT expires_at value '" + v + + "': " + e.what ()); + } + } else p.next_expect_value_skip (); } @@ -610,6 +740,8 @@ namespace brep timestamp gh_from_iso8601 (const string& s) { - return butl::from_string (s.c_str (), "%Y-%m-%dT%TZ", false /* local */); + return butl::from_string (s.c_str (), + "%Y-%m-%dT%TZ", + false /* local */); } } diff --git a/mod/mod-ci-github-gh.hxx b/mod/mod-ci-github-gh.hxx index b29904b..ab6dbaa 100644 --- a/mod/mod-ci-github-gh.hxx +++ b/mod/mod-ci-github-gh.hxx @@ -21,16 +21,15 @@ namespace butl namespace brep { - // @@@ Check if any data members are unused (once the dust settles). - using build_queued_hints = tenant_service_build_queued::build_queued_hints; // GitHub request/response types (all start with gh_). // // Note that the GitHub REST and GraphQL APIs use different id types and - // values. In the REST API they are usually integers (but sometimes - // strings!) whereas in GraphQL they are always strings (note: - // base64-encoded and opaque, not just the REST id value as a string). + // values. 
In the REST API they are usually integers (but check the API + // reference for the object in question) whereas in GraphQL they are always + // strings (note: base64-encoded and opaque, not just the REST id value as a + // string). // // In both APIs the id field is called `id`, but REST responses and webhook // events also contain the corresponding GraphQL object's id in the @@ -45,7 +44,7 @@ namespace brep // namespace json = butl::json; - // The "check_suite" object within a check_suite webhook event request. + // The check_suite member of a check_run webhook event (gh_check_run_event). // struct gh_check_suite { @@ -59,6 +58,26 @@ namespace brep gh_check_suite () = default; }; + // The check_suite member of a check_suite webhook event + // (gh_check_suite_event). + // + struct gh_check_suite_ex: gh_check_suite + { + size_t check_runs_count; + optional<string> conclusion; + + string app_id; + + explicit + gh_check_suite_ex (json::parser&); + + gh_check_suite_ex () = default; + }; + + // The check_run object returned in response to GraphQL requests + // (e.g. create or update check run). Note that we specifiy the set of + // members to return in the GraphQL request. + // struct gh_check_run { string node_id; @@ -71,66 +90,58 @@ namespace brep gh_check_run () = default; }; + // The check_run member of a check_run webhook event (gh_check_run_event). + // struct gh_check_run_ex: gh_check_run { string details_url; gh_check_suite check_suite; + string app_id; + explicit gh_check_run_ex (json::parser&); gh_check_run_ex () = default; }; + // The pull_request member of a pull_request webhook event + // (gh_pull_request_event). + // struct gh_pull_request { string node_id; unsigned int number; - // @@ TMP The unused base/head members may be useful for trace output when - // we receive the pull_request webhook. - string base_path; // Repository path (<org>/<repo>) under github.com. - string base_ref; // @@ TODO Remove if remains unused. - string base_sha; // @@ TODO Remove if remains unused. + string base_ref; + string base_sha; string head_path; - string head_ref; // @@ TODO Remove if remains unused. + string head_ref; string head_sha; + // Note: not received from GitHub but set from the app-id webhook query + // parameter instead. + // + // For some reason, unlike the check_suite and check_run webhooks, the + // pull_request webhook does not contain the app id. For the sake of + // simplicity we emulate check_suite and check_run by storing the app-id + // webhook query parameter here. + // + string app_id; + explicit gh_pull_request (json::parser&); gh_pull_request () = default; }; - // Return the GitHub check run status corresponding to a build_state. - // - string - gh_to_status (build_state st); - - // Return the build_state corresponding to a GitHub check run status - // string. Throw invalid_argument if the passed status was invalid. + // The repository member of various webhook events. // - build_state - gh_from_status (const string&); - - // If warning_success is true, then map result_status::warning to SUCCESS - // and to FAILURE otherwise. - // - string - gh_to_conclusion (result_status, bool warning_success); - - // Create a check_run name from a build. If the second argument is not - // NULL, return an abbreviated id if possible. - // - string - gh_check_run_name (const build&, const build_queued_hints* = nullptr); - struct gh_repository { string node_id; - string name; string path; // Repository path (<org>/<repo>) under github.com. 
string clone_url; @@ -140,9 +151,11 @@ namespace brep gh_repository () = default; }; + // The installation member of various webhook events. + // struct gh_installation { - uint64_t id; // Note: used for installation access token (REST API). + string id; // Note: used for installation access token (REST API). explicit gh_installation (json::parser&); @@ -150,12 +163,12 @@ namespace brep gh_installation () = default; }; - // The check_suite webhook event request. + // The check_suite webhook event. // struct gh_check_suite_event { string action; - gh_check_suite check_suite; + gh_check_suite_ex check_suite; gh_repository repository; gh_installation installation; @@ -165,6 +178,8 @@ namespace brep gh_check_suite_event () = default; }; + // The check_run webhook event. + // struct gh_check_run_event { string action; @@ -178,11 +193,19 @@ namespace brep gh_check_run_event () = default; }; + // The pull_request webhook event. + // struct gh_pull_request_event { string action; gh_pull_request pull_request; + + // The SHA of the previous commit on the head branch before the current + // one. Only present if action is "synchronize". + // + optional<string> before; + gh_repository repository; gh_installation installation; @@ -192,6 +215,9 @@ namespace brep gh_pull_request_event () = default; }; + // Installation access token (IAT) returned when we authenticate as a GitHub + // app installation. + // struct gh_installation_access_token { string token; @@ -205,9 +231,41 @@ namespace brep gh_installation_access_token () = default; }; + // Return the GitHub check run status corresponding to a build_state. + // + string + gh_to_status (build_state); + + // Return the build_state corresponding to a GitHub check run status + // string. Throw invalid_argument if the passed status was invalid. + // + build_state + gh_from_status (const string&); + + // If warning_success is true, then map result_status::warning to `SUCCESS` + // and to `FAILURE` otherwise. + // + // Throw invalid_argument in case of unsupported result_status value + // (currently skip, interrupt). + // + string + gh_to_conclusion (result_status, bool warning_success); + + // Create a check_run name from a build. If the second argument is not + // NULL, return an abbreviated id if possible. + // + string + gh_check_run_name (const build&, const build_queued_hints* = nullptr); + + // Throw system_error if the conversion fails due to underlying operating + // system errors. + // string gh_to_iso8601 (timestamp); + // Throw invalid_argument if the conversion fails due to the invalid + // argument and system_error if due to underlying operating system errors. + // timestamp gh_from_iso8601 (const string&); @@ -215,6 +273,9 @@ namespace brep operator<< (ostream&, const gh_check_suite&); ostream& + operator<< (ostream&, const gh_check_suite_ex&); + + ostream& operator<< (ostream&, const gh_check_run&); ostream& diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx index 4d1c583..774eeed 100644 --- a/mod/mod-ci-github-gq.cxx +++ b/mod/mod-ci-github-gq.cxx @@ -17,9 +17,11 @@ namespace brep // bottom). // static const string& gq_name (const string&); + static string gq_name (string&&); static string gq_str (const string&); static string gq_bool (bool); static const string& gq_enum (const string&); + static string gq_enum (string&&); [[noreturn]] static void throw_json (json::parser& p, const string& m) @@ -163,6 +165,8 @@ namespace brep // Parse a response to a check_run GraphQL mutation such as `createCheckRun` // or `updateCheckRun`. 
// + // Throw invalid_json_input. + // // Example response (only the part we need to parse here): // // { @@ -219,16 +223,17 @@ namespace brep return r; } - // Send a GraphQL mutation request `rq` that operates on one or more check - // runs. Update the check runs in `crs` with the new state and the node ID - // if unset. Return false and issue diagnostics if the request failed. + // Send a GraphQL mutation request `rq` that creates or updates one or more + // check runs. The requested build state is taken from each check_run + // object. Update the check runs in `crs` with the new data (state, node ID + // if unset, and state_synced). Return false and issue diagnostics if the + // request failed. // static bool gq_mutate_check_runs (const basic_mark& error, vector<check_run>& crs, const string& iat, - string rq, - build_state st) noexcept + string rq) { vector<gh_check_run> rcrs; @@ -264,16 +269,12 @@ namespace brep // const gh_check_run& rcr (rcrs[i]); // Received check run. + build_state st (crs[i].state); // Requested state. build_state rst (gh_from_status (rcr.status)); // Received state. // Note that GitHub won't allow us to change a built check run to // any other state (but all other transitions are allowed). // - // @@ Are we handling the case where the resulting state (built) - // differs from what we expect? - // - // @@@ Does built-to-built transition updates status? - // if (rst != st && rst != build_state::built) { error << "unexpected check_run status: received '" << rcr.status @@ -297,10 +298,10 @@ namespace brep error << "unexpected number of check_run objects in response"; } else - error << "failed to update check run: error HTTP response status " + error << "failed to mutate check runs: error HTTP response status " << sc; } - catch (const json::invalid_json_input& e) + catch (const json::invalid_json_input& e) // struct resp (via github_post()) { // Note: e.name is the GitHub API endpoint. // @@ -308,16 +309,16 @@ namespace brep << e.line << ", column: " << e.column << ", byte offset: " << e.position << ", error: " << e; } - catch (const invalid_argument& e) + catch (const invalid_argument& e) // github_post() { error << "malformed header(s) in response: " << e; } - catch (const system_error& e) + catch (const system_error& e) // github_post() { error << "unable to mutate check runs (errno=" << e.code () << "): " << e.what (); } - catch (const runtime_error& e) // From gq_parse_response_check_runs(). + catch (const runtime_error& e) // gq_parse_response_check_runs() { // GitHub response contained error(s) (could be ours or theirs at this // point). @@ -350,25 +351,23 @@ namespace brep // Serialize `createCheckRun` mutations for one or more builds to GraphQL. // - // The conclusion argument (`co`) is required if the build_state is built - // because GitHub does not allow a check run status of completed without a - // conclusion. + // The check run parameters (names, build states, details_urls, etc.) are + // taken from each object in `crs`. + // + // Note that build results are not supported because we never create + // multiple check runs in the built state. // // The details URL argument (`du`) can be empty for queued but not for the // other states. // + // Throw invalid_argument if any of the observed check run members are not + // valid GraphQL values (string, enum, etc). + // static string gq_mutation_create_check_runs (const string& ri, // Repository ID const string& hs, // Head SHA - const optional<string>& du, // Details URL. 
- const vector<check_run>& crs, - const string& st, // Check run status. - optional<gq_built_result> br = nullopt) + const vector<check_run>& crs) { - // Ensure details URL is non-empty if present. - // - assert (!du || !du->empty ()); - ostringstream os; os << "mutation {" << '\n'; @@ -377,26 +376,25 @@ namespace brep // for (size_t i (0); i != crs.size (); ++i) { + const check_run& cr (crs[i]); + + assert (cr.state != build_state::built); // Not supported. + + // Ensure details URL is non-empty if present. + // + assert (!cr.details_url || !cr.details_url->empty ()); + string al ("cr" + to_string (i)); // Field alias. os << gq_name (al) << ":createCheckRun(input: {" << '\n' - << " name: " << gq_str (crs[i].name) << '\n' + << " name: " << gq_str (cr.name) << '\n' << " repositoryId: " << gq_str (ri) << '\n' << " headSha: " << gq_str (hs) << '\n' - << " status: " << gq_enum (st); - if (du) - { - os << '\n'; - os << " detailsUrl: " << gq_str (*du); - } - if (br) + << " status: " << gq_enum (gh_to_status (cr.state)); + if (cr.details_url) { os << '\n'; - os << " conclusion: " << gq_enum (br->conclusion) << '\n' - << " output: {" << '\n' - << " title: " << gq_str (br->title) << '\n' - << " summary: " << gq_str (br->summary) << '\n' - << " }"; + os << " detailsUrl: " << gq_str (*cr.details_url); } os << "})" << '\n' // Specify the selection set (fields to be returned). Note that we @@ -417,12 +415,83 @@ namespace brep return os.str (); } + // Serialize a `createCheckRun` mutation for a build to GraphQL. + // + // The build result argument (`br`) is required if the build_state is built + // because GitHub does not allow a check run status of completed without a + // conclusion. + // + // The details URL argument (`du`) can be empty for queued but not for the + // other states. + // + // Throw invalid_argument if any of the arguments or observed check run + // members are not valid GraphQL values (string, enum, etc). + // + static string + gq_mutation_create_check_run (const string& ri, // Repository ID + const string& hs, // Head SHA + const optional<string>& du, // Details URL. + const check_run& cr, + const string& st, // Check run status. + optional<gq_built_result> br = nullopt) + { + // Ensure details URL is non-empty if present. + // + assert (!du || !du->empty ()); + + ostringstream os; + + os << "mutation {" << '\n'; + + // Serialize a `createCheckRun` for the build. + // + os << gq_name ("cr0") << ":createCheckRun(input: {" << '\n' + << " name: " << gq_str (cr.name) << '\n' + << " repositoryId: " << gq_str (ri) << '\n' + << " headSha: " << gq_str (hs) << '\n' + << " status: " << gq_enum (st); + if (du) + { + os << '\n'; + os << " detailsUrl: " << gq_str (*du); + } + if (br) + { + os << '\n'; + os << " conclusion: " << gq_enum (br->conclusion) << '\n' + << " output: {" << '\n' + << " title: " << gq_str (br->title) << '\n' + << " summary: " << gq_str (br->summary) << '\n' + << " }"; + } + os << "})" << '\n' + // Specify the selection set (fields to be returned). Note that we + // rename `id` to `node_id` (using a field alias) for consistency with + // webhook events and REST API responses. + // + << "{" << '\n' + << " checkRun {" << '\n' + << " node_id: id" << '\n' + << " name" << '\n' + << " status" << '\n' + << " }" << '\n' + << "}" << '\n'; + + os << "}" << '\n'; + + return os.str (); + } + + // Serialize an `updateCheckRun` mutation for one build to GraphQL. 
// // The `co` (conclusion) argument is required if the build_state is built // because GitHub does not allow updating a check run to completed without a // conclusion. // + // Throw invalid_argument if any of the arguments are invalid values (of + // GraphQL types or otherwise). + // static string gq_mutation_update_check_run (const string& ri, // Repository ID. const string& ni, // Node ID. @@ -444,8 +513,19 @@ namespace brep << " status: " << gq_enum (st); if (sa) { - os << '\n'; - os << " startedAt: " << gq_str (gh_to_iso8601 (*sa)); + try + { + os << '\n'; + os << " startedAt: " << gq_str (gh_to_iso8601 (*sa)); + } + catch (const system_error& e) + { + // Translate for simplicity. + // + throw invalid_argument ("unable to convert started_at value " + + to_string (system_clock::to_time_t (*sa)) + + ": " + e.what ()); + } } if (du) { @@ -483,23 +563,19 @@ namespace brep vector<check_run>& crs, const string& iat, const string& rid, - const string& hs, - build_state st) + const string& hs) { // No support for result_status so state cannot be built. // - assert (st != build_state::built); +#ifndef NDEBUG + for (const check_run& cr: crs) + assert (cr.state != build_state::built); +#endif - // Empty details URL because it's not available until building. - // string rq ( - gq_serialize_request (gq_mutation_create_check_runs (rid, - hs, - nullopt, - crs, - gh_to_status (st)))); + gq_serialize_request (gq_mutation_create_check_runs (rid, hs, crs))); - return gq_mutate_check_runs (error, crs, iat, move (rq), st); + return gq_mutate_check_runs (error, crs, iat, move (rq)); } bool @@ -516,18 +592,19 @@ namespace brep // assert (st != build_state::built || br); - vector<check_run> crs {move (cr)}; - string rq ( gq_serialize_request ( - gq_mutation_create_check_runs (rid, - hs, - du, - crs, - gh_to_status (st), - move (br)))); + gq_mutation_create_check_run (rid, + hs, + du, + cr, + gh_to_status (st), + move (br)))); - bool r (gq_mutate_check_runs (error, crs, iat, move (rq), st)); + vector<check_run> crs {move (cr)}; + crs[0].state = st; + + bool r (gq_mutate_check_runs (error, crs, iat, move (rq))); cr = move (crs[0]); @@ -565,33 +642,19 @@ namespace brep move (br)))); vector<check_run> crs {move (cr)}; + crs[0].state = st; - bool r (gq_mutate_check_runs (error, crs, iat, move (rq), st)); + bool r (gq_mutate_check_runs (error, crs, iat, move (rq))); cr = move (crs[0]); return r; } - bool - gq_update_or_create_check_run (const basic_mark& error, - check_run& cr, - const string& iat, - const string& rid, - const optional<string>& nid, - const string& hs, - const optional<string>& du, - build_state st, - optional<gq_built_result> br) - { - if (nid) - return gq_update_check_run (error, cr, iat, rid, *nid, du, st, br); - else - return gq_create_check_run (error, cr, iat, rid, hs, du, st, br); - } - // Serialize a GraphQL query that fetches a pull request from GitHub. // + // Throw invalid_argument if the node id is not a valid GraphQL string. + // static string gq_query_pr_mergeability (const string& nid) { @@ -616,6 +679,8 @@ namespace brep const string& iat, const string& nid) { + // Let invalid_argument from gq_query_pr_mergeability() propagate. + // string rq (gq_serialize_request (gq_query_pr_mergeability (nid))); try @@ -718,7 +783,7 @@ namespace brep error << "failed to fetch pull request: error HTTP response status " << sc; } - catch (const json::invalid_json_input& e) + catch (const json::invalid_json_input& e) // struct resp (via github_post()) { // Note: e.name is the GitHub API endpoint. 
// @@ -726,16 +791,16 @@ namespace brep << e.line << ", column: " << e.column << ", byte offset: " << e.position << ", error: " << e; } - catch (const invalid_argument& e) + catch (const invalid_argument& e) // github_post() { error << "malformed header(s) in response: " << e; } - catch (const system_error& e) + catch (const system_error& e) // github_post() { error << "unable to fetch pull request (errno=" << e.code () << "): " << e.what (); } - catch (const runtime_error& e) // From response type's parsing constructor. + catch (const runtime_error& e) // struct resp { // GitHub response contained error(s) (could be ours or theirs at this // point). @@ -762,8 +827,6 @@ namespace brep // // Return the name or throw invalid_argument if it is invalid. // - // @@ TODO: dangerous API. - // static const string& gq_name (const string& v) { @@ -782,6 +845,13 @@ namespace brep return v; } + static string + gq_name (string&& v) + { + gq_name (v); + return move (v); + } + // Serialize a string to GraphQL. // // Return the serialized string or throw invalid_argument if the string is @@ -836,8 +906,6 @@ namespace brep // // Return the enum value or throw invalid_argument if it is invalid. // - // @@ TODO: dangerous API. - // static const string& gq_enum (const string& v) { @@ -846,4 +914,12 @@ namespace brep return gq_name (v); } + + static string + gq_enum (string&& v) + { + gq_enum (v); + return move (v); + } + } diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx index 0353281..50950d4 100644 --- a/mod/mod-ci-github-gq.hxx +++ b/mod/mod-ci-github-gq.hxx @@ -19,12 +19,13 @@ namespace brep // GraphQL functions (all start with gq_). // - // Create a new check run on GitHub for each build. Update `check_runs` with - // the new data (node id, state, and state_synced). Return false and issue - // diagnostics if the request failed. + // Create a new check run on GitHub for each build with the build state, + // name, and details_url taken from each check_run object. Update + // `check_runs` with the new data (node id and state_synced). Return false + // and issue diagnostics if the request failed. // - // Note: no details_url yet since there will be no entry in the build result - // search page until the task starts building. + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. // // Note that creating a check_run named `foo` will effectively replace any // existing check_runs with that name. They will still exist on the GitHub @@ -36,13 +37,15 @@ namespace brep vector<check_run>& check_runs, const string& installation_access_token, const string& repository_id, - const string& head_sha, - build_state); + const string& head_sha); // Create a new check run on GitHub for a build. Update `cr` with the new // data (node id, state, and state_synced). Return false and issue // diagnostics if the request failed. // + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. + // // If the details_url is absent GitHub will use the app's homepage. // // The gq_built_result is required if the build_state is built because @@ -66,11 +69,11 @@ namespace brep build_state, optional<gq_built_result> = nullopt); - // Update a check run on GitHub. + // Update a check run on GitHub. Update `cr` with the new data (state and + // state_synced). Return false and issue diagnostics if the request failed. // - // Send a GraphQL request that updates an existing check run. Update `cr` - // with the new data (state and state_synced). 
Return false and issue - // diagnostics if the request failed. + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. // // Note that GitHub allows any state transitions except from built (but // built to built is allowed). The latter case is signalled by setting the @@ -92,24 +95,6 @@ namespace brep build_state, optional<gq_built_result> = nullopt); - // Update a check run on GitHub if node_id is present, otherwise create a - // new check run associated with head_sha. In the latter case, the new - // node_id is set in the passed check_run object. - // - // This is a wrapper of gq_update_check_run() and gq_create_check_run() for - // convenience. - // - bool - gq_update_or_create_check_run (const basic_mark& error, - check_run& cr, - const string& installation_access_token, - const string& repository_id, - const optional<string>& node_id, - const string& head_sha, - const optional<string>& details_url, - build_state, - optional<gq_built_result> = nullopt); - // Fetch pre-check information for a pull request from GitHub. This // information is used to decide whether or not to CI the PR and is // comprised of the PR's head commit SHA, whether its head branch is behind @@ -123,6 +108,8 @@ namespace brep // Issue diagnostics and return absent if the request failed (which means it // will be treated by the caller as still being generated). // + // Throw invalid_argument if the node id is invalid. + // // Note that the first request causes GitHub to start preparing the test // merge commit. // diff --git a/mod/mod-ci-github-service-data.cxx b/mod/mod-ci-github-service-data.cxx index 18f6eeb..9f66a6c 100644 --- a/mod/mod-ci-github-service-data.cxx +++ b/mod/mod-ci-github-service-data.cxx @@ -10,6 +10,15 @@ namespace brep { using event = json::event; + [[noreturn]] static void + throw_json (json::parser& p, const string& m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + m); + } + service_data:: service_data (const string& json) { @@ -32,11 +41,7 @@ namespace brep if (v == "local") kind = local; else if (v == "remote") kind = remote; else - { - throw json::invalid_json_input ( - p.input_name, p.line (), p.column (), p.position (), - "invalid service data kind: '" + v + '\''); - } + throw_json (p, "invalid service data kind: '" + v + '\''); } pre_check = p.next_expect_member_boolean<bool> ("pre_check"); @@ -44,16 +49,13 @@ namespace brep warning_success = p.next_expect_member_boolean<bool> ("warning_success"); - // Installation access token. + // Installation access token (IAT). 
// - p.next_expect_member_object ("installation_access"); - installation_access.token = p.next_expect_member_string ("token"); - installation_access.expires_at = - gh_from_iso8601 (p.next_expect_member_string ("expires_at")); - p.next_expect (event::end_object); + p.next_expect_name ("installation_access"); + installation_access = gh_installation_access_token (p); - installation_id = - p.next_expect_member_number<uint64_t> ("installation_id"); + app_id = p.next_expect_member_string ("app_id"); + installation_id = p.next_expect_member_string ("installation_id"); repository_node_id = p.next_expect_member_string ("repository_node_id"); repository_clone_url = p.next_expect_member_string ("repository_clone_url"); @@ -82,7 +84,16 @@ namespace brep nid = *v; } - build_state s (to_build_state (p.next_expect_member_string ("state"))); + build_state s; + try + { + s = to_build_state (p.next_expect_member_string ("state")); + } + catch (const invalid_argument& e) + { + throw_json (p, e.what ()); + } + bool ss (p.next_expect_member_boolean<bool> ("state_synced")); optional<result_status> rs; @@ -90,7 +101,14 @@ namespace brep string* v (p.next_expect_member_string_null ("status")); if (v != nullptr) { - rs = bbot::to_result_status (*v); + try + { + rs = bbot::to_result_status (*v); + } + catch (const invalid_argument& e) + { + throw_json (p, e.what ()); + } assert (s == build_state::built); } } @@ -100,6 +118,8 @@ namespace brep p.next_expect (event::end_object); } + completed = p.next_expect_member_boolean<bool> ("completed"); + { string* s (p.next_expect_member_string_null ("conclusion_node_id")); if (s != nullptr) @@ -115,7 +135,8 @@ namespace brep service_data (bool ws, string iat_tok, timestamp iat_ea, - uint64_t iid, + string aid, + string iid, string rid, string rcu, kind_type k, @@ -126,7 +147,8 @@ namespace brep : kind (k), pre_check (pc), re_request (rr), warning_success (ws), installation_access (move (iat_tok), iat_ea), - installation_id (iid), + app_id (move (aid)), + installation_id (move (iid)), repository_node_id (move (rid)), repository_clone_url (move (rcu)), check_sha (move (cs)), @@ -141,7 +163,8 @@ namespace brep service_data (bool ws, string iat_tok, timestamp iat_ea, - uint64_t iid, + string aid, + string iid, string rid, string rcu, kind_type k, @@ -154,7 +177,8 @@ namespace brep : kind (k), pre_check (pc), re_request (rr), warning_success (ws), installation_access (move (iat_tok), iat_ea), - installation_id (iid), + app_id (move (aid)), + installation_id (move (iid)), repository_node_id (move (rid)), repository_clone_url (move (rcu)), pr_node_id (move (pid)), @@ -187,13 +211,33 @@ namespace brep s.member ("warning_success", warning_success); - // Installation access token. + // Installation access token (IAT). // s.member_begin_object ("installation_access"); s.member ("token", installation_access.token); - s.member ("expires_at", gh_to_iso8601 (installation_access.expires_at)); + + // IAT expires_at timestamp. + // + { + string v; + try + { + v = gh_to_iso8601 (installation_access.expires_at); + } + catch (const system_error& e) + { + // Translate for simplicity. 
+ // + throw invalid_argument ("unable to convert IAT expires_at value " + + to_string (system_clock::to_time_t ( + installation_access.expires_at))); + } + s.member ("expires_at", move (v)); + } + s.end_object (); + s.member ("app_id", app_id); s.member ("installation_id", installation_id); s.member ("repository_node_id", repository_node_id); s.member ("repository_clone_url", repository_clone_url); @@ -233,7 +277,7 @@ namespace brep if (cr.status) { assert (cr.state == build_state::built); - s.value (to_string (*cr.status)); + s.value (to_string (*cr.status)); // Doesn't throw. } else s.value (nullptr); @@ -242,6 +286,8 @@ namespace brep } s.end_array (); + s.member ("completed", completed); + s.member_name ("conclusion_node_id"); if (conclusion_node_id) s.value (*conclusion_node_id); diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx index bb845cb..50bb49d 100644 --- a/mod/mod-ci-github-service-data.hxx +++ b/mod/mod-ci-github-service-data.hxx @@ -11,8 +11,6 @@ namespace brep { - // @@@ Check if any data members are unused (once the dust settles). - // Service data associated with the tenant (corresponds to GH check suite). // // It is always a top-level JSON object and the first member is always the @@ -35,6 +33,11 @@ namespace brep optional<result_status> status; // Only if state is built & synced. + // Note: never serialized (only used to pass information to the GraphQL + // functions). + // + optional<string> details_url; + string state_string () const { @@ -86,7 +89,8 @@ namespace brep // gh_installation_access_token installation_access; - uint64_t installation_id; + string app_id; + string installation_id; string repository_node_id; // GitHub-internal opaque repository id. @@ -94,7 +98,7 @@ namespace brep // The following two are only used for pull requests. // - // @@ TODO/LATER: maybe put them in a struct? + // @@ TODO/LATER: maybe put them in a struct, if more members? // optional<string> pr_node_id; optional<uint32_t> pr_number; @@ -134,6 +138,9 @@ namespace brep // // Throw invalid_argument if the schema version is not supported. // + // Throw invalid_argument (invalid_json_input) in case of malformed JSON + // or any invalid values. + // explicit service_data (const string& json); @@ -145,7 +152,8 @@ namespace brep service_data (bool warning_success, string iat_token, timestamp iat_expires_at, - uint64_t installation_id, + string app_id, + string installation_id, string repository_node_id, string repository_clone_url, kind_type kind, @@ -159,7 +167,8 @@ namespace brep service_data (bool warning_success, string iat_token, timestamp iat_expires_at, - uint64_t installation_id, + string app_id, + string installation_id, string repository_node_id, string repository_clone_url, kind_type kind, @@ -174,6 +183,11 @@ namespace brep // Serialize to JSON. // + // Throw invalid_argument if any values are invalid. + // + // May also throw invalid_json_output but that would be a programming + // error. + // string json () const; }; diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx index 6dfaa5f..5bcec98 100644 --- a/mod/mod-ci-github.cxx +++ b/mod/mod-ci-github.cxx @@ -19,26 +19,6 @@ #include <stdexcept> -// @@ Remaining TODOs -// -// - Rerequested checks -// -// - check_suite (action: rerequested): received when user re-runs all -// checks. -// -// - check_run (action: rerequested): received when user re-runs a -// specific check or all failed checks. -// -// @@ TMP I have confirmed that the above is accurate. 
-// -// Will need to extract a few more fields from check_runs, but the layout -// is very similar to that of check_suite. -// -// - Choose strong webhook secret (when deploying). -// -// - Check that delivery UUID has not been received before (replay attack). -// - // Resources: // // Creating an App: @@ -81,6 +61,8 @@ namespace brep void ci_github:: init (scanner& s) { + HANDLER_DIAG; + { shared_ptr<tenant_service_base> ts ( dynamic_pointer_cast<tenant_service_base> (shared_from_this ())); @@ -98,6 +80,9 @@ namespace brep if (options_->build_config_specified () && options_->ci_github_app_webhook_secret_specified ()) { + if (!options_->ci_github_app_id_private_key_specified ()) + fail << "no app id/private key mappings configured"; + ci_start::init (make_shared<options::ci_start> (*options_)); database_module::init (*options_, options_->build_db_retry ()); @@ -241,33 +226,48 @@ namespace brep fail << "unable to compute request HMAC: " << e; } - // Process the `warning` webhook request query parameter. + // Process the `app-id` and `warning` webhook request query parameters. // + string app_id; bool warning_success; { const name_values& rps (rq.parameters (1024, true /* url_only */)); - auto i (find_if (rps.begin (), rps.end (), - [] (auto&& rp) {return rp.name == "warning";})); + bool ai (false), wa (false); + + auto badreq = [] (const string& m) + { + throw invalid_request (400, m); + }; - if (i == rps.end ()) - throw invalid_request (400, - "missing 'warning' webhook query parameter"); + for (const name_value& rp: rps) + { + if (rp.name == "app-id") + { + if (!rp.value) + badreq ("missing 'app-id' webhook query parameter value"); - if (!i->value) - throw invalid_request ( - 400, "missing 'warning' webhook query parameter value"); + ai = true; + app_id = *rp.value; + } + else if (rp.name == "warning") + { + if (!rp.value) + badreq ("missing 'warning' webhook query parameter value"); - const string& v (*i->value); + wa = true; + const string& v (*rp.value); - if (v == "success") warning_success = true; - else if (v == "failure") warning_success = false; - else - { - throw invalid_request ( - 400, - "invalid 'warning' webhook query parameter value: '" + v + '\''); + if (v == "success") warning_success = true; + else if (v == "failure") warning_success = false; + else + badreq ("invalid 'warning' webhook query parameter value: '" + v + + '\''); + } } + + if (!ai) badreq ("missing 'app-id' webhook query parameter"); + if (!wa) badreq ("missing 'warning' webhook query parameter"); } // There is a webhook event (specified in the x-github-event header) and @@ -280,9 +280,6 @@ namespace brep // is that we want be "notified" of new actions at which point we can // decide whether to ignore them or to handle. // - // @@ There is also check_run even (re-requested by user, either - // individual check run or all the failed check runs). - // if (event == "check_suite") { gh_check_suite_event cs; @@ -302,6 +299,12 @@ namespace brep throw invalid_request (400, move (m)); } + if (cs.check_suite.app_id != app_id) + { + fail << "webhook check_suite app.id " << cs.check_suite.app_id + << " does not match app-id query parameter " << app_id; + } + if (cs.action == "requested") { return handle_check_suite_request (move (cs), warning_success); @@ -316,13 +319,10 @@ namespace brep else if (cs.action == "completed") { // GitHub thinks that "all the check runs in this check suite have - // completed and a conclusion is available". Looks like this one we - // ignore? 
- // - // What if our bookkeeping says otherwise? But then we can't even - // access the service data easily here. @@ TODO: maybe/later. + // completed and a conclusion is available". Check with our own + // bookkeeping and log an error if there is a mismatch. // - return true; + return handle_check_suite_completed (move (cs), warning_success); } else { @@ -353,6 +353,12 @@ namespace brep throw invalid_request (400, move (m)); } + if (cr.check_run.app_id != app_id) + { + fail << "webhook check_run app.id " << cr.check_run.app_id + << " does not match app-id query parameter " << app_id; + } + if (cr.action == "rerequested") { // Someone manually requested to re-run a specific check run. @@ -398,7 +404,16 @@ namespace brep throw invalid_request (400, move (m)); } - if (pr.action == "opened" || pr.action == "synchronize") + // Store the app-id webhook query parameter in the gh_pull_request + // object (see gh_pull_request for an explanation). + // + // When we receive the other webhooks we do check that the app ids in + // the payload and query match but here we have to assume it is valid. + // + pr.pull_request.app_id = app_id; + + if (pr.action == "opened" || + pr.action == "synchronize") { // opened // A pull request was opened. @@ -406,23 +421,78 @@ namespace brep // synchronize // A pull request's head branch was updated from the base branch or // new commits were pushed to the head branch. (Note that there is - // no equivalent event for the base branch. That case gets handled - // in handle_check_suite_request() instead. @@ Not anymore.) + // no equivalent event for the base branch.) // - // Note that both cases are handled the same: we start a new CI + // Note that both cases are handled similarly: we start a new CI // request which will be reported on the new commit id. // return handle_pull_request (move (pr), warning_success); } - else + else if (pr.action == "edited") { - // Ignore the remaining actions by sending a 200 response with empty - // body. + // PR base branch changed (to a different branch) besides other + // irrelevant changes (title, body, etc). // - // @@ Ignore known but log unknown, as in check_suite above? + // This is in a sense a special case of the base branch moving. In + // that case we don't do anything (due to the head sharing problem) + // relying instead on the branch protection rule. So it makes sense + // to do the same here. // return true; } + else if (pr.action == "closed") + { + // PR has been closed (as merged or not; see merged member). Also + // apparently received if base branch is deleted (and the same + // for head branch). See also the reopened event below. + // + // While it may seem natural to cancel the CI for the closed PR, it + // might actually be useful to have a completed CI record. GitHub + // doesn't prevent us from publishing CI results for the closed PR + // (even if both base and head branches were deleted). And if such a + // PR is reopened, the CI results remain. + // + return true; + } + else if (pr.action == "reopened") + { + // Previously closed PR has been reopened. + // + // Since we don't cancel the CI for a closed PR, there is nothing + // to do if it is reopened. 
+ // + return true; + } + else if (pr.action == "assigned" || + pr.action == "auto_merge_disabled" || + pr.action == "auto_merge_enabled" || + pr.action == "converted_to_draft" || + pr.action == "demilestoned" || + pr.action == "dequeued" || + pr.action == "enqueued" || + pr.action == "labeled" || + pr.action == "locked" || + pr.action == "milestoned" || + pr.action == "ready_for_review" || + pr.action == "review_request_removed" || + pr.action == "review_requested" || + pr.action == "unassigned" || + pr.action == "unlabeled" || + pr.action == "unlocked") + { + // These have no relation to CI. + // + return true; + } + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. + // + error << "unknown action '" << pr.action << "' in pull_request event"; + + return true; + } } else { @@ -489,7 +559,7 @@ namespace brep // let's obtain it to flush out any permission issues early. Also, it is // valid for an hour so we will most likely make use of it. // - optional<string> jwt (generate_jwt (trace, error)); + optional<string> jwt (generate_jwt (cs.check_suite.app_id, trace, error)); if (!jwt) throw server_error (); @@ -502,20 +572,18 @@ namespace brep l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); - // @@ What happens if we call this functions with an already existing - // node_id (e.g., replay attack). See the UUID header above. - // - // While it would have been nice to cancel CIs of PRs with this branch as - // base not to waste resources, there are complications: Firsty, we can - // only do this for remote PRs (since local PRs may share the result with - // branch push). Secondly, we try to do our best even if the branch - // protection rule for head behind is not enabled. In this case, it would - // be good to complete the CI. So maybe/later. + // base not to waste resources, there are complications: Firstly, we can + // only do this for remote PRs (since local PRs will most likely share the + // result with branch push). Secondly, we try to do our best even if the + // branch protection rule for head behind is not enabled. In this case, it + // would be good to complete the CI. So maybe/later. See also the head + // case in handle_pull_request(), where we do cancel remote PRs that are + // not shared. // Service id that uniquely identifies the CI tenant. // - string sid (cs.repository.node_id + ":" + cs.check_suite.head_sha); + string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha); // If the user requests a rebuild of the (entire) PR, then this manifests // as the check_suite rather than pull_request event. Specifically: @@ -541,11 +609,13 @@ namespace brep { kind = service_data::remote; - if (optional<tenant_service> ts = find (*build_db_, "ci-github", sid)) + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) { + tenant_service& ts (d->service); + try { - service_data sd (*ts->data); + service_data sd (*ts.data); check_sha = move (sd.check_sha); // Test merge commit. } catch (const invalid_argument& e) @@ -573,6 +643,7 @@ namespace brep service_data sd (warning_success, iat->token, iat->expires_at, + cs.check_suite.app_id, cs.installation.id, move (cs.repository.node_id), move (cs.repository.clone_url), @@ -630,6 +701,136 @@ namespace brep return true; } + bool ci_github:: + handle_check_suite_completed (gh_check_suite_event cs, bool warning_success) + { + // The plans is as follows: + // + // 1. Load the service data. + // + // 2. 
Verify it is completed. + // + // 3. Verify the check run counts match. + // + // 4. Verify (like in build_built()) that all the check runs are + // completed. + // + // 5. Verify the result matches what GitHub thinks it is. + + HANDLER_DIAG; + + l3 ([&]{trace << "check_suite event { " << cs << " }";}); + + // Service id that uniquely identifies the CI tenant. + // + string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha); + + // The common log entry subject. + // + string sub ("check suite " + cs.check_suite.node_id + '/' + sid); + + // Load the service data. + // + service_data sd; + + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) + { + try + { + sd = service_data (*d->service.data); + } + catch (const invalid_argument& e) + { + fail << "failed to parse service data: " << e; + } + } + else + { + error << sub << ": tenant_service does not exist"; + return true; + } + + // Verify the completed flag and the number of check runs. + // + if (!sd.completed) + { + error << sub << " service data complete flag is false"; + return true; + } + + // Received count will be one higher because we don't store the conclusion + // check run. + // + size_t check_runs_count (sd.check_runs.size () + 1); + + if (check_runs_count == 1) + { + error << sub << ": no check runs in service data"; + return true; + } + + if (cs.check_suite.check_runs_count != check_runs_count) + { + error << sub << ": check runs count " << cs.check_suite.check_runs_count + << " does not match service data count " << check_runs_count; + return true; + } + + // Verify that all the check runs are built and compute the summary + // conclusion. + // + result_status conclusion (result_status::success); + + for (const check_run& cr: sd.check_runs) + { + if (cr.state == build_state::built) + { + assert (cr.status.has_value ()); + conclusion |= *cr.status; + } + else + { + error << sub << ": unbuilt check run in service data"; + return true; + } + } + + // Verify the conclusion. + // + if (!cs.check_suite.conclusion) + { + error << sub << ": absent conclusion in completed check suite"; + return true; + } + + // Note that the case mismatch is due to GraphQL (gh_conclusion()) + // requiring uppercase conclusion values while the received webhook values + // are lower case. + // + string gh_conclusion (gh_to_conclusion (conclusion, warning_success)); + + if (icasecmp (*cs.check_suite.conclusion, gh_conclusion) != 0) + { + error << sub << ": conclusion " << *cs.check_suite.conclusion + << " does not match service data conclusion " << gh_conclusion; + return true; + } + + return true; + } + + // Create a gq_built_result. + // + // Throw invalid_argument in case of invalid result_status. + // + static gq_built_result + make_built_result (result_status rs, bool warning_success, string message) + { + return {gh_to_conclusion (rs, warning_success), + circle (rs) + ' ' + ucase (to_string (rs)), + move (message)}; + } + // Parse a check run details URL into a build_id. // // Return nullopt if the URL is invalid. @@ -637,6 +838,12 @@ namespace brep static optional<build_id> parse_details_url (const string& details_url); + // Note that GitHub always posts a message to their GUI saying "You have + // successfully requested <check_run_name> be rerun", regardless of what + // HTTP status code we respond with. However we do return error status codes + // when there is no better option (like failing the conclusion) in case they + // start handling them someday. 
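For illustration (not part of the patch), the summary conclusion computed in handle_check_suite_completed () above amounts to a severity fold over the per-check-run statuses. A minimal self-contained sketch, assuming a simplified result_status whose operator|= keeps the more severe value (which is how the real operator is used here):

#include <vector>

namespace example
{
  // Simplified stand-in for brep's result_status, ordered by severity
  // (illustration only).
  //
  enum class result_status {success, warning, error};

  // Keep the more severe of the two statuses.
  //
  inline result_status&
  operator|= (result_status& l, result_status r)
  {
    if (static_cast<int> (r) > static_cast<int> (l))
      l = r;

    return l;
  }

  inline result_status
  summary_conclusion (const std::vector<result_status>& check_runs)
  {
    result_status conclusion (result_status::success);

    for (result_status s: check_runs)
      conclusion |= s;

    return conclusion;
  }
}

So a suite whose check runs finished with {success, warning, success} concludes as warning, which gh_to_conclusion () then maps to SUCCESS or FAILURE depending on warning_success.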
+ // bool ci_github:: handle_check_run_rerequest (const gh_check_run_event& cr, bool warning_success) @@ -645,27 +852,47 @@ namespace brep l3 ([&]{trace << "check_run event { " << cr << " }";}); - // Fail if this is the conclusion check run. + // The overall plan is as follows: // - if (cr.check_run.name == conclusion_check_run_name) - { - // @@ Fail conclusion check run with appropriate message and reurn - // true. - - l3 ([&]{trace << "ignoring conclusion check_run";}); - - // 422 Unprocessable Content: The request was well-formed (i.e., - // syntactically correct) but could not be processed. - // - throw invalid_request (422, "Conclusion check run cannot be rebuilt"); - } + // 1. Load service data. + // + // 2. If the tenant is archived, then fail (re-create) both the check run + // and the conclusion with appropriate diagnostics. + // + // 3. If the check run is in the queued state, then do nothing. + // + // 4. Re-create the check run in the queued state and the conclusion in + // the building state. Note: do in a single request to make sure we + // either "win" or "loose" the potential race for both (important + // for #7). + // + // 5. Call the rebuild() function to attempt to schedule a rebuild. Pass + // the update function that does the following (if called): + // + // a. Save new node ids. + // + // b. Update the check run state (may also not exist). + // + // c. Clear the completed flag if true. + // + // 6. If the result of rebuild() indicates the tenant is archived, then + // fail (update) both the check run and conclusion with appropriate + // diagnostics. + // + // 7. If original state is queued (no rebuild was scheduled), then fail + // (update) both the check run and the conclusion. + // + // Note that while conceptually we are updating existing check runs, in + // practice we have to re-create as new check runs in order to replace the + // existing ones because GitHub does not allow transitioning out of the + // built state. // Get a new installation access token. // auto get_iat = [this, &trace, &error, &cr] () -> optional<gh_installation_access_token> { - optional<string> jwt (generate_jwt (trace, error)); + optional<string> jwt (generate_jwt (cr.check_run.app_id, trace, error)); if (!jwt) return nullopt; @@ -680,77 +907,156 @@ namespace brep return iat; }; - // Create a new conclusion check run, replacing the existing one. + const string& repo_node_id (cr.repository.node_id); + const string& head_sha (cr.check_run.check_suite.head_sha); + + // Prepare the build and conclusion check runs. They are sent to GitHub in + // a single request (unless something goes wrong) so store them together + // from the outset. // - // Return the check run on success or nullopt on failure. + vector<check_run> check_runs (2); + check_run& bcr (check_runs[0]); // Build check run + check_run& ccr (check_runs[1]); // Conclusion check run + + ccr.name = conclusion_check_run_name; + + // Load the service data, failing the check runs if the tenant has been + // archived. // - auto create_conclusion_cr = - [&cr, &error, warning_success] (const gh_installation_access_token& iat, - build_state bs, - optional<result_status> rs = nullopt, - optional<string> msg = nullopt) - -> optional<check_run> + service_data sd; + string tenant_id; { - optional<gq_built_result> br; - if (rs) + // Service id that uniquely identifies the CI tenant. 
+ // + string sid (repo_node_id + ':' + head_sha); + + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) { - assert (msg); + if (d->archived) // Tenant is archived + { + // Fail (re-create) the check runs. + // + optional<gh_installation_access_token> iat (get_iat ()); + if (!iat) + throw server_error (); + + gq_built_result br ( + make_built_result ( + result_status::error, warning_success, + "Unable to rebuild individual configuration: build has " + "been archived")); + + // Try to update the conclusion check run even if the first update + // fails. + // + bool f (false); // Failed. - br = gq_built_result (gh_to_conclusion (*rs, warning_success), - circle (*rs) + ' ' + ucase (to_string (*rs)), - move (*msg)); + if (gq_create_check_run (error, bcr, iat->token, + repo_node_id, head_sha, + cr.check_run.details_url, + build_state::built, br)) + { + l3 ([&]{trace << "created check_run { " << bcr << " }";}); + } + else + { + error << "check_run " << cr.check_run.node_id + << ": unable to re-create check run"; + f = true; + } + + if (gq_create_check_run (error, ccr, iat->token, + repo_node_id, head_sha, + nullopt /* details_url */, + build_state::built, move (br))) + { + l3 ([&]{trace << "created conclusion check_run { " << ccr << " }";}); + } + else + { + error << "check_run " << cr.check_run.node_id + << ": unable to re-create conclusion check run"; + f = true; + } + + // Fail the handler if either of the check runs could not be + // updated. + // + if (f) + throw server_error (); + + return true; + } + + tenant_service& ts (d->service); + + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + fail << "failed to parse service data: " << e; + } + + tenant_id = d->tenant_id; + } + else + { + // No such tenant. + // + fail << "check run " << cr.check_run.node_id + << " re-requested but tenant_service with id " << sid + << " does not exist"; } + } - check_run r; - r.name = conclusion_check_run_name; + // Get a new IAT if the one from the service data has expired. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; - if (gq_create_check_run (error, r, iat.token, - rni, hs, + if (system_clock::now () > sd.installation_access.expires_at) + { + if ((new_iat = get_iat ())) + iat = &*new_iat; + else + throw server_error (); + } + else + iat = &sd.installation_access; + + // Fail if it's the conclusion check run that is being re-requested. + // + if (cr.check_run.name == conclusion_check_run_name) + { + l3 ([&]{trace << "re-requested conclusion check_run";}); + + if (!sd.conclusion_node_id) + fail << "no conclusion node id for check run " << cr.check_run.node_id; + + gq_built_result br ( + make_built_result (result_status::error, warning_success, + "Conclusion check run cannot be rebuilt")); + + // Fail (update) the conclusion check run. + // + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *sd.conclusion_node_id, nullopt /* details_url */, - bs, move (br))) + build_state::built, move (br))) { - return r; + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); } else - return nullopt; - }; + { + fail << "check run " << cr.check_run.node_id + << ": unable to update conclusion check run " + << *sd.conclusion_node_id; + } - // The overall plan is as follows: - // - // 1. Call the rebuild() function to attempt to schedule a rebuild. Pass - // the update function that does the following (if called): - // - // a. Update the check run being rebuilt (may also not exist). 
- // - // b. Clear the completed flag if true. - // - // c. "Return" the service data to be used after the call. - // - // 2. If the result of rebuild() indicates the tenant is archived, fail - // the conclusion check run with appropriate diagnostics. - // - // 3. If original state is queued, then no rebuild was scheduled and we do - // nothing. - // - // 4. Otherwise (the original state is building or built): - // - // a. Change the check run state to queued. - // - // b. Change the conclusion check run to building (do unconditionally - // to mitigate races). - // - // Note that while conceptually we are updating existing check runs, in - // practice we have to create new check runs to replace the existing ones - // because GitHub does not allow transitioning out of the built state. - // - // This results in a new node id for each check run but we can't save them - // to the service data after the rebuild() call. As a workaround, when - // updating the service data we 1) clear the re-requested check run's node - // id and set the state_synced flag to true to signal to build_building() - // and build_built() that it needs to create a new check run; and 2) clear - // the conclusion check run's node id to cause build_built() to create a - // new conclusion check run. And these two check runs' node ids will be - // saved to the service data. + return true; + } // Parse the check_run's details_url to extract build id. // @@ -766,28 +1072,101 @@ namespace brep << ": failed to extract build id from details_url"; } - // The IAT retrieved from the service data. + // Initialize the check run (`bcr`) with state from the service data. // - optional<gh_installation_access_token> iat; + { + // Search for the check run in the service data. + // + // Note that we look by name in case node id got replaced by a racing + // re-request (in which case we ignore this request). + // + auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (), + [&cr] (const check_run& scr) + { + return scr.name == cr.check_run.name; + })); - // True if the check run exists in the service data. + if (i == sd.check_runs.end ()) + fail << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "re-requested but does not exist in service data"; + + // Do nothing if node ids don't match. + // + if (i->node_id && *i->node_id != cr.check_run.node_id) + { + l3 ([&]{trace << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "node id has changed in service data";}); + return true; + } + + // Do nothing if the build is already queued. + // + if (i->state == build_state::queued) + { + l3 ([&]{trace << "ignoring already-queued check run";}); + return true; + } + + bcr.name = i->name; + bcr.build_id = i->build_id; + bcr.state = i->state; + } + + // Transition the build and conclusion check runs out of the built state + // (or any other state) by re-creating them. 
// - bool cr_found (false); + bcr.state = build_state::queued; + bcr.state_synced = false; + bcr.details_url = cr.check_run.details_url; + + ccr.state = build_state::building; + ccr.state_synced = false; + + if (gq_create_check_runs (error, check_runs, iat->token, + repo_node_id, head_sha)) + { + assert (bcr.state == build_state::queued); + assert (ccr.state == build_state::building); + + l3 ([&]{trace << "created check_run { " << bcr << " }";}); + l3 ([&]{trace << "created conclusion check_run { " << ccr << " }";}); + } + else + { + fail << "check run " << cr.check_run.node_id + << ": unable to re-create build and conclusion check runs"; + } - // Update the state of the check run in the service data. Return (via - // captured references) the IAT and whether the check run was found. + // Request the rebuild and update service data. // - // Called by rebuild(), but only if the build is actually restarted. + bool race (false); + + // Callback function called by rebuild() to update the service data (but + // only if the build is actually restarted). // - auto update_sd = [&iat, - &cr_found, - &error, - &cr] (const tenant_service& ts, build_state) - -> optional<string> + auto update_sd = [&error, &new_iat, &race, + tenant_id = move (tenant_id), + &cr, &bcr, &ccr] (const string& ti, + const tenant_service& ts, + build_state) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + race = false; // Reset. + + if (tenant_id != ti) + { + // The tenant got replaced since we loaded it but we managed to + // trigger a rebuild in the new tenant. Who knows whose check runs are + // visible, so let's fail ours similar to the cases below. + // + race = true; + return nullopt; + } + service_data sd; try { @@ -796,147 +1175,137 @@ namespace brep catch (const invalid_argument& e) { error << "failed to parse service data: " << e; - return nullptr; + return nullopt; } - if (!iat) - iat = sd.installation_access; - - // If the re-requested check run is found, update it in the service - // data. + // Note that we again look by name in case node id got replaced by a + // racing re-request. In this case, however, it's impossible to decide + // who won that race, so let's fail the check suite to be on the safe + // side (in a sense, similar to the rebuild() returning queued below). // - const string& nid (cr.check_run.node_id); + auto i (find_if ( + sd.check_runs.begin (), sd.check_runs.end (), + [&cr] (const check_run& scr) + { + return scr.name == cr.check_run.name; + })); - for (check_run& cr: sd.check_runs) + if (i == sd.check_runs.end ()) { - if (cr.node_id && *cr.node_id == nid) - { - cr_found = true; - cr.state = build_state::queued; - sd.completed = false; - - // Clear the check run node ids and set state_synced to true to - // cause build_building() and/or build_built() to create new check - // runs (see the plan above for details). - // - cr.node_id = nullopt; - cr.state_synced = true; - sd.conclusion_node_id = nullopt; + error << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "re-requested but does not exist in service data"; + return nullopt; + } - return sd.json (); - } + if (i->node_id && *i->node_id != cr.check_run.node_id) + { + // Keep the old conclusion node id to make sure any further state + // transitions are ignored. A bit of a hack. + // + race = true; + return nullopt; } - return nullopt; + *i = bcr; // Update with new node_id, state, state_synced. 
+ + sd.conclusion_node_id = ccr.node_id; + sd.completed = false; + + // Save the IAT if we created a new one. + // + if (new_iat) + sd.installation_access = *new_iat; + + return sd.json (); }; optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd)); - if (!bs) + // If the build has been archived or re-enqueued since we loaded the + // service data, fail (by updating) both the build check run and the + // conclusion check run. Otherwise the build has been successfully + // re-enqueued so do nothing further. + // + if (!race && bs && *bs != build_state::queued) + return true; + + gq_built_result br; // Built result for both check runs. + + if (race || bs) // Race or re-enqueued. { - // Build has expired (most probably the tenant has been archived). + // The re-enqueued case: this build has been re-enqueued since we first + // loaded the service data. This could happen if the user clicked + // "re-run" multiple times and another handler won the rebuild() race. // - // Update the conclusion check run to notify the user (but have to - // replace it with a new one because we don't know the existing one's - // node id). + // However the winner of the check runs race cannot be determined. // - optional<gh_installation_access_token> iat (get_iat ()); - if (!iat) - throw server_error (); - - if (optional<check_run> ccr = create_conclusion_cr ( - *iat, - build_state::built, - result_status::error, - "Unable to rebuild: tenant has been archived or no such build")) - { - l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";}); - } - else - { - // Log the error and return failure to GitHub which will presumably - // indicate this in its GUI. - // - fail << "check run " << cr.check_run.node_id - << ": unable to create conclusion check run"; - } - } - else if (*bs == build_state::queued) - { - // The build was already queued so nothing to be done. This might happen - // if the user clicked "re-run" multiple times before we managed to - // update the check run. - } - else - { - // The build has been requeued. + // Best case the other handler won the check runs race as well and + // thus everything will proceed normally. Our check runs will be + // invisible and disregarded. // - assert (*bs == build_state::building || *bs == build_state::built); - - if (!cr_found) - { - // Respond with an error otherwise GitHub will post a message in its - // GUI saying "you have successfully requested a rebuild of ..." - // - fail << "check_run " << cr.check_run.node_id - << ": build restarted but check run does not exist " - << "in service data"; - } - - // Get a new IAT if the one from the service data has expired. + // Worst case we won the check runs race and the other handler's check + // runs -- the ones that will be updated by the build_*() notifications + // -- are no longer visible, leaving things quite broken. // - assert (iat.has_value ()); - - if (system_clock::now () > iat->expires_at) - { - iat = get_iat (); - if (!iat) - throw server_error (); - } - - // Update (by replacing) the re-requested and conclusion check runs to - // queued and building, respectively. + // Either way, we fail our check runs. In the best case scenario it + // will have no effect; in the worst case scenario it lets the user + // know something has gone wrong. // - // If either fails we can only log the error but build_building() and/or - // build_built() should correct the situation (see above for details). 
+ br = make_built_result (result_status::error, warning_success, + "Unable to rebuild, try again"); + } + else // Archived. + { + // The build has expired since we loaded the service data. Most likely + // the tenant has been archived. // + br = make_built_result ( + result_status::error, warning_success, + "Unable to rebuild individual configuration: build has been archived"); + } - // Update re-requested check run. - // - { - check_run ncr; // New check run. - ncr.name = cr.check_run.name; + // Try to update the conclusion check run even if the first update fails. + // + bool f (false); // Failed. - if (gq_create_check_run (error, - ncr, - iat->token, - cr.repository.node_id, - cr.check_run.check_suite.head_sha, - cr.check_run.details_url, - build_state::queued)) - { - l3 ([&]{trace << "created check_run { " << ncr << " }";}); - } - else - { - error << "check_run " << cr.check_run.node_id - << ": unable to create (to update) check run in queued state"; - } - } + // Fail the build check run. + // + if (gq_update_check_run (error, bcr, iat->token, + repo_node_id, *bcr.node_id, + nullopt /* details_url */, + build_state::built, br)) + { + l3 ([&]{trace << "updated check_run { " << bcr << " }";}); + } + else + { + error << "check run " << cr.check_run.node_id + << ": unable to update (replacement) check run " + << *bcr.node_id; + f = true; + } - // Update conclusion check run. - // - if (optional<check_run> ccr = - create_conclusion_cr (*iat, build_state::building)) - { - l3 ([&]{trace << "created conclusion check_run { " << *ccr << " }";}); - } - else - { - error << "check_run " << cr.check_run.node_id - << ": unable to create (to update) conclusion check run"; - } + // Fail the conclusion check run. + // + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *ccr.node_id, + nullopt /* details_url */, + build_state::built, move (br))) + { + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); } + else + { + error << "check run " << cr.check_run.node_id + << ": unable to update conclusion check run " << *ccr.node_id; + f = true; + } + + // Fail the handler if either of the check runs could not be updated. + // + if (f) + throw server_error (); return true; } @@ -959,30 +1328,6 @@ namespace brep // gets updated with the head commit's SHA and check_suite.pull_requests[] // will contain all PRs with this branch as head. // - // Remaining TODOs - // - // - @@ TODO? PR base branch changed (to a different branch) - // - // => pull_request(edited) - // - // - PR closed @@ TODO - // - // Also received if base branch is deleted. (And presumably same for head - // branch.) - // - // => pull_request(closed) - // - // Cancel CI? - // - // - PR merged @@ TODO - // - // => pull_request(merged) - // - // => check_suite(PR_base) - // - // Probably wouldn't want to CI the base again because the PR CI would've - // done the equivalent already. - // bool ci_github:: handle_pull_request (gh_pull_request_event pr, bool warning_success) { @@ -994,7 +1339,7 @@ namespace brep // let's obtain it to flush out any permission issues early. Also, it is // valid for an hour so we will most likely make use of it. 
// - optional<string> jwt (generate_jwt (trace, error)); + optional<string> jwt (generate_jwt (pr.pull_request.app_id, trace, error)); if (!jwt) throw server_error (); @@ -1007,12 +1352,6 @@ namespace brep l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); - // Note that similar to the branch push case above, while it would have - // been nice to cancel the previous CI job once the PR head moves (the - // "synchronize" event), due to the head sharing problem the previous CI - // job might actually still be relevant (in both local and remote PR - // cases). - // Distinguish between local and remote PRs by comparing the head and base // repositories' paths. // @@ -1021,6 +1360,48 @@ namespace brep ? service_data::local : service_data::remote); + // Note that similar to the branch push case above, while it would have + // been nice to cancel the previous CI job once the PR head moves (the + // "synchronize" event), due to the head sharing problem the previous CI + // job might actually still be relevant (in both local and remote PR + // cases). So we only do it for the remote PRs and only if the head is not + // shared (via tenant reference counting). + // + if (kind == service_data::remote && pr.action == "synchronize") + { + if (pr.before) + { + // Service id that will uniquely identify the CI tenant. + // + string sid (pr.repository.node_id + ':' + *pr.before); + + if (optional<tenant_service> ts = cancel (error, warn, + verb_ ? &trace : nullptr, + *build_db_, retry_, + "ci-github", sid, + true /* ref_count */)) + { + l3 ([&]{trace << "pull request " << pr.pull_request.node_id + << ": attempted to cancel CI of previous head commit" + << " (ref_count: " << ts->ref_count << ')';}); + } + else + { + // It's possible that there was no CI for the previous commit for + // various reasons (e.g., CI was not enabled). + // + l3 ([&]{trace << "pull request " << pr.pull_request.node_id + << ": failed to cancel CI of previous head commit " + << "with tenant_service id " << sid;}); + } + } + else + { + error << "pull request " << pr.pull_request.node_id + << ": before commit is missing in synchronize event"; + } + } + // Note: for remote PRs the check_sha will be set later, in // build_unloaded_pre_check(), to test merge commit id. // @@ -1039,6 +1420,7 @@ namespace brep service_data sd (warning_success, move (iat->token), iat->expires_at, + pr.pull_request.app_id, pr.installation.id, move (pr.repository.node_id), move (pr.repository.clone_url), @@ -1082,10 +1464,13 @@ namespace brep return true; } - function<optional<string> (const tenant_service&)> ci_github:: - build_unloaded (tenant_service&& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded (const string& ti, + tenant_service&& ts, const diag_epilogue& log_writer) const noexcept { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -1100,15 +1485,24 @@ namespace brep } return sd.pre_check - ? build_unloaded_pre_check (move (ts), move (sd), log_writer) - : build_unloaded_load (move (ts), move (sd), log_writer); + ? 
build_unloaded_pre_check (move (ts), move (sd), log_writer) + : build_unloaded_load (ti, move (ts), move (sd), log_writer); } - function<optional<string> (const tenant_service&)> ci_github:: + function<optional<string> (const string&, const tenant_service&)> ci_github:: build_unloaded_pre_check (tenant_service&& ts, service_data&& sd, const diag_epilogue& log_writer) const noexcept + try { + // NOTE: this function is noexcept and should not throw. + // + // In a few places where invalid_argument is unlikely to be thrown and/or + // would indicate that things are seriously broken we let it propagate to + // the function catch block where the pre-check tenant will be canceled + // (otherwise we could end up in an infinite loop, e.g., because the + // problematic arguments won't change). + NOTIFICATION_DIAG (log_writer); // We get here for PRs only (but both local and remote). The overall @@ -1134,6 +1528,8 @@ namespace brep // Request PR pre-check info (triggering the generation of the test merge // commit on the GitHub's side). // + // Let unlikely invalid_argument propagate (see above). + // optional<gq_pr_pre_check_info> pc ( gq_fetch_pull_request_pre_check_info (error, sd.installation_access.token, @@ -1177,7 +1573,7 @@ namespace brep // Service id that will uniquely identify the CI tenant. // - string sid (sd.repository_node_id + ":" + sd.report_sha); + string sid (sd.repository_node_id + ':' + sd.report_sha); // Create an unloaded CI tenant, doing nothing if one already exists // (which could've been created by a head branch push or another PR @@ -1193,38 +1589,50 @@ namespace brep // notifications until (1) we load the tenant, (2) we cancel it, or (3) // it gets archived after some timeout. // - if (auto pr = create (error, warn, verb_ ? &trace : nullptr, - *build_db_, retry_, - tenant_service (sid, "ci-github", sd.json ()), - chrono::seconds (30) /* interval */, - chrono::seconds (0) /* delay */, - duplicate_tenant_mode::ignore)) - { - if (pr->second == duplicate_tenant_result::ignored) + try + { + if (auto pr = create (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + tenant_service (sid, "ci-github", sd.json ()), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */, + duplicate_tenant_mode::ignore)) { - // This PR is sharing a head commit with something else. - // - // If this is a local PR then it's probably the branch push, which - // is expected, so do nothing. - // - // If this is a remote PR then it could be anything (branch push, - // local PR, or another remote PR) which in turn means the CI result - // may end up being for head, not merge commit. There is nothing we - // can do about it on our side (the user can enable the head-behind- - // base protection on their side). - // - if (sd.kind == service_data::remote) + if (pr->second == duplicate_tenant_result::ignored) { - l3 ([&]{trace << "remote pull request " << *sd.pr_node_id - << ": CI tenant already exists for " << sid;}); + // This PR is sharing a head commit with something else. + // + // If this is a local PR then it's probably the branch push, which + // is expected, so do nothing. + // + // If this is a remote PR then it could be anything (branch push, + // local PR, or another remote PR) which in turn means the CI + // result may end up being for head, not merge commit. There is + // nothing we can do about it on our side (the user can enable the + // head-behind- base protection on their side). 
+ // + if (sd.kind == service_data::remote) + { + l3 ([&]{trace << "remote pull request " << *sd.pr_node_id + << ": CI tenant already exists for " << sid;}); + } } } + else + { + error << "pull request " << *sd.pr_node_id + << ": failed to create unloaded CI tenant " + << "with tenant_service id " << sid; + + // Fall through to cancel. + } } - else + catch (const runtime_error& e) // Database retries exhausted. { error << "pull request " << *sd.pr_node_id - << ": unable to create unloaded CI tenant " - << "with tenant_service id " << sid; + << ": failed to create unloaded CI tenant " + << "with tenant_service id " << sid + << ": " << e.what (); // Fall through to cancel. } @@ -1232,26 +1640,70 @@ namespace brep // Cancel the pre-check tenant. // - if (!cancel (error, warn, verb_ ? &trace : nullptr, - *build_db_, retry_, - ts.type, - ts.id)) + try + { + if (!cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + ts.type, + ts.id)) + { + // Should never happen (no such tenant). + // + error << "pull request " << *sd.pr_node_id + << ": failed to cancel pre-check tenant with tenant_service id " + << ts.id; + } + } + catch (const runtime_error& e) // Database retries exhausted. { - // Should never happen (no such tenant). - // error << "pull request " << *sd.pr_node_id << ": failed to cancel pre-check tenant with tenant_service id " - << ts.id; + << ts.id << ": " << e.what (); + } + + return nullptr; + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + error << "pull request " << *sd.pr_node_id + << ": unhandled exception: " << e.what (); + + // Cancel the pre-check tenant otherwise we could end up in an infinite + // loop (see top of function). + // + try + { + if (cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + ts.type, + ts.id)) + l3 ([&]{trace << "canceled pre-check tenant " << ts.id;}); + } + catch (const runtime_error& e) // Database retries exhausted. + { + l3 ([&]{trace << "failed to cancel pre-check tenant " << ts.id << ": " + << e.what ();}); } return nullptr; } - function<optional<string> (const tenant_service&)> ci_github:: - build_unloaded_load (tenant_service&& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded_load (const string& tenant_id, + tenant_service&& ts, service_data&& sd, const diag_epilogue& log_writer) const noexcept + try { + // NOTE: this function is noexcept and should not throw. + // + // In a few places where invalid_argument is unlikely to be thrown and/or + // would indicate that things are seriously broken we let it propagate to + // the function catch block where the tenant will be canceled (otherwise + // we could end up in an infinite loop, e.g., because the problematic + // arguments won't change). + NOTIFICATION_DIAG (log_writer); // Load the tenant, which is essentially the same for both branch push and @@ -1272,7 +1724,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (optional<string> jwt = generate_jwt (trace, error)) + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) { new_iat = obtain_installation_access_token (sd.installation_id, move (*jwt), @@ -1297,6 +1749,8 @@ namespace brep check_run cr; cr.name = move (name); + // Let unlikely invalid_argument propagate (see above). 
+ // if (gq_create_check_run (error, cr, iat->token, @@ -1323,14 +1777,16 @@ namespace brep { assert (!node_id.empty ()); - optional<gq_built_result> br ( - gq_built_result (gh_to_conclusion (rs, sd.warning_success), - circle (rs) + ' ' + ucase (to_string (rs)), - move (summary))); + // Let unlikely invalid_argument propagate (see above). + // + gq_built_result br ( + make_built_result (rs, sd.warning_success, move (summary))); check_run cr; cr.name = name; // For display purposes only. + // Let unlikely invalid_argument propagate (see above). + // if (gq_update_check_run (error, cr, iat->token, @@ -1357,16 +1813,24 @@ namespace brep // string conclusion_node_id; // Conclusion check run node ID. - if (auto cr = create_synthetic_cr (conclusion_check_run_name)) + if (!sd.conclusion_node_id) { - l3 ([&]{trace << "created check_run { " << *cr << " }";}); + if (auto cr = create_synthetic_cr (conclusion_check_run_name)) + { + l3 ([&]{trace << "created check_run { " << *cr << " }";}); - conclusion_node_id = move (*cr->node_id); + conclusion_node_id = move (*cr->node_id); + } } + const string& effective_conclusion_node_id ( + sd.conclusion_node_id + ? *sd.conclusion_node_id + : conclusion_node_id); + // Load the CI tenant if the conclusion check run was created. // - if (!conclusion_node_id.empty ()) + if (!effective_conclusion_node_id.empty ()) { string ru; // Repository URL. @@ -1383,46 +1847,65 @@ namespace brep else ru = sd.repository_clone_url + '#' + sd.check_sha; + // Let unlikely invalid_argument propagate (see above). + // repository_location rl (move (ru), repository_type::git); - optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr, - *build_db_, retry_, - move (ts), - move (rl))); - - if (!r || r->status != 200) + try { - if (auto cr = update_synthetic_cr (conclusion_node_id, - conclusion_check_run_name, - result_status::error, - to_check_run_summary (r))) - { - l3 ([&]{trace << "updated check_run { " << *cr << " }";}); - } - else + optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + move (ts), + move (rl))); + + if (!r || r->status != 200) { - // Nothing really we can do in this case since we will not receive - // any further notifications. Log the error as a last resort. + // Let unlikely invalid_argument propagate (see above). + // + if (auto cr = update_synthetic_cr (effective_conclusion_node_id, + conclusion_check_run_name, + result_status::error, + to_check_run_summary (r))) + { + l3 ([&]{trace << "updated check_run { " << *cr << " }";}); + } + else + { + // Nothing really we can do in this case since we will not receive + // any further notifications. Log the error as a last resort. + + error << "failed to load CI tenant " << ts.id + << " and unable to update conclusion"; + } - error << "failed to load CI tenant " << ts.id - << " and unable to update conclusion"; + return nullptr; // No need to update service data in this case. } + } + catch (const runtime_error& e) // Database retries exhausted. + { + error << "failed to load CI tenant " << ts.id << ": " << e.what (); - return nullptr; // No need to update service data in this case. + // Fall through to retry on next call. } } - else if (!new_iat) - return nullptr; // Nothing to save (but retry on next call). + + if (!new_iat && conclusion_node_id.empty ()) + return nullptr; // Nothing to save (but potentially retry on next call). 
return [&error, + tenant_id, iat = move (new_iat), cni = move (conclusion_node_id)] - (const tenant_service& ts) -> optional<string> + (const string& ti, + const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to // transaction being aborted) and so should not move out of its // captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -1443,6 +1926,28 @@ namespace brep return sd.json (); }; } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + error << "CI tenant " << ts.id << ": unhandled exception: " << e.what (); + + // Cancel the tenant otherwise we could end up in an infinite loop (see + // top of function). + // + try + { + if (cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, ts.type, ts.id)) + l3 ([&]{trace << "canceled CI tenant " << ts.id;}); + } + catch (const runtime_error& e) // Database retries exhausted. + { + l3 ([&]{trace << "failed to cancel CI tenant " << ts.id + << ": " << e.what ();}); + } + + return nullptr; + } // Build state change notifications (see tenant-services.hxx for // background). Mapping our state transitions to GitHub pose multiple @@ -1453,9 +1958,9 @@ namespace brep // them when notifying GitHub. The first is not important (we expect the // state to go back to building shortly). The second should normally not // happen and would mean that a completed check suite may go back on its - // conclusion (which would be pretty confusing for the user). @@@ This - // can/will happen on check run rebuild. Distinguish between internal - // and external rebuilds? + // conclusion (which would be pretty confusing for the user). Note that + // the ->queued state transition of a check run rebuild triggered by + // us is handled directly in handle_check_run_rerequest(). // // So, for GitHub notifications, we only have the following linear // transition sequence: @@ -1532,13 +2037,17 @@ namespace brep // if we have node_id, then we update, otherwise, we create (potentially // overriding the check run created previously). // - function<optional<string> (const tenant_service&)> ci_github:: - build_queued (const tenant_service& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_queued (const string& tenant_id, + const tenant_service& ts, const vector<build>& builds, optional<build_state> istate, const build_queued_hints& hs, const diag_epilogue& log_writer) const noexcept + try { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -1618,7 +2127,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (optional<string> jwt = generate_jwt (trace, error)) + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) { new_iat = obtain_installation_access_token (sd.installation_id, move (*jwt), @@ -1638,11 +2147,12 @@ namespace brep { // Create a check_run for each build as a single request. // + // Let unlikely invalid_argument propagate. 
+ // if (gq_create_check_runs (error, crs, iat->token, - sd.repository_node_id, sd.report_sha, - build_state::queued)) + sd.repository_node_id, sd.report_sha)) { for (const check_run& cr: crs) { @@ -1654,15 +2164,20 @@ namespace brep } } - return [bs = move (bs), + return [tenant_id, + bs = move (bs), iat = move (new_iat), crs = move (crs), error = move (error), - warn = move (warn)] (const tenant_service& ts) -> optional<string> + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -1702,12 +2217,24 @@ namespace brep return sd.json (); }; } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + error << "CI tenant " << ts.id << ": unhandled exception: " << e.what (); - function<optional<string> (const tenant_service&)> ci_github:: - build_building (const tenant_service& ts, + return nullptr; + } + + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_building (const string& tenant_id, + const tenant_service& ts, const build& b, const diag_epilogue& log_writer) const noexcept + try { + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); service_data sd; @@ -1765,7 +2292,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (optional<string> jwt = generate_jwt (trace, error)) + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) { new_iat = obtain_installation_access_token (sd.installation_id, move (*jwt), @@ -1783,6 +2310,8 @@ namespace brep // if (iat != nullptr) { + // Let unlikely invalid_argument propagate. + // if (gq_update_check_run (error, *cr, iat->token, @@ -1806,14 +2335,19 @@ namespace brep } } - return [iat = move (new_iat), + return [tenant_id, + iat = move (new_iat), cr = move (*cr), error = move (error), - warn = move (warn)] (const tenant_service& ts) -> optional<string> + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -1848,18 +2382,31 @@ namespace brep return sd.json (); }; } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + string bid (gh_check_run_name (b)); // Full build id. + + error << "check run " << bid << ": unhandled exception: " << e.what(); + + return nullptr; + } - function<optional<string> (const tenant_service&)> ci_github:: - build_built (const tenant_service& ts, + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_built (const string& tenant_id, + const tenant_service& ts, const build& b, const diag_epilogue& log_writer) const noexcept + try { - // @@ TODO Include service_data::event_node_id and perhaps ts.id in - // diagnostics? E.g. when failing to update check runs we print the - // build ID only. - // + // NOTE: this function is noexcept and should not throw. + NOTIFICATION_DIAG (log_writer); + // @@ TODO Include ts.id in diagnostics? Check run build ids alone seem + // kind of meaningless. Log lines get pretty long this way however. 
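// Aside (illustration, not part of the patch): build_built(), like the other
// build_*() notifications in this patch, is declared noexcept and wrapped in
// a function-try-block so that a stray exception is logged and converted
// into a "no update" result instead of terminating the process. A minimal
// standalone sketch of the pattern (names are hypothetical):

#include <string>
#include <optional>
#include <stdexcept>

std::optional<std::string>
notify (const std::string& input) noexcept
try
{
  // Normal processing; may throw.
  //
  if (input.empty ())
    throw std::invalid_argument ("empty input");

  return "processed: " + input;
}
catch (const std::exception&)
{
  return std::nullopt; // Swallow and signal "nothing to do".
}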
+ service_data sd; try { @@ -1951,7 +2498,7 @@ namespace brep if (system_clock::now () > sd.installation_access.expires_at) { - if (optional<string> jwt = generate_jwt (trace, error)) + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) { new_iat = obtain_installation_access_token (sd.installation_id, move (*jwt), @@ -1978,6 +2525,11 @@ namespace brep { using namespace web::xhtml; + // Note: let all serialization exceptions propagate. The XML + // serialization code can throw bad_alloc or xml::serialization in + // case of I/O failures, but we're serializing to a string stream so + // both exceptions are unlikely. + // ostringstream os; xml::serializer s (os, "check_run_summary"); @@ -2069,13 +2621,12 @@ namespace brep } gq_built_result br ( - gh_to_conclusion (*b.status, sd.warning_success), - circle (*b.status) + ' ' + ucase (to_string (*b.status)), - move (sm)); + make_built_result (*b.status, sd.warning_success, move (sm))); if (cr.node_id) { - // Update existing check run to built. + // Update existing check run to built. Let unlikely invalid_argument + // propagate. // if (gq_update_check_run (error, cr, @@ -2092,7 +2643,7 @@ namespace brep } else { - // Create new check run. + // Create new check run. Let unlikely invalid_argument propagate. // // Note that we don't have build hints so will be creating this check // run with the full build id as name. In the unlikely event that an @@ -2129,10 +2680,9 @@ namespace brep result_status rs (*conclusion); - optional<gq_built_result> br ( - gq_built_result (gh_to_conclusion (rs, sd.warning_success), - circle (rs) + ' ' + ucase (to_string (rs)), - "All configurations are built")); + gq_built_result br ( + make_built_result (rs, sd.warning_success, + "All configurations are built")); check_run cr; @@ -2141,6 +2691,8 @@ namespace brep cr.node_id = *sd.conclusion_node_id; cr.name = conclusion_check_run_name; + // Let unlikely invalid_argument propagate. + // if (gq_update_check_run (error, cr, iat->token, @@ -2167,15 +2719,20 @@ namespace brep } } - return [iat = move (new_iat), + return [tenant_id, + iat = move (new_iat), cr = move (cr), completed = completed, error = move (error), - warn = move (warn)] (const tenant_service& ts) -> optional<string> + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> { // NOTE: this lambda may be called repeatedly (e.g., due to transaction // being aborted) and so should not move out of its captures. + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + service_data sd; try { @@ -2245,6 +2802,16 @@ namespace brep return sd.json (); }; } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + string bid (gh_check_run_name (b)); // Full build id. + + error << "check run " << bid << ": unhandled exception: " << e.what(); + + return nullptr; + } string ci_github:: details_url (const build& b) const @@ -2270,22 +2837,22 @@ namespace brep url u (details_url); - if (!u.query || !u.path || u.path->size () <= 1) - return nullopt; - build_id r; // Extract the tenant from the URL path. // // Example path: @d2586f57-21dc-40b7-beb2-6517ad7917dd // - r.package.tenant = u.path->substr (1); - - if (r.package.tenant.empty ()) + if (!u.path || u.path->size () != 37 || (*u.path)[0] != '@') return nullopt; + r.package.tenant = u.path->substr (1); + // Extract the rest of the build_id members from the URL query. 
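// For example (an illustrative URL; the package, configuration and toolchain
// values are made up, but the parameter names match the parsing below):
//
//   https://<host>/@d2586f57-21dc-40b7-beb2-6517ad7917dd
//     ?builds=libfoo&pv=1.2.3&tg=x86_64-linux-gnu&tc=linux-gcc
//     &pc=default&th=public-0.17.0
//
// This would produce a build_id with the above tenant, package libfoo/1.2.3,
// target x86_64-linux-gnu, target/package configurations linux-gcc/default,
// and toolchain public/0.17.0.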
// + if (!u.query) + return nullopt; + bool pn (false), pv (false), tg (false), tc (false), pc (false), th (false); @@ -2304,21 +2871,25 @@ namespace brep ++vp; // Skip '=' - const char* ve (ep ? ep : vp + strlen (vp)); // Value end pointer. + const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end. // Get the value as-is or URL-decode it. // - auto getval = [vp, ve] () { return string (vp, ve); }; + auto rawval = [vp, ve] () { return string (vp, ve); }; auto decval = [vp, ve] () { return mime_url_decode (vp, ve); }; auto make_version = [] (string&& v) - { return canonical_version (brep::version (move (v))); }; + { + return canonical_version (brep::version (move (v))); + }; auto c = [&n] (bool& b, const char* s) - { return n == s ? (b = true) : false; }; + { + return n == s ? (b = true) : false; + }; if (c (pn, "builds")) r.package.name = package_name (decval ()); - else if (c (pv, "pv")) r.package.version = make_version (getval ()); + else if (c (pv, "pv")) r.package.version = make_version (rawval ()); else if (c (tg, "tg")) r.target = target_triplet (decval ()); else if (c (tc, "tc")) r.target_config_name = decval (); else if (c (pc, "pc")) r.package_config_name = decval (); @@ -2326,7 +2897,7 @@ namespace brep { // Toolchain name and version. E.g. "public-0.17.0" - string v (getval ()); + string v (rawval ()); // Note: parsing code based on mod/mod-builds.cxx. // @@ -2338,7 +2909,7 @@ namespace brep r.toolchain_version = make_version (v.substr (p + 1)); } - qp = ep ? ep + 1 : nullptr; + qp = ep != nullptr ? ep + 1 : nullptr; } if (!pn || !pv || !tg || !tc || !pc || !th) @@ -2346,25 +2917,38 @@ namespace brep return r; } - catch (const invalid_argument&) + catch (const invalid_argument&) // Invalid url, brep::version, etc. { return nullopt; } optional<string> ci_github:: - generate_jwt (const basic_mark& trace, + generate_jwt (const string& app_id, + const basic_mark& trace, const basic_mark& error) const { string jwt; try { + // Look up the private key path for the app id and fail if not found. + // + const map<string, dir_path>& pks ( + options_->ci_github_app_id_private_key ()); + + auto pk (pks.find (app_id)); + if (pk == pks.end ()) + { + error << "unable to generate JWT: " + << "no private key configured for app id " << app_id; + return nullopt; + } + // Set token's "issued at" time 60 seconds in the past to combat clock // drift (as recommended by GitHub). // jwt = brep::generate_jwt ( *options_, - options_->ci_github_app_private_key (), - to_string (options_->ci_github_app_id ()), + pk->second, app_id, chrono::seconds (options_->ci_github_jwt_validity_period ()), chrono::seconds (60)); @@ -2420,7 +3004,7 @@ namespace brep // example. // optional<gh_installation_access_token> ci_github:: - obtain_installation_access_token (uint64_t iid, + obtain_installation_access_token (const string& iid, string jwt, const basic_mark& error) const { @@ -2429,7 +3013,7 @@ namespace brep { // API endpoint. // - string ep ("app/installations/" + to_string (iid) + "/access_tokens"); + string ep ("app/installations/" + iid + "/access_tokens"); uint16_t sc ( github_post (iat, ep, strings {"Authorization: Bearer " + jwt})); @@ -2455,6 +3039,8 @@ namespace brep // iat.expires_at -= chrono::minutes (5); } + // gh_installation_access_token (via github_post()) + // catch (const json::invalid_json_input& e) { // Note: e.name is the GitHub API endpoint. 
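For reference, the private key lookup in generate_jwt () above is driven by the new ci-github-app-id-private-key configuration option (see the module.cli change below). A hypothetical configuration fragment, with made-up app ids and key paths, might look like this, one <id>=<path> mapping per GitHub App:

  ci-github-app-id-private-key 12345=/etc/brep/github-app-12345-private-key.pem
  ci-github-app-id-private-key 67890=/etc/brep/github-app-67890-private-key.pem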
@@ -2464,12 +3050,12 @@ namespace brep << e.position << ", error: " << e; return nullopt; } - catch (const invalid_argument& e) + catch (const invalid_argument& e) // github_post() { error << "malformed header(s) in response: " << e; return nullopt; } - catch (const system_error& e) + catch (const system_error& e) // github_post() { error << "unable to get installation access token (errno=" << e.code () << "): " << e.what (); diff --git a/mod/mod-ci-github.hxx b/mod/mod-ci-github.hxx index aa601d2..059801a 100644 --- a/mod/mod-ci-github.hxx +++ b/mod/mod-ci-github.hxx @@ -42,33 +42,40 @@ namespace brep virtual const cli::options& cli_options () const {return options::ci_github::description ();} - virtual function<optional<string> (const tenant_service&)> - build_unloaded (tenant_service&&, + virtual function<optional<string> (const string&, const tenant_service&)> + build_unloaded (const string& tenant_id, + tenant_service&&, const diag_epilogue& log_writer) const noexcept override; - function<optional<string> (const tenant_service&)> + function<optional<string> (const string&, const tenant_service&)> build_unloaded_pre_check (tenant_service&&, service_data&&, const diag_epilogue&) const noexcept; - function<optional<string> (const tenant_service&)> - build_unloaded_load (tenant_service&&, + function<optional<string> (const string&, const tenant_service&)> + build_unloaded_load (const string& tenant_id, + tenant_service&&, service_data&&, const diag_epilogue&) const noexcept; - virtual function<optional<string> (const tenant_service&)> - build_queued (const tenant_service&, + virtual function<optional<string> (const string&, const tenant_service&)> + build_queued (const string& tenant_id, + const tenant_service&, const vector<build>&, optional<build_state> initial_state, const build_queued_hints&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_building (const tenant_service&, const build&, + virtual function<optional<string> (const string&, const tenant_service&)> + build_building (const string& tenant_id, + const tenant_service&, + const build&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_built (const tenant_service&, const build&, + virtual function<optional<string> (const string&, const tenant_service&)> + build_built (const string& tenant_id, + const tenant_service&, + const build&, const diag_epilogue& log_writer) const noexcept override; private: @@ -83,13 +90,21 @@ namespace brep bool handle_check_suite_request (gh_check_suite_event, bool warning_success); + // Handle the check_suite event `completed` action. + // + // If warning_success is true, then map result_status::warning to SUCCESS + // and to FAILURE otherwise. + // + bool + handle_check_suite_completed (gh_check_suite_event, bool warning_success); + // Handle the check_run event `rerequested` action. // // If warning_success is true, then map result_status::warning to SUCCESS // and to FAILURE otherwise. // bool - handle_check_run_rerequest (gh_check_run_event, bool warning_success); + handle_check_run_rerequest (const gh_check_run_event&, bool warning_success); // Handle the pull_request event `opened` and `synchronize` actions. 
// @@ -105,12 +120,16 @@ namespace brep details_url (const build&) const; optional<string> - generate_jwt (const basic_mark& trace, const basic_mark& error) const; + generate_jwt (const string& app_id, + const basic_mark& trace, + const basic_mark& error) const; - // Authenticate to GitHub as an app installation. + // Authenticate to GitHub as an app installation. Return the installation + // access token (IAT). Issue diagnostics and return nullopt if something + // goes wrong. // optional<gh_installation_access_token> - obtain_installation_access_token (uint64_t install_id, + obtain_installation_access_token (const string& install_id, string jwt, const basic_mark& error) const; diff --git a/mod/mod-ci.cxx b/mod/mod-ci.cxx index 52f4644..46fbf6a 100644 --- a/mod/mod-ci.cxx +++ b/mod/mod-ci.cxx @@ -422,8 +422,10 @@ handle (request& rq, response& rs) } #ifdef BREP_CI_TENANT_SERVICE -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_queued (const tenant_service&, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_queued (const string& /*tenant_id*/, + const tenant_service&, const vector<build>& bs, optional<build_state> initial_state, const build_queued_hints& hints, @@ -437,7 +439,8 @@ build_queued (const tenant_service&, << hints.single_package_version << ' ' << hints.single_package_config;}); - return [&bs, initial_state] (const tenant_service& ts) + return [&bs, initial_state] (const string& tenant_id, + const tenant_service& ts) { optional<string> r (ts.data); @@ -446,6 +449,7 @@ build_queued (const tenant_service&, string s ((!initial_state ? "queued " : "queued " + to_string (*initial_state) + ' ') + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + @@ -467,14 +471,18 @@ build_queued (const tenant_service&, }; } -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_building (const tenant_service&, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_building (const string& /*tenant_id*/, + const tenant_service&, const build& b, const diag_epilogue&) const noexcept { - return [&b] (const tenant_service& ts) + return [&b] (const string& tenant_id, + const tenant_service& ts) { string s ("building " + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + @@ -487,14 +495,17 @@ build_building (const tenant_service&, }; } -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_built (const tenant_service&, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_built (const string& /*tenant_id*/, + const tenant_service&, const build& b, const diag_epilogue&) const noexcept { - return [&b] (const tenant_service& ts) + return [&b] (const string& tenant_id, const tenant_service& ts) { string s ("built " + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + diff --git a/mod/mod-ci.hxx b/mod/mod-ci.hxx index e4a343c..132b5b0 100644 --- a/mod/mod-ci.hxx +++ b/mod/mod-ci.hxx @@ -71,26 +71,34 @@ namespace brep cli_options () const override {return options::ci::description ();} #ifdef BREP_CI_TENANT_SERVICE - virtual function<optional<string> (const tenant_service&)> - build_queued (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + 
build_queued (const string& tenant_id, + const tenant_service&, const vector<build>&, optional<build_state> initial_state, const build_queued_hints&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_building (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_building (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_built (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_built (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept override; #ifdef BREP_CI_TENANT_SERVICE_UNLOADED - virtual function<optional<string> (const tenant_service&)> - build_unloaded (tenant_service&&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_unloaded (const string& tenant_id, + tenant_service&&, const diag_epilogue& log_writer) const noexcept override; #endif #endif diff --git a/mod/module.cli b/mod/module.cli index 5799697..f0d5cdc 100644 --- a/mod/module.cli +++ b/mod/module.cli @@ -850,24 +850,20 @@ namespace brep // GitHub CI-specific options. // - size_t ci-github-app-id - { - "<id>", - "The GitHub App ID. Found in the app's settings on GitHub." - } - string ci-github-app-webhook-secret { "<secret>", "The GitHub App's configured webhook secret. If not set, then the - GitHub CI service is disabled." + GitHub CI service is disabled. Note: make sure to choose a strong + (random) secret." } - path ci-github-app-private-key + std::map<string, dir_path> ci-github-app-id-private-key { - "<path>", - "The private key used during GitHub API authentication. Created in - the GitHub App's settings." + "<id>=<path>", + "The private key used during GitHub API authentication for the + specified GitHub App ID. Both values are found in the GitHub App's + settings." } uint16_t ci-github-jwt-validity-period = 600 diff --git a/mod/tenant-service.hxx b/mod/tenant-service.hxx index 8ba199a..5564a56 100644 --- a/mod/tenant-service.hxx +++ b/mod/tenant-service.hxx @@ -74,9 +74,11 @@ namespace brep // If the returned function is not NULL, it is called to update the // service data. It should return the new data or nullopt if no update is // necessary. Note: tenant_service::data passed to the callback and to the - // returned function may not be the same. Also, the returned function may - // be called multiple times (on transaction retries). Note that the passed - // log_writer is valid during the calls to the returned function. + // returned function may not be the same. Furthermore, tenant_ids may not + // be the same either, in case the tenant was replaced. Also, the returned + // function may be called multiple times (on transaction retries). Note + // that the passed log_writer is valid during the calls to the returned + // function. // // The passed initial_state indicates the logical initial state and is // either absent, `building` (interrupted), or `built` (rebuild).
Note @@ -101,8 +103,10 @@ namespace brep bool single_package_config; }; - virtual function<optional<string> (const tenant_service&)> - build_queued (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_queued (const string& tenant_id, + const tenant_service&, const vector<build>&, optional<build_state> initial_state, const build_queued_hints&, @@ -112,8 +116,10 @@ namespace brep class tenant_service_build_building: public virtual tenant_service_base { public: - virtual function<optional<string> (const tenant_service&)> - build_building (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_building (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept = 0; }; @@ -121,8 +127,10 @@ namespace brep class tenant_service_build_built: public virtual tenant_service_base { public: - virtual function<optional<string> (const tenant_service&)> - build_built (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_built (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept = 0; }; @@ -140,8 +148,10 @@ namespace brep class tenant_service_build_unloaded: public virtual tenant_service_base { public: - virtual function<optional<string> (const tenant_service&)> - build_unloaded (tenant_service&&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_unloaded (const string& tenant_id, + tenant_service&&, const diag_epilogue& log_writer) const noexcept = 0; };
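Taken together, these tenant-service.hxx changes make every notification callback a two-phase affair: the callback decides what to record and returns a continuation, and brep later invokes that continuation inside its database transaction with whatever tenant id and service state are current at that point (which, per the updated comment above, may differ across transaction retries or if the tenant was replaced). The following self-contained sketch uses only standard library types and hypothetical names (toy_service, build_built_cb) to illustrate that calling convention; it is not brep code, just the shape implied by tenant_service_build_built and the mod-ci.cxx implementation earlier in this diff.

#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <utility>

// Stand-in for brep::tenant_service: an id/type pair plus opaque data.
//
struct toy_service
{
  std::string id;
  std::string type;
  std::optional<std::string> data;
};

// Phase one: decide what to record (here, a package name) and return a
// continuation. Phase two: the caller invokes the continuation inside its
// database transaction with the current tenant id and service state; the
// continuation returns the new service data or nullopt if nothing changed.
//
std::function<std::optional<std::string> (const std::string&,
                                          const toy_service&)>
build_built_cb (const std::string& package)
{
  return [package] (const std::string& tenant_id,
                    const toy_service& ts) -> std::optional<std::string>
  {
    std::string r (ts.data ? *ts.data + ", " : std::string ());
    r += "built " + tenant_id + '/' + package;
    return r;
  };
}

int
main ()
{
  toy_service ts {"service-id", "toy", std::nullopt};

  auto f (build_built_cb ("libhello"));

  // The continuation may run more than once (e.g., on transaction retries)
  // and possibly against a different tenant id if the tenant was replaced.
  //
  if (std::optional<std::string> d = f ("tenant-1", ts))
    ts.data = std::move (*d);

  std::cout << *ts.data << '\n'; // Prints: built tenant-1/libhello
}

Returning nullopt from the continuation would indicate that no service data update is necessary, mirroring the optional<string> return type in the interface above.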