Diffstat (limited to 'mod')
31 files changed, 8261 insertions, 247 deletions
diff --git a/mod/ci-common.cxx b/mod/ci-common.cxx index c0ef89f..deee977 100644 --- a/mod/ci-common.cxx +++ b/mod/ci-common.cxx @@ -14,6 +14,8 @@ #include <libbutl/process-io.hxx> // operator<<(ostream, process_args) #include <libbutl/manifest-serializer.hxx> +#include <libbrep/build.hxx> +#include <libbrep/build-odb.hxx> #include <libbrep/build-package.hxx> #include <libbrep/build-package-odb.hxx> @@ -431,7 +433,8 @@ namespace brep }, 2 /* stderr */, ops.email (), - "CI request submission (" + sr.reference + ')', + ((service ? service->type : "ci") + + " request submission: " + repository.string ()), {ops.ci_email ()}); // Write the CI request manifest. @@ -534,77 +537,175 @@ namespace brep s.next ("", ""); // End of manifest. } - optional<string> ci_start:: + optional<pair<string, ci_start::duplicate_tenant_result>> ci_start:: create (const basic_mark& error, const basic_mark&, const basic_mark* trace, odb::core::database& db, + size_t retry, tenant_service&& service, duration notify_interval, - duration notify_delay) const + duration notify_delay, + duplicate_tenant_mode mode) const { using namespace odb::core; - // Generate the request id. - // - string request_id; + assert (mode == duplicate_tenant_mode::fail || !service.id.empty ()); + assert (!transaction::has_current ()); - try - { - request_id = uuid::generate ().string (); - } - catch (const system_error& e) - { - error << "unable to generate request id: " << e; - return nullopt; - } + build_tenant t; - // Use the generated request id if the tenant service id is not specified. + // Set the reference count to 1 for the `created` result. // - if (service.id.empty ()) - service.id = request_id; + duplicate_tenant_result r (duplicate_tenant_result::created); + service.ref_count = 1; - build_tenant t (move (request_id), - move (service), - system_clock::now () - notify_interval + notify_delay, - notify_interval); + for (string request_id;;) { - assert (!transaction::has_current ()); + try + { + transaction tr (db.begin ()); - transaction tr (db.begin ()); + // Unless we are in the 'fail on duplicate' mode, check if this + // service type/id pair is already in use and, if that's the case, + // either ignore it or reassign this service to a new tenant, + // canceling the old one. + // + if (mode != duplicate_tenant_mode::fail) + { + using query = query<build_tenant>; + + shared_ptr<build_tenant> t ( + db.query_one<build_tenant> (query::service.id == service.id && + query::service.type == service.type)); + if (t != nullptr) + { + // Reduce the replace_archived mode to the replace or ignore mode. + // + if (mode == duplicate_tenant_mode::replace_archived) + { + mode = (t->archived + ? duplicate_tenant_mode::replace + : duplicate_tenant_mode::ignore); + } + + // Shouldn't be here otherwise. + // + assert (t->service); + + // Bail out in the ignore mode and cancel the tenant in the + // replace mode. + // + if (mode == duplicate_tenant_mode::ignore) + { + // Increment the reference count for the `ignored` result. + // + ++(t->service->ref_count); + + db.update (t); + tr.commit (); + + return make_pair (move (t->id), duplicate_tenant_result::ignored); + } + + assert (mode == duplicate_tenant_mode::replace); + + // Preserve the current reference count for the `replaced` result. 
+ // + service.ref_count = t->service->ref_count; + + if (t->unloaded_timestamp) + { + db.erase (t); + } + else + { + t->service = nullopt; + t->archived = true; + db.update (t); + } + + r = duplicate_tenant_result::replaced; + } + } - // Note that in contrast to brep-load, we know that the tenant id is - // unique and thus we don't try to remove a tenant with such an id. - // There is also not much reason to assume that we may have switched - // from the single-tenant mode here and remove the respective tenant, - // unless we are in the tenant-service functionality development mode. - // + // Generate the request id. + // + if (request_id.empty ()) + try + { + request_id = uuid::generate ().string (); + } + catch (const system_error& e) + { + error << "unable to generate request id: " << e; + return nullopt; + } + + // Use the generated request id if the tenant service id is not + // specified. + // + if (service.id.empty ()) + service.id = request_id; + + t = build_tenant (move (request_id), + move (service), + system_clock::now () - notify_interval + notify_delay, + notify_interval); + + // Note that in contrast to brep-load, we know that the tenant id is + // unique and thus we don't try to remove a tenant with such an id. + // There is also not much reason to assume that we may have switched + // from the single-tenant mode here and remove the respective tenant, + // unless we are in the tenant-service functionality development mode. + // #ifdef BREP_CI_TENANT_SERVICE_UNLOADED - cstrings ts ({""}); + cstrings ts ({""}); - db.erase_query<build_package> ( - query<build_package>::id.tenant.in_range (ts.begin (), ts.end ())); + db.erase_query<build_package> ( + query<build_package>::id.tenant.in_range (ts.begin (), ts.end ())); - db.erase_query<build_repository> ( - query<build_repository>::id.tenant.in_range (ts.begin (), ts.end ())); + db.erase_query<build_repository> ( + query<build_repository>::id.tenant.in_range (ts.begin (), ts.end ())); - db.erase_query<build_public_key> ( - query<build_public_key>::id.tenant.in_range (ts.begin (), ts.end ())); + db.erase_query<build_public_key> ( + query<build_public_key>::id.tenant.in_range (ts.begin (), ts.end ())); - db.erase_query<build_tenant> ( - query<build_tenant>::id.in_range (ts.begin (), ts.end ())); + db.erase_query<build_tenant> ( + query<build_tenant>::id.in_range (ts.begin (), ts.end ())); #endif - db.persist (t); + db.persist (t); - tr.commit (); - } + tr.commit (); - if (trace != nullptr) - *trace << "unloaded CI request " << t.id << " for service " - << t.service->id << ' ' << t.service->type << " is created"; + if (trace != nullptr) + *trace << "unloaded CI request " << t.id << " for service " + << t.service->id << ' ' << t.service->type << " is created"; + + // Bail out if we have successfully erased, updated, or persisted the + // tenant object. + // + break; + } + catch (const odb::recoverable& e) + { + // If no more retries left, don't re-throw odb::recoverable not to + // retry at the upper level. + // + if (retry-- == 0) + throw runtime_error (e.what ()); + + // Prepare for the next iteration. 
+ // + request_id = move (t.id); + service = move (*t.service); + service.ref_count = 1; + r = duplicate_tenant_result::created; + } + } - return move (t.id); + return make_pair (move (t.id), r); } optional<ci_start::start_result> ci_start:: @@ -612,51 +713,69 @@ namespace brep const basic_mark& warn, const basic_mark* trace, odb::core::database& db, + size_t retry, tenant_service&& service, const repository_location& repository) const { using namespace odb::core; string request_id; + + for (;;) { - assert (!transaction::has_current ()); + try + { + assert (!transaction::has_current ()); - transaction tr (db.begin ()); + transaction tr (db.begin ()); - using query = query<build_tenant>; + using query = query<build_tenant>; - shared_ptr<build_tenant> t ( - db.query_one<build_tenant> (query::service.id == service.id && - query::service.type == service.type)); + shared_ptr<build_tenant> t ( + db.query_one<build_tenant> (query::service.id == service.id && + query::service.type == service.type)); - if (t == nullptr) - { - error << "unable to find tenant for service " << service.id << ' ' - << service.type; + if (t == nullptr) + { + error << "unable to find tenant for service " << service.id << ' ' + << service.type; - return nullopt; - } - else if (t->archived) - { - error << "tenant " << t->id << " for service " << service.id << ' ' - << service.type << " is already archived"; + return nullopt; + } + else if (t->archived) + { + error << "tenant " << t->id << " for service " << service.id << ' ' + << service.type << " is already archived"; - return nullopt; - } - else if (!t->unloaded_timestamp) - { - error << "tenant " << t->id << " for service " << service.id << ' ' - << service.type << " is already loaded"; + return nullopt; + } + else if (!t->unloaded_timestamp) + { + error << "tenant " << t->id << " for service " << service.id << ' ' + << service.type << " is already loaded"; - return nullopt; - } + return nullopt; + } - t->unloaded_timestamp = nullopt; - db.update (t); + t->unloaded_timestamp = nullopt; + db.update (t); - tr.commit (); + tr.commit (); - request_id = move (t->id); + request_id = move (t->id); + + // Bail out if we have successfully updated the tenant object. + // + break; + } + catch (const odb::recoverable& e) + { + // If no more retries left, don't re-throw odb::recoverable not to + // retry at the upper level. + // + if (retry-- == 0) + throw runtime_error (e.what ()); + } } assert (options_ != nullptr); // Shouldn't be called otherwise. @@ -690,33 +809,85 @@ namespace brep const basic_mark&, const basic_mark* trace, odb::core::database& db, + size_t retry, const string& type, - const string& id) const + const string& id, + bool ref_count) const { using namespace odb::core; assert (!transaction::has_current ()); - transaction tr (db.begin ()); + optional<tenant_service> r; - using query = query<build_tenant>; + for (;;) + { + try + { + transaction tr (db.begin ()); - shared_ptr<build_tenant> t ( - db.query_one<build_tenant> (query::service.id == id && - query::service.type == type)); - if (t == nullptr) - return nullopt; + using query = query<build_tenant>; - optional<tenant_service> r (move (t->service)); - t->service = nullopt; - t->archived = true; - db.update (t); + shared_ptr<build_tenant> t ( + db.query_one<build_tenant> (query::service.id == id && + query::service.type == type)); + if (t == nullptr) + return nullopt; - tr.commit (); + // Shouldn't be here otherwise. 
+ // + assert (t->service && t->service->ref_count != 0); - if (trace != nullptr) - *trace << "CI request " << t->id << " for service " << id << ' ' << type - << " is canceled"; + bool cancel (!ref_count || --(t->service->ref_count) == 0); + + if (cancel) + { + // Move out the service state before it is dropped from the tenant. + // + r = move (t->service); + + if (t->unloaded_timestamp) + { + db.erase (t); + } + else + { + t->service = nullopt; + t->archived = true; + db.update (t); + } + + if (trace != nullptr) + *trace << "CI request " << t->id << " for service " << id << ' ' + << type << " is canceled"; + } + else + { + db.update (t); // Update the service reference count. + + // Move out the service state after the tenant is updated. + // + r = move (t->service); + } + + tr.commit (); + + // Bail out if we have successfully updated or erased the tenant + // object. + // + break; + } + catch (const odb::recoverable& e) + { + // If no more retries left, don't re-throw odb::recoverable not to + // retry at the upper level. + // + if (retry-- == 0) + throw runtime_error (e.what ()); + + r = nullopt; // Prepare for the next iteration. + } + } return r; } @@ -727,26 +898,50 @@ namespace brep const basic_mark* trace, const string& reason, odb::core::database& db, + size_t retry, const string& tid) const { using namespace odb::core; assert (!transaction::has_current ()); - transaction tr (db.begin ()); + for (;;) + { + try + { + transaction tr (db.begin ()); - shared_ptr<build_tenant> t (db.find<build_tenant> (tid)); + shared_ptr<build_tenant> t (db.find<build_tenant> (tid)); - if (t == nullptr) - return false; + if (t == nullptr) + return false; - if (!t->archived) - { - t->archived = true; - db.update (t); - } + if (t->unloaded_timestamp) + { + db.erase (t); + } + else if (!t->archived) + { + t->archived = true; + db.update (t); + } - tr.commit (); + tr.commit (); + + // Bail out if we have successfully updated or erased the tenant + // object. + // + break; + } + catch (const odb::recoverable& e) + { + // If no more retries left, don't re-throw odb::recoverable not to + // retry at the upper level. + // + if (retry-- == 0) + throw runtime_error (e.what ()); + } + } if (trace != nullptr) *trace << "CI request " << tid << " is canceled: " @@ -756,4 +951,109 @@ namespace brep return true; } + + optional<build_state> ci_start:: + rebuild (odb::core::database& db, + size_t retry, + const build_id& id, + function<optional<string> (const string& tenant_id, + const tenant_service&, + build_state)> uf) const + { + using namespace odb::core; + + build_state s; + + for (;;) + { + try + { + // NOTE: don't forget to update build_force::handle() if changing + // anything here. + // + transaction t (db.begin ()); + + package_build pb; + if (!db.query_one<package_build> (query<package_build>::build::id == id, + pb) || + pb.archived) + { + return nullopt; + } + + const shared_ptr<build>& b (pb.build); + s = b->state; + + if (s != build_state::queued) + { + force_state force (s == build_state::built + ? force_state::forced + : force_state::forcing); + + if (b->force != force) + { + b->force = force; + db.update (b); + } + + if (uf != nullptr) + { + shared_ptr<build_tenant> t (db.load<build_tenant> (b->tenant)); + + assert (t->service); + + tenant_service& ts (*t->service); + + if (optional<string> data = uf (t->id, ts, s)) + { + ts.data = move (*data); + db.update (t); + } + } + } + + t.commit (); + + // Bail out if we have successfully updated the build and tenant + // objects. 
+ // + break; + } + catch (const odb::recoverable& e) + { + // If no more retries left, don't re-throw odb::recoverable not to + // retry at the upper level. + // + if (retry-- == 0) + throw runtime_error (e.what ()); + } + } + + return s; + } + + optional<ci_start::tenant_data> ci_start:: + find (odb::core::database& db, + const string& type, + const string& id) const + { + using namespace odb::core; + + assert (!transaction::has_current ()); + + transaction tr (db.begin ()); + + using query = query<build_tenant>; + + shared_ptr<build_tenant> t ( + db.query_one<build_tenant> (query::service.id == id && + query::service.type == type)); + + tr.commit (); + + if (t == nullptr || !t->service) + return nullopt; + + return tenant_data {move (t->id), move (*t->service), t->archived}; + } } diff --git a/mod/ci-common.hxx b/mod/ci-common.hxx index 848bca1..a38ac54 100644 --- a/mod/ci-common.hxx +++ b/mod/ci-common.hxx @@ -9,6 +9,7 @@ #include <libbrep/types.hxx> #include <libbrep/utility.hxx> +#include <libbrep/build.hxx> #include <libbrep/common.hxx> #include <mod/diagnostics.hxx> @@ -24,7 +25,7 @@ namespace brep // If the request handling has been performed normally, then return the // information that corresponds to the CI result manifest (see CI Result - // Manifest in the manual). Otherwise (some internal has error occured), + // Manifest in the manual). Otherwise (some internal error has occured), // log the error and return nullopt. // // The arguments correspond to the CI request and overrides manifest @@ -40,6 +41,10 @@ namespace brep // Note that the inability to generate the reference is an internal // error. Thus, it is not optional. // + // Note that if the CI request information is persisted to the database + // (which, depending on the CI request handler, may not be the case), then + // the reference is assumed to be the respective tenant id. + // struct start_result { uint16_t status; @@ -48,8 +53,13 @@ namespace brep vector<pair<string, string>> custom_result; }; - // In the optional service information, if id is empty, then the generated - // reference is used instead. + // In the optional tenant service information, if service id is empty, + // then the generated tenant id is used instead. + // + // Note that if the tenant service is specified, then the CI request + // information is expected to be persisted to the database and thus + // start_result::reference denotes the tenant id in this case (see above + // for details). // optional<start_result> start (const basic_mark& error, @@ -65,27 +75,63 @@ namespace brep const vector<pair<string, string>>& custom_request = {}, const vector<pair<string, string>>& overrides = {}) const; - // Create an unloaded CI request returning start_result::reference on - // success and nullopt on an internal error. Such a request is not started - // until loaded with the load() function below. Configure the time - // interval between the build_unloaded() notifications for the being - // created tenant and set the initial delay for the first notification. - // See also the build_unloaded() tenant services notification. + // Create an unloaded CI request returning tenant id on success and + // nullopt on an internal error. Such a request is not started until + // loaded with the load() function below. Configure the time interval + // between the build_unloaded() notifications for the being created tenant + // and set the initial delay for the first notification. See also the + // build_unloaded() tenant services notification. 
+ // + // The duplicate_tenant_mode argument specifies the behavior in case of + // the duplicate tenant_service type/id pair. The default is to fail by + // throwing an exception. Alternatively, this can be ignored or the + // previous tenant can be canceled (thus freeing the type/id pair; see + // below) and a new tenant with the same type/id created. In both these + // modes (ignore and replace), the second half of the returned pair + // indicates whether there was a duplicate. If there were, then for the + // ignore mode the returned tenant id corresponds to the old tenant and + // for the replace mode -- to the new tenant. + // + // The replace_archived mode is a variant of replace that replaces if the + // tenant is already archived and ignores it otherwise (with the result + // having the same semantics as in the replace and ignore modes). + // + // Note also that the duplicate_tenant_mode::replace modes are not the + // same as separate calls to cancel() and then to create() since the + // latter would happen in two separate transactions and will thus be racy. + // + // Finally note that only duplicate_tenant_mode::fail can be used if the + // service id is empty. + // + // The tenant reference count is set to 1 if the result is `created`, + // incremented if the result is `ignored`, and preserved if the result is + // `replaced`. + // + // Repeat the attempts on the recoverable database failures (deadlocks, + // etc) and throw runtime_error if no more retries left. // // Note: should be called out of the database transaction. // - optional<string> + enum class duplicate_tenant_mode {fail, ignore, replace, replace_archived}; + enum class duplicate_tenant_result {created, ignored, replaced}; + + optional<pair<string, duplicate_tenant_result>> create (const basic_mark& error, const basic_mark& warn, const basic_mark* trace, odb::core::database&, + size_t retry, tenant_service&&, duration notify_interval, - duration notify_delay) const; + duration notify_delay, + duplicate_tenant_mode = duplicate_tenant_mode::fail) const; // Load (and start) previously created (as unloaded) CI request. Similarly // to the start() function, return nullopt on an internal error. // + // Repeat the attempts on the recoverable database failures (deadlocks, + // etc) and throw runtime_error if no more retries left. + // // Note that tenant_service::id is used to identify the CI request tenant. // // Note: should be called out of the database transaction. @@ -95,12 +141,27 @@ namespace brep const basic_mark& warn, const basic_mark* trace, odb::core::database&, + size_t retry, tenant_service&&, const repository_location& repository) const; // Cancel previously created or started CI request. Return the service // state or nullopt if there is no tenant for such a type/id pair. // + // Specifically, this function clears the tenant service state (thus + // allowing reusing the same service type/id pair in another tenant) and + // archives the tenant, unless the tenant is unloaded, in which case it is + // dropped. Note that the latter allow using unloaded tenants as a + // relatively cheap asynchronous execution mechanism. + // + // If ref_count is true, then decrement the tenant reference count and + // only cancel the CI request if it becomes 0. In this mode the caller can + // determine if the request was actually canceled by checking if the + // reference count in the returned service state is 0. 
+ // + // Repeat the attempts on the recoverable database failures (deadlocks, + // etc) and throw runtime_error if no more retries left. + // // Note: should be called out of the database transaction. // optional<tenant_service> @@ -108,13 +169,23 @@ namespace brep const basic_mark& warn, const basic_mark* trace, odb::core::database&, + size_t retry, const string& type, - const string& id) const; + const string& id, + bool ref_count = false) const; // Cancel previously created or started CI request. Return false if there // is no tenant for the specified tenant id. Note that the reason argument // is only used for tracing. // + // Similarly to above, this function archives the tenant, unless the + // tenant is unloaded, in which case it is dropped. Note, however, that + // this version does not touch the service state (use the above version if + // you want to clear it). + // + // Repeat the attempts on the recoverable database failures (deadlocks, + // etc) and throw runtime_error if no more retries left. + // // Note: should be called out of the database transaction. // bool @@ -123,8 +194,76 @@ namespace brep const basic_mark* trace, const string& reason, odb::core::database&, + size_t retry, const string& tenant_id) const; + // Schedule the re-build of the package build and return the build object + // current state. + // + // Specifically: + // + // - If the build has expired (build or package object doesn't exist or + // the package is archived or is not buildable anymore, etc), then do + // nothing and return nullopt. + // + // Note, however, that this function doesn't check if the build + // configuration still exists in the buildtab. It is supposed that the + // caller has already checked for that if necessary (see + // build_force::handle() for an example of this check). And if not + // then a re-build will be scheduled and later cleaned by the cleaner + // (without notifications). + // + // - Otherwise, if the build object is in the queued state, then do + // nothing and return build_state::queued. It is assumed that a build + // object in such a state is already about to be built. + // + // - Otherwise (the build object is in the building or built state), + // schedule the object for the rebuild and return the current state. + // + // Note that in contrast to the build-force handler, this function doesn't + // send the build_queued() notification to the tenant-associated service + // if the object is in the building state (which is done as soon as + // possible to avoid races). Instead, it is assumed the service will + // perform any equivalent actions directly based on the returned state. + // + // The last argument, if not NULL, is called to update the service data + // associated with the tenant to which this build object belongs. It has + // the same semantics as the returned function in the tenant service + // callbacks (see tenant_service_build_queued). Note that it is only + // called if the rebuild was actually scheduled, that is, the current + // state is building or built. + // + // Repeat the attempts on the recoverable database failures (deadlocks, + // etc) and throw runtime_error if no more retries left. + // + // Note: should be called out of the database transaction. 
+ // + optional<build_state> + rebuild (odb::core::database&, + size_t retry, + const build_id&, + function<optional<string> (const string& tenant_id, + const tenant_service&, + build_state)> = nullptr) const; + + // Find the tenant given the tenant service type and id and return the + // associated data plus the indication of whether the tenant is archived, + // or nullopt if there is no such tenant. + // + // Note: should be called out of the database transaction. + // + struct tenant_data + { + string tenant_id; + tenant_service service; + bool archived; + }; + + optional<tenant_data> + find (odb::core::database&, + const string& type, + const string& id) const; + // Helpers. // diff --git a/mod/database-module.cxx b/mod/database-module.cxx index bbb3e59..629e393 100644 --- a/mod/database-module.cxx +++ b/mod/database-module.cxx @@ -79,8 +79,10 @@ namespace brep optional<string> database_module:: update_tenant_service_state ( const connection_ptr& conn, - const string& tid, - const function<optional<string> (const tenant_service&)>& f) + const string& type, + const string& id, + const function<optional<string> (const string& tenant_id, + const tenant_service&)>& f) { assert (f != nullptr); // Shouldn't be called otherwise. @@ -96,13 +98,21 @@ namespace brep { transaction tr (conn->begin ()); - shared_ptr<build_tenant> t (build_db_->find<build_tenant> (tid)); + using query = query<build_tenant>; - if (t != nullptr && t->service) + shared_ptr<build_tenant> t ( + build_db_->query_one<build_tenant> (query::service.id == id && + query::service.type == type)); + + if (t != nullptr) { + // Shouldn't be here otherwise. + // + assert (t->service); + tenant_service& s (*t->service); - if (optional<string> data = f (s)) + if (optional<string> data = f (t->id, s)) { s.data = move (*data); build_db_->update (t); @@ -119,10 +129,14 @@ namespace brep } catch (const odb::recoverable& e) { + HANDLER_DIAG; + + // If no more retries left, don't re-throw odb::recoverable not to + // retry at the upper level. + // if (retry-- == 0) - throw; + fail << e << "; no tenant service state update retries left"; - HANDLER_DIAG; l1 ([&]{trace << e << "; " << retry + 1 << " tenant service " << "state update retries left";}); diff --git a/mod/database-module.hxx b/mod/database-module.hxx index 298afbf..76f13d4 100644 --- a/mod/database-module.hxx +++ b/mod/database-module.hxx @@ -61,16 +61,18 @@ namespace brep // and nullopt otherwise. // // Specifically, start the database transaction, query the service state, - // and call the callback-returned function on this state. If this call - // returns the data string (rather than nullopt), then update the service - // state with this data and persist the change. Repeat all the above steps - // on the recoverable database failures (deadlocks, etc). + // and, if present, call the callback-returned function on this state. If + // this call returns the data string (rather than nullopt), then update + // the service state with this data and persist the change. Repeat all the + // above steps on the recoverable database failures (deadlocks, etc). // optional<string> update_tenant_service_state ( const odb::core::connection_ptr&, - const string& tid, - const function<optional<string> (const tenant_service&)>&); + const string& type, + const string& id, + const function<optional<string> (const string& tenant_id, + const tenant_service&)>&); protected: size_t retry_ = 0; // Max of all retries. 
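The commit applies one retry discipline to every ci_start database operation above (create(), load(), cancel(), rebuild()): run the transaction in a loop, break on success, and on odb::recoverable either consume one unit of the retry budget or convert the exception to std::runtime_error so the caller does not retry yet again at its own level. A minimal sketch of that shape, assuming a hypothetical do_work() in place of the real per-operation transaction body:

#include <cstddef>   // size_t
#include <stdexcept> // runtime_error

#include <odb/database.hxx>
#include <odb/transaction.hxx>
#include <odb/exceptions.hxx> // odb::recoverable

// Hypothetical stand-in for the real transaction body (query, update,
// persist, etc).
//
void do_work (odb::core::database&);

void
retry_transaction (odb::core::database& db, std::size_t retry)
{
  for (;;)
  {
    try
    {
      odb::core::transaction tr (db.begin ());
      do_work (db);
      tr.commit ();
      break; // Bail out once the transaction has been committed.
    }
    catch (const odb::recoverable& e)
    {
      // If no more retries left, don't re-throw odb::recoverable so as
      // not to retry at the upper level.
      //
      if (retry-- == 0)
        throw std::runtime_error (e.what ());
    }
  }
}

Note one subtlety visible in create() above: any local state moved into the tenant object on a failed iteration (the request id, the service) must be moved back out in the catch block before the next attempt.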
diff --git a/mod/hmac.cxx b/mod/hmac.cxx new file mode 100644 index 0000000..cfb0e23 --- /dev/null +++ b/mod/hmac.cxx @@ -0,0 +1,100 @@ +#include <mod/hmac.hxx> + +#include <libbutl/openssl.hxx> + +using namespace std; +using namespace butl; + +string brep:: +compute_hmac (const options::openssl_options& o, + const void* m, size_t l, + const char* k) +{ + try + { + fdpipe errp (fdopen_pipe ()); // stderr pipe. + + // To compute an HMAC over stdin with the key <secret>: + // + // openssl dgst -sha256 -hmac <secret> + // + // Note that since openssl 3.0 the `mac` command is the preferred method + // for generating HMACs. For future reference, the equivalent command + // would be: + // + // openssl mac -digest SHA256 -macopt "key:<secret>" HMAC + // + // Note that here we assume both output and diagnostics will fit into pipe + // buffers and don't poll both with fdselect(). + // + openssl os (path ("-"), // Read message from openssl::out. + path ("-"), // Write output to openssl::in. + process::pipe (errp.in.get (), move (errp.out)), + process_env (o.openssl (), o.openssl_envvar ()), + "dgst", o.openssl_option (), + "-sha256", + "-hmac", k); + + ifdstream err (move (errp.in)); + + string h; // The HMAC value. + try + { + // In case of an exception, skip and close input after output. + // + // Note: re-open in/out so that they get automatically closed on + // an exception. + // + ifdstream in (os.in.release (), fdstream_mode::skip); + ofdstream out (os.out.release ()); + + // Write the message to openssl's input. + // + out.write (static_cast<const char*> (m), l); + out.close (); + + // Read the HMAC value from openssl's output. + // + h = in.read_text (); + in.close (); + } + catch (const io_error& e) + { + // If the process exits with non-zero status, assume the IO error is due + // to that and fall through. + // + if (os.wait ()) + { + throw_generic_error ( + e.code ().value (), + (string ("unable to read/write openssl stdout/stdin: ") + + e.what ()).c_str ()); + } + } + + if (!os.wait ()) + { + string et (err.read_text ()); + throw_generic_error (EINVAL, + ("non-zero openssl exit status: " + et).c_str ()); + } + + err.close (); + + return h; + } + catch (const process_error& e) + { + throw_generic_error ( + e.code ().value (), + (string ("unable to execute openssl: ") + e.what ()).c_str ()); + } + catch (const io_error& e) + { + // Unable to read diagnostics from stderr. + // + throw_generic_error ( + e.code ().value (), + (string ("unable to read openssl stderr : ") + e.what ()).c_str ()); + } +} diff --git a/mod/hmac.hxx b/mod/hmac.hxx new file mode 100644 index 0000000..586d0e8 --- /dev/null +++ b/mod/hmac.hxx @@ -0,0 +1,29 @@ +#ifndef MOD_HMAC_HXX +#define MOD_HMAC_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <mod/module-options.hxx> + +namespace brep +{ + // Compute the HMAC-SHA256 message authentication code over a message using + // the given key (alpha-numeric string, not encoded). + // + // Return the HMAC value or throw std::system_error in case of an error. + // + // Example output: + // + // 5e822587094c68e646db8b916da1db2056d92f1dea4252136a533b4147a30cb7 + // + // Note that although any cryptographic hash function can be used to compute + // an HMAC, this implementation supports only SHA-256. 
+ // + string + compute_hmac (const options::openssl_options&, + const void* message, size_t len, + const char* key); +} + +#endif diff --git a/mod/jwt.cxx b/mod/jwt.cxx new file mode 100644 index 0000000..4e28630 --- /dev/null +++ b/mod/jwt.cxx @@ -0,0 +1,189 @@ +#include <mod/jwt.hxx> + +#include <libbutl/base64.hxx> +#include <libbutl/openssl.hxx> +#include <libbutl/json/serializer.hxx> + +using namespace std; +using namespace butl; + +// Note that only GitHub's requirements are implemented, not the entire JWT +// spec. The following elements are currently supported: +// +// - The RS256 message authentication code algorithm (RSA with SHA256) +// - The `typ` and `alg` header fields +// - The `iat`, `exp`, and `iss` claims +// +// A JWT consists of a message and its signature. +// +// The message consists of a base64url-encoded JSON header and payload (set of +// claims). The signature is calculated over the message and then also +// base64url-encoded. +// +// base64url is base64 with a slightly different alphabet and optional padding +// to make it URL and filesystem safe. See base64.hxx for details. +// +// Header: +// +// { +// "typ": "JWT", +// "alg": "RS256" +// } +// +// Payload: +// +// { +// "iat": 1234567, +// "exp": 1234577, +// "iss": "MyName" +// } +// +// Where: +// iat := Issued At (NumericDate: seconds since 1970-01-01T00:00:00Z UTC) +// exp := Expiration Time (NumericDate) +// iss := Issuer +// +// Signature: +// +// RSA_SHA256(PKEY, base64url($header) + '.' + base64url($payload)) +// +// JWT: +// +// base64url($header) + '.' + base64url($payload) + '.' + base64url($signature) +// +string brep:: +generate_jwt (const options::openssl_options& o, + const path& pk, + const string& iss, + const chrono::seconds& vp, + const chrono::seconds& bd) +{ + // Create the header. + // + string h; // Header (base64url-encoded). + { + vector<char> b; + json::buffer_serializer s (b, 0 /* indentation */); + + s.begin_object (); + s.member ("typ", "JWT"); + s.member ("alg", "RS256"); // RSA with SHA256. + s.end_object (); + + h = base64url_encode (b); + } + + // Create the payload. + // + string p; // Payload (base64url-encoded). + { + using namespace std::chrono; + + // "Issued at" time. + // + seconds iat (duration_cast<seconds> ( + system_clock::now ().time_since_epoch () - bd)); + + // Expiration time. + // + seconds exp (iat + vp); + + vector<char> b; + json::buffer_serializer s (b, 0 /* indentation */); + + s.begin_object (); + s.member ("iss", iss); + s.member ("iat", iat.count ()); + s.member ("exp", exp.count ()); + s.end_object (); + + p = base64url_encode (b); + } + + // Create the signature. + // + string s; // Signature (base64url-encoded). + try + { + // Sign the concatenated header and payload using openssl. + // + // openssl dgst -sha256 -sign <pkey> file... + // + // Note that RSA is indicated by the contents of the private key. + // + // Note that here we assume both output and diagnostics will fit into pipe + // buffers and don't poll both with fdselect(). + // + fdpipe errp (fdopen_pipe ()); // stderr pipe. + + openssl os (path ("-"), // Read message from openssl::out. + path ("-"), // Write output to openssl::in. + process::pipe (errp.in.get (), move (errp.out)), + process_env (o.openssl (), o.openssl_envvar ()), + "dgst", o.openssl_option (), "-sha256", "-sign", pk); + + ifdstream err (move (errp.in)); + + vector<char> bs; // Binary signature (openssl output). + try + { + // In case of exception, skip and close input after output. 
+ // + // Note: re-open in/out so that they get automatically closed on + // exception. + // + ifdstream in (os.in.release (), fdstream_mode::skip); + ofdstream out (os.out.release ()); + + // Write the concatenated header and payload to openssl's input. + // + out << h << '.' << p; + out.close (); + + // Read the binary signature from openssl's output. + // + bs = in.read_binary (); + in.close (); + } + catch (const io_error& e) + { + // If the process exits with non-zero status, assume the IO error is due + // to that and fall through. + // + if (os.wait ()) + { + throw_generic_error ( + e.code ().value (), + (string ("unable to read/write openssl stdout/stdin: ") + + e.what ()).c_str ()); + } + } + + if (!os.wait ()) + { + string et (err.read_text ()); + throw_generic_error (EINVAL, + ("non-zero openssl exit status: " + et).c_str ()); + } + + err.close (); + + s = base64url_encode (bs); + } + catch (const process_error& e) + { + throw_generic_error ( + e.code ().value (), + (string ("unable to execute openssl: ") + e.what ()).c_str ()); + } + catch (const io_error& e) + { + // Unable to read diagnostics from stderr. + // + throw_generic_error ( + e.code ().value (), + (string ("unable to read openssl stderr : ") + e.what ()).c_str ()); + } + + return h + '.' + p + '.' + s; // Return the token. +} diff --git a/mod/jwt.hxx b/mod/jwt.hxx new file mode 100644 index 0000000..b0df714 --- /dev/null +++ b/mod/jwt.hxx @@ -0,0 +1,37 @@ +#ifndef MOD_JWT_HXX +#define MOD_JWT_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <mod/module-options.hxx> + +#include <chrono> + +namespace brep +{ + // Generate a JSON Web Token (JWT), defined in RFC7519. + // + // A JWT is essentially the token issuer's name along with a number of + // claims, signed with a private key. + // + // Note that only GitHub's requirements are implemented, not the entire JWT + // spec; see the source file for details. + // + // The token expires when the validity period has elapsed. + // + // The backdate argument specifies the number of seconds to subtract from + // the "issued at" time in order to combat potential clock drift (which can + // cause the token to be not valid yet). + // + // Return the token or throw std::system_error in case of an error. + // + string + generate_jwt (const options::openssl_options&, + const path& private_key, + const string& issuer, + const std::chrono::seconds& validity_period, + const std::chrono::seconds& backdate = std::chrono::seconds (60)); +} + +#endif diff --git a/mod/mod-build-configs.cxx b/mod/mod-build-configs.cxx index ce79edb..2754f95 100644 --- a/mod/mod-build-configs.cxx +++ b/mod/mod-build-configs.cxx @@ -34,10 +34,12 @@ init (scanner& s) s, unknown_mode::fail, unknown_mode::fail); if (options_->build_config_specified ()) + { build_config_module::init (*options_); - if (options_->root ().empty ()) - options_->root (dir_path ("/")); + if (options_->root ().empty ()) + options_->root (dir_path ("/")); + } } bool brep::build_configs:: diff --git a/mod/mod-build-force.cxx b/mod/mod-build-force.cxx index ea921e9..146acd9 100644 --- a/mod/mod-build-force.cxx +++ b/mod/mod-build-force.cxx @@ -198,6 +198,9 @@ handle (request& rq, response& rs) // connection_ptr conn (build_db_->connection ()); + // NOTE: don't forget to update ci_start::rebuild() if changing anything + // here. 
+ // { transaction t (conn->begin ()); @@ -206,8 +209,11 @@ handle (request& rq, response& rs) if (!build_db_->query_one<package_build> ( query<package_build>::build::id == id, pb) || + pb.archived || (b = move (pb.build))->state == build_state::queued) + { config_expired ("no package build"); + } force_state force (b->state == build_state::built ? force_state::forced @@ -255,8 +261,10 @@ handle (request& rq, response& rs) // If we ought to call the // tenant_service_build_queued::build_queued() callback, then also // set the package tenant's queued timestamp to the current time - // to prevent the notifications race (see tenant::queued_timestamp - // for details). + // to prevent the task handler from picking the build and + // potentially interfering with us by sending its `building` + // notification before we send our `queued` notification (see + // tenant::queued_timestamp for details). // if (tsq != nullptr) { @@ -308,14 +316,15 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, build_state::building, qhs, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, qbs.back ().tenant, f); + update_tenant_service_state (conn, ss.type, ss.id, f); } } diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx index 3ba18e1..666e7ef 100644 --- a/mod/mod-build-result.cxx +++ b/mod/mod-build-result.cxx @@ -248,16 +248,27 @@ handle (request& rq, response&) } else if (authenticate_session (*options_, rqm.challenge, *b, rqm.session)) { + // If the build is not in the `forcing` state, then retrieve the tenant + // service callback, if present, for subsequent notification (`queued` + // for the interrupted build and `built` otherwise; see below). Note + // that for the `forcing` state the service already assumes the `queued` + // state (see build_force::handle() and ci_start::rebuild() for + // details). + // const tenant_service_base* ts (nullptr); + shared_ptr<build_tenant> t; - shared_ptr<build_tenant> t (build_db_->load<build_tenant> (b->tenant)); - - if (t->service) + if (b->force != force_state::forcing) { - auto i (tenant_service_map_.find (t->service->type)); + t = build_db_->load<build_tenant> (b->tenant); - if (i != tenant_service_map_.end ()) - ts = i->second.get (); + if (t->service) + { + auto i (tenant_service_map_.find (t->service->type)); + + if (i != tenant_service_map_.end ()) + ts = i->second.get (); + } } // If the build is interrupted, then revert it to the original built @@ -343,11 +354,15 @@ handle (request& rq, response&) // If we ought to call the tenant_service_build_queued::build_queued() // callback, then also set the package tenant's queued timestamp to - // the current time to prevent the notifications race (see + // the current time to prevent the task handler from picking the build + // and potentially interfering with us by sending its `building` + // notification before we send our `queued` notification (see // tenant::queued_timestamp for details). // if (tsq != nullptr) { + assert (t != nullptr); + // Calculate the tenant service hints. // buildable_package_count tpc ( @@ -498,7 +513,11 @@ handle (request& rq, response&) // If required, stash the service notification information. 
// if (tsb != nullptr || tsq != nullptr) + { + assert (t != nullptr); + tss = make_pair (move (*t->service), move (b)); + } } t.commit (); @@ -528,14 +547,15 @@ handle (request& rq, response&) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, build_state::building, qhs, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, qbs.back ().tenant, f); + update_tenant_service_state (conn, ss.type, ss.id, f); } } @@ -547,7 +567,7 @@ handle (request& rq, response&) { assert (tss); // Wouldn't be here otherwise. - const tenant_service& ss (tss->first); + tenant_service& ss (tss->first); const build& b (*tss->second); // Release the database connection since build_built() notification can @@ -555,10 +575,36 @@ handle (request& rq, response&) // conn.reset (); - if (auto f = tsb->build_built (ss, b, log_writer_)) + if (auto f = tsb->build_built (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, b.tenant, f); + + bool build_completed (false); + + if (optional<string> data = + update_tenant_service_state ( + conn, ss.type, ss.id, + [&f, &build_completed] (const string& tid, + const tenant_service& ts) + { + auto r (f (tid, ts)); + build_completed = r.second; + return move (r.first); + })) + { + ss.data = move (data); + } + + if (build_completed) + { + // Release the database connection since the build_completed() + // notification can potentially be time-consuming (e.g., it may + // perform an HTTP request). + // + conn.reset (); + + tsb->build_completed (b.tenant, ss, log_writer_); + } } } diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index 917017b..e769f6a 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -203,6 +203,41 @@ package_query (bool custom_bot, query::build_repository::id.canonical_name.in_range (rp.begin (), rp.end ()); + // Filter by the types of services associated with the tenants, where the + // empty type denotes tenants without associated service. + // + if (params.tenant_service_type_specified ()) + { + cstrings ts; + bool et (false); + + for (const string& t: params.tenant_service_type ()) + { + if (!t.empty ()) + ts.push_back (t.c_str ()); + else + et = true; + } + + if (!ts.empty () && et) + { + q = q && + (query::build_tenant::service.type.in_range (ts.begin (), ts.end ()) || + query::build_tenant::service.type.is_null ()); + } + else if (!ts.empty ()) + { + q = q && query::build_tenant::service.type.in_range (ts.begin (), + ts.end ()); + } + else + { + assert (et); + + q = q && query::build_tenant::service.type.is_null (); + } + } + // If the interactive mode is false or true, then filter out the respective // packages. // @@ -464,10 +499,14 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsu->build_unloaded (move (*t->service), log_writer_)) + tenant_service& ts (*t->service); + string type (ts.type); + string id (ts.id); + + if (auto f = tsu->build_unloaded (t->id, move (ts), log_writer_)) { conn = build_db_->connection (); - update_tenant_service_state (conn, t->id, f); + update_tenant_service_state (conn, type, id, f); } } } @@ -950,8 +989,8 @@ handle (request& rq, response& rs) pq += "ORDER BY"; - // If the interactive mode is both, then order the packages so that ones - // from the interactive build tenants appear first. + // If the interactive mode is `both`, then order the packages so that + // ones from the interactive build tenants appear first. 
// if (imode == interactive_mode::both) pq += pkg_query::build_tenant::interactive + "NULLS LAST,"; @@ -1618,9 +1657,8 @@ handle (request& rq, response& rs) // Collect the potential build configurations as all combinations // of the tenant's packages build configurations and the - // non-excluded (by the packages) build target - // configurations. Note that here we ignore the machines from the - // task request. + // non-excluded (by the packages) build target configurations. + // Note that here we ignore the machines from the task request. // struct build_config { @@ -1761,9 +1799,9 @@ handle (request& rq, response& rs) pkg_config = pc.name; - // Iterate through the built configurations and erase them from the - // build configuration map. All those configurations that remained - // can be built. We will take the first one, if present. + // Iterate through the built configurations and erase them from + // the build configuration map. All those configurations that + // remained can be built. We will take the first one, if present. // // Also save the built configurations for which it's time to be // rebuilt. @@ -1966,10 +2004,21 @@ handle (request& rq, response& rs) qbs = queue_builds (*p, *b); // If we ought to call the - // tenant_service_build_queued::build_queued() callback, - // then also set the package tenant's queued timestamp - // to the current time to prevent the notifications race - // (see tenant::queued_timestamp for details). + // tenant_service_build_queued::build_queued() callback + // for the queued builds (qbs is not empty), then we + // also set the package tenant's queued timestamp to the + // current time to prevent any concurrently running task + // handlers from picking any of these queued builds now + // and so potentially interfering with us by sending + // their `building` notification before we send our + // `queued` notification (see tenant::queued_timestamp + // for details). Note that the `queued` notification for + // the being built configuration doesn't require setting + // this timestamp, since after the respective build + // object is changed and updated in the database it may + // not be picked by any task handler in the foreseeable + // future and so our {`queued`, `building`} notification + // sequence may not be interfered. // if (!qbs.empty () || !initial_state || @@ -1978,8 +2027,11 @@ handle (request& rq, response& rs) { qhs = queue_hints (*p); - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + if (!qbs.empty ()) + { + t->queued_timestamp = system_clock::now (); + build_db_->update (t); + } } } @@ -2222,17 +2274,20 @@ handle (request& rq, response& rs) // If we ought to call the // tenant_service_build_queued::build_queued() - // callback, then also set the package tenant's queued - // timestamp to the current time to prevent the - // notifications race (see tenant::queued_timestamp - // for details). + // callback for the queued builds, then also set the + // package tenant's queued timestamp to the current + // time to prevent the notifications race (see above + // building from scratch for details). 
// if (!qbs.empty () || !rebuild_interrupted_rebuild) { qhs = queue_hints (*p); - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + if (!qbs.empty ()) + { + t->queued_timestamp = system_clock::now (); + build_db_->update (t); + } } } @@ -2272,6 +2327,13 @@ handle (request& rq, response& rs) agent_fp = move (b->agent_fingerprint); challenge = move (b->agent_challenge); task_response = task_response_manifest (); + + // Also cleanup the tenant-associated third-party service data. + // + tsb = nullptr; + tsq = nullptr; + tss = nullopt; + qbs.clear (); } // If the task manifest is prepared, then bail out from the package @@ -2315,7 +2377,8 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, nullopt /* initial_state */, qhs, @@ -2324,7 +2387,7 @@ handle (request& rq, response& rs) conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, qbs.back ().tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2332,6 +2395,10 @@ handle (request& rq, response& rs) // Send the `queued` notification for the task build, unless it is // already sent, and update the service state, if requested. // + // Why don't we join this notification with the previous one? We + // cannot do this since the initial_state argument differs for the + // respective build_queued() function calls. + // if (initial_state && *initial_state != build_state::queued && !rebuild_interrupted_rebuild && @@ -2347,7 +2414,8 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsq->build_queued (ss, + if (auto f = tsq->build_queued (qbs.back ().tenant, + ss, qbs, initial_state, qhs, @@ -2356,7 +2424,7 @@ handle (request& rq, response& rs) conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, qbs.back ().tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2366,7 +2434,7 @@ handle (request& rq, response& rs) } // If a third-party service needs to be notified about the package - // build, then call the tenant_service_build_built::build_building() + // build, then call the tenant_service_build_building::build_building() // callback function and, if requested, update the tenant-associated // service state. 
// @@ -2383,12 +2451,12 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsb->build_building (ss, b, log_writer_)) + if (auto f = tsb->build_building (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); if (optional<string> data = - update_tenant_service_state (conn, b.tenant, f)) + update_tenant_service_state (conn, ss.type, ss.id, f)) ss.data = move (data); } } @@ -2511,13 +2579,36 @@ handle (request& rq, response& rs) // conn.reset (); - if (auto f = tsb->build_built (ss, b, log_writer_)) + if (auto f = tsb->build_built (b.tenant, ss, b, log_writer_)) { conn = build_db_->connection (); + bool build_completed (false); + if (optional<string> data = - update_tenant_service_state (conn, b.tenant, f)) + update_tenant_service_state ( + conn, ss.type, ss.id, + [&f, &build_completed] (const string& tid, + const tenant_service& ts) + { + auto r (f (tid, ts)); + build_completed = r.second; + return move (r.first); + })) + { ss.data = move (data); + } + + if (build_completed) + { + // Release the database connection since the build_completed() + // notification can potentially be time-consuming (e.g., it may + // perform an HTTP request). + // + conn.reset (); + + tsb->build_completed (b.tenant, ss, log_writer_); + } } } } diff --git a/mod/mod-builds.cxx b/mod/mod-builds.cxx index 0155c2e..8fb6e1b 100644 --- a/mod/mod-builds.cxx +++ b/mod/mod-builds.cxx @@ -58,10 +58,10 @@ init (scanner& s) { database_module::init (*options_, options_->build_db_retry ()); build_config_module::init (*options_); - } - if (options_->root ().empty ()) - options_->root (dir_path ("/")); + if (options_->root ().empty ()) + options_->root (dir_path ("/")); + } } template <typename T, typename C> @@ -172,7 +172,8 @@ build_query (const brep::vector<brep::build_target_config_id>* config_ids, { if (rs == "pending") { - q = q && qb::force != "unforced"; + q = q && ((qb::state == "built" && qb::force =="forced") || + (qb::state == "building" && qb::force =="forcing")); } else if (rs == "building") { diff --git a/mod/mod-ci-github-gh.cxx b/mod/mod-ci-github-gh.cxx new file mode 100644 index 0000000..42afe1b --- /dev/null +++ b/mod/mod-ci-github-gh.cxx @@ -0,0 +1,799 @@ +// file : mod/mod-ci-github-gh.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci-github-gh.hxx> + +#include <libbutl/json/parser.hxx> + +namespace brep +{ + [[noreturn]] static void + throw_json (const json::parser& p, const string& m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + m); + } + + // Return the GitHub check run status corresponding to a build_state. + // + string + gh_to_status (build_state st) + { + // Just return by value (small string optimization). + // + switch (st) + { + case build_state::queued: return "QUEUED"; + case build_state::building: return "IN_PROGRESS"; + case build_state::built: return "COMPLETED"; + } + + return ""; // Should never reach. + } + + // Return the build_state corresponding to a GitHub check run status + // string. Throw invalid_argument if the passed status was invalid. 
+ // + build_state + gh_from_status (const string& s) + { + if (s == "QUEUED") return build_state::queued; + else if (s == "IN_PROGRESS") return build_state::building; + else if (s == "COMPLETED") return build_state::built; + else + throw invalid_argument ("unexpected GitHub check run status: '" + s + + '\''); + } + + string + gh_to_conclusion (result_status rs, bool warning_success) + { + switch (rs) + { + case result_status::success: + return "SUCCESS"; + + case result_status::warning: + return warning_success ? "SUCCESS" : "FAILURE"; + + case result_status::error: + case result_status::abort: + case result_status::abnormal: + return "FAILURE"; + + // Valid values we should never encounter. + // + case result_status::skip: + case result_status::interrupt: + throw invalid_argument ("unexpected result_status value: " + + to_string (rs)); + } + + return ""; // Should never reach. + } + + string + gh_check_run_name (const build& b, const build_queued_hints* bh) + { + string r; + + if (bh == nullptr || !bh->single_package_version) + { + r += b.package_name.string (); + r += '/'; + r += b.package_version.string (); + r += '/'; + } + + r += b.target_config_name; + r += '/'; + r += b.target.string (); + r += '/'; + + if (bh == nullptr || !bh->single_package_config) + { + r += b.package_config_name; + r += '/'; + } + + r += b.toolchain_name; + r += '-'; + r += b.toolchain_version.string (); + + return r; + } + + // Throw invalid_json_input when a required member `m` is missing from a + // JSON object `o`. + // + [[noreturn]] static void + missing_member (const json::parser& p, const char* o, const char* m) + { + throw_json (p, o + string (" object is missing member '") + m + '\''); + } + + using event = json::event; + + // gh_check_suite + // + gh_check_suite:: + gh_check_suite (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), hb (false), hs (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (hb, "head_branch")) + { + string* v (p.next_expect_string_null ()); + if (v != nullptr) + head_branch = *v; + } + else if (c (hs, "head_sha")) head_sha = p.next_expect_string (); + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_check_suite", "node_id"); + if (!hb) missing_member (p, "gh_check_suite", "head_branch"); + if (!hs) missing_member (p, "gh_check_suite", "head_sha"); + } + + ostream& + operator<< (ostream& os, const gh_check_suite& cs) + { + os << "node_id: " << cs.node_id + << ", head_branch: " << (cs.head_branch ? *cs.head_branch : "null") + << ", head_sha: " << cs.head_sha; + + return os; + } + + // gh_check_suite_ex + // + gh_check_suite_ex:: + gh_check_suite_ex (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), hb (false), hs (false), cc (false), co (false), + ap (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? 
(v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (hb, "head_branch")) + { + string* v (p.next_expect_string_null ()); + if (v != nullptr) + head_branch = *v; + } + else if (c (hs, "head_sha")) head_sha = p.next_expect_string (); + else if (c (cc, "latest_check_runs_count")) + check_runs_count = p.next_expect_number <size_t> (); + else if (c (co, "conclusion")) + { + string* v (p.next_expect_string_null ()); + if (v != nullptr) + conclusion = *v; + } + else if (c (ap, "app")) + { + p.next_expect (event::begin_object); + + bool ai (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + if (c (ai, "id")) + { + // Note: unlike the check_run webhook's app.id, the check_suite + // one can be null. It's unclear under what circumstances, but it + // shouldn't happen unless something is broken. + // + optional<uint64_t> v (p.next_expect_number_null<uint64_t> ()); + + if (!v) + throw_json (p, "check_suite.app.id is null"); + + app_id = *v; + } + else p.next_expect_value_skip (); + } + + if (!ai) missing_member (p, "gh_check_suite_ex.app", "id"); + } + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_check_suite_ex", "node_id"); + if (!hb) missing_member (p, "gh_check_suite_ex", "head_branch"); + if (!hs) missing_member (p, "gh_check_suite_ex", "head_sha"); + if (!cc) missing_member (p, "gh_check_suite_ex", "latest_check_runs_count"); + if (!co) missing_member (p, "gh_check_suite_ex", "conclusion"); + if (!ap) missing_member (p, "gh_check_suite_ex", "app"); + } + + ostream& + operator<< (ostream& os, const gh_check_suite_ex& cs) + { + os << "node_id: " << cs.node_id + << ", head_branch: " << (cs.head_branch ? *cs.head_branch : "null") + << ", head_sha: " << cs.head_sha + << ", latest_check_runs_count: " << cs.check_runs_count + << ", conclusion: " << (cs.conclusion ? *cs.conclusion : "null") + << ", app_id: " << cs.app_id; + + return os; + } + + // gh_check_run + // + gh_check_run:: + gh_check_run (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), nm (false), st (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (nm, "name")) name = p.next_expect_string (); + else if (c (st, "status")) status = p.next_expect_string (); + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_check_run", "node_id"); + if (!nm) missing_member (p, "gh_check_run", "name"); + if (!st) missing_member (p, "gh_check_run", "status"); + } + + // gh_check_run_ex + // + gh_check_run_ex:: + gh_check_run_ex (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), nm (false), st (false), du (false), cs (false), + ap (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? 
(v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (nm, "name")) name = p.next_expect_string (); + else if (c (st, "status")) status = p.next_expect_string (); + else if (c (du, "details_url")) details_url = p.next_expect_string (); + else if (c (cs, "check_suite")) check_suite = gh_check_suite (p); + else if (c (ap, "app")) + { + p.next_expect (event::begin_object); + + bool ai (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + if (c (ai, "id")) app_id = p.next_expect_number<uint64_t> (); + else p.next_expect_value_skip (); + } + + if (!ai) missing_member (p, "gh_check_run_ex.app", "id"); + } + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_check_run_ex", "node_id"); + if (!nm) missing_member (p, "gh_check_run_ex", "name"); + if (!st) missing_member (p, "gh_check_run_ex", "status"); + if (!du) missing_member (p, "gh_check_run_ex", "details_url"); + if (!cs) missing_member (p, "gh_check_run_ex", "check_suite"); + if (!ap) missing_member (p, "gh_check_run_ex", "app"); + } + + + ostream& + operator<< (ostream& os, const gh_check_run& cr) + { + os << "node_id: " << cr.node_id + << ", name: " << cr.name + << ", status: " << cr.status; + + return os; + } + + ostream& + operator<< (ostream& os, const gh_check_run_ex& cr) + { + os << static_cast<const gh_check_run&> (cr) + << ", details_url: " << cr.details_url + << ", check_suite: { " << cr.check_suite << " }" + << ", app_id: " << cr.app_id; + + return os; + } + + gh_pull_request:: + gh_pull_request (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), nu (false), bs (false), hd (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? 
(v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (nu, "number")) number = p.next_expect_number<unsigned int> (); + else if (c (bs, "base")) + { + p.next_expect (event::begin_object); + + bool r (false), s (false), rp (false), fn (false); + + while (p.next_expect (event::name, event::end_object)) + { + if (c (r, "ref")) base_ref = p.next_expect_string (); + else if (c (s, "sha")) base_sha = p.next_expect_string (); + else if (c (rp, "repo")) + { + p.next_expect (event::begin_object); + + while (p.next_expect (event::name, event::end_object)) + { + if (c (fn, "full_name")) + base_path = p.next_expect_string (); + else + p.next_expect_value_skip (); + } + } + else p.next_expect_value_skip (); + } + + if (!r) missing_member (p, "gh_pull_request.base", "ref"); + if (!s) missing_member (p, "gh_pull_request.base", "sha"); + if (!rp) missing_member (p, "gh_pull_request.base", "repo"); + if (!fn) missing_member (p, "gh_pull_request.base.repo", "full_name"); + } + else if (c (hd, "head")) + { + p.next_expect (event::begin_object); + + bool r (false), s (false), rp (false), fn (false); + + while (p.next_expect (event::name, event::end_object)) + { + if (c (r, "ref")) head_ref = p.next_expect_string (); + else if (c (s, "sha")) head_sha = p.next_expect_string (); + else if (c (rp, "repo")) + { + p.next_expect (event::begin_object); + + while (p.next_expect (event::name, event::end_object)) + { + if (c (fn, "full_name")) + head_path = p.next_expect_string (); + else + p.next_expect_value_skip (); + } + } + else p.next_expect_value_skip (); + } + + if (!r) missing_member (p, "gh_pull_request.head", "ref"); + if (!s) missing_member (p, "gh_pull_request.head", "sha"); + if (!rp) missing_member (p, "gh_pull_request.head", "repo"); + if (!fn) missing_member (p, "gh_pull_request.head.repo", "full_name"); + } + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_pull_request", "node_id"); + if (!nu) missing_member (p, "gh_pull_request", "number"); + if (!bs) missing_member (p, "gh_pull_request", "base"); + if (!hd) missing_member (p, "gh_pull_request", "head"); + } + + ostream& + operator<< (ostream& os, const gh_pull_request& pr) + { + os << "node_id: " << pr.node_id + << ", number: " << pr.number + << ", base: { " + << "path: " << pr.base_path + << ", ref: " << pr.base_ref + << ", sha: " << pr.base_sha + << " }" + << ", head: { " + << "path: " << pr.head_path + << ", ref: " << pr.head_ref + << ", sha: " << pr.head_sha + << " }" + << ", app_id: " << pr.app_id; + + return os; + } + + // gh_repository + // + gh_repository:: + gh_repository (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ni (false), fn (false), cu (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? 
(v = true) : false; + }; + + if (c (ni, "node_id")) node_id = p.next_expect_string (); + else if (c (fn, "full_name")) path = p.next_expect_string (); + else if (c (cu, "clone_url")) clone_url = p.next_expect_string (); + else p.next_expect_value_skip (); + } + + if (!ni) missing_member (p, "gh_repository", "node_id"); + if (!fn) missing_member (p, "gh_repository", "full_name"); + if (!cu) missing_member (p, "gh_repository", "clone_url"); + } + + ostream& + operator<< (ostream& os, const gh_repository& rep) + { + os << "node_id: " << rep.node_id + << ", path: " << rep.path + << ", clone_url: " << rep.clone_url; + + return os; + } + + // gh_installation + // + gh_installation:: + gh_installation (json::parser& p) + { + p.next_expect (event::begin_object); + + bool i (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (i, "id")) id = p.next_expect_number (); + else p.next_expect_value_skip (); + } + + if (!i) missing_member (p, "gh_installation", "id"); + } + + ostream& + operator<< (ostream& os, const gh_installation& i) + { + os << "id: " << i.id; + + return os; + } + + // gh_check_suite_event + // + gh_check_suite_event:: + gh_check_suite_event (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ac (false), cs (false), rp (false), in (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (ac, "action")) action = p.next_expect_string (); + else if (c (cs, "check_suite")) check_suite = gh_check_suite_ex (p); + else if (c (rp, "repository")) repository = gh_repository (p); + else if (c (in, "installation")) installation = gh_installation (p); + else p.next_expect_value_skip (); + } + + if (!ac) missing_member (p, "gh_check_suite_event", "action"); + if (!cs) missing_member (p, "gh_check_suite_event", "check_suite"); + if (!rp) missing_member (p, "gh_check_suite_event", "repository"); + if (!in) missing_member (p, "gh_check_suite_event", "installation"); + } + + ostream& + operator<< (ostream& os, const gh_check_suite_event& cs) + { + os << "action: " << cs.action; + os << ", check_suite { " << cs.check_suite << " }"; + os << ", repository { " << cs.repository << " }"; + os << ", installation { " << cs.installation << " }"; + + return os; + } + + // gh_check_run_event + // + gh_check_run_event:: + gh_check_run_event (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ac (false), cs (false), rp (false), in (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? 
(v = true) : false; + }; + + if (c (ac, "action")) action = p.next_expect_string (); + else if (c (cs, "check_run")) check_run = gh_check_run_ex (p); + else if (c (rp, "repository")) repository = gh_repository (p); + else if (c (in, "installation")) installation = gh_installation (p); + else p.next_expect_value_skip (); + } + + if (!ac) missing_member (p, "gh_check_run_event", "action"); + if (!cs) missing_member (p, "gh_check_run_event", "check_run"); + if (!rp) missing_member (p, "gh_check_run_event", "repository"); + if (!in) missing_member (p, "gh_check_run_event", "installation"); + } + + ostream& + operator<< (ostream& os, const gh_check_run_event& cr) + { + os << "action: " << cr.action; + os << ", check_run { " << cr.check_run << " }"; + os << ", repository { " << cr.repository << " }"; + os << ", installation { " << cr.installation << " }"; + + return os; + } + + // gh_pull_request_event + // + gh_pull_request_event:: + gh_pull_request_event (json::parser& p) + { + p.next_expect (event::begin_object); + + bool ac (false), pr (false), bf (false), rp (false), in (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (ac, "action")) action = p.next_expect_string (); + else if (c (pr, "pull_request")) pull_request = gh_pull_request (p); + else if (c (bf, "before")) before = p.next_expect_string (); + else if (c (rp, "repository")) repository = gh_repository (p); + else if (c (in, "installation")) installation = gh_installation (p); + else p.next_expect_value_skip (); + } + + if (!ac) missing_member (p, "gh_pull_request_event", "action"); + if (!pr) missing_member (p, "gh_pull_request_event", "pull_request"); + if (!rp) missing_member (p, "gh_pull_request_event", "repository"); + if (!in) missing_member (p, "gh_pull_request_event", "installation"); + } + + ostream& + operator<< (ostream& os, const gh_pull_request_event& pr) + { + os << "action: " << pr.action; + os << ", pull_request { " << pr.pull_request << " }"; + os << ", before: " << (pr.before ? *pr.before : "null"); + os << ", repository { " << pr.repository << " }"; + os << ", installation { " << pr.installation << " }"; + + return os; + } + + // gh_push_event + // + gh_push_event:: + gh_push_event (json::parser& p) + { + p.next_expect (event::begin_object); + + bool rf (false), bf (false), af (false), fd (false), dl (false), + rp (false), in (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? 
(v = true) : false; + }; + + if (c (rf, "ref")) ref = p.next_expect_string (); + else if (c (bf, "before")) before = p.next_expect_string (); + else if (c (af, "after")) after = p.next_expect_string (); + else if (c (fd, "forced")) forced = p.next_expect_boolean<bool> (); + else if (c (dl, "deleted")) deleted = p.next_expect_boolean<bool> (); + else if (c (rp, "repository")) repository = gh_repository (p); + else if (c (in, "installation")) installation = gh_installation (p); + else p.next_expect_value_skip (); + } + + if (!rf) missing_member (p, "gh_push_event", "ref"); + if (!bf) missing_member (p, "gh_push_event", "before"); + if (!af) missing_member (p, "gh_push_event", "after"); + if (!fd) missing_member (p, "gh_push_event", "forced"); + if (!dl) missing_member (p, "gh_push_event", "deleted"); + if (!rp) missing_member (p, "gh_push_event", "repository"); + if (!in) missing_member (p, "gh_push_event", "installation"); + } + + ostream& + operator<< (ostream& os, const gh_push_event& p) + { + os << "ref: " << p.ref + << ", before: " << p.before + << ", after: " << p.after + << ", forced: " << p.forced + << ", deleted: " << p.deleted + << ", repository { " << p.repository << " }" + << ", installation { " << p.installation << " }"; + + return os; + } + + // gh_installation_access_token + // + // Example JSON: + // + // { + // "token": "ghs_Py7TPcsmsITeVCAWeVtD8RQs8eSos71O5Nzp", + // "expires_at": "2024-02-15T16:16:38Z", + // ... + // } + // + gh_installation_access_token:: + gh_installation_access_token (json::parser& p) + { + p.next_expect (event::begin_object); + + bool tk (false), ea (false); + + // Skip unknown/uninteresting members. + // + while (p.next_expect (event::name, event::end_object)) + { + auto c = [&p] (bool& v, const char* s) + { + return p.name () == s ? (v = true) : false; + }; + + if (c (tk, "token")) token = p.next_expect_string (); + else if (c (ea, "expires_at")) + { + string v (p.next_expect_string ()); + + try + { + expires_at = gh_from_iso8601 (v); + } + catch (const invalid_argument& e) + { + throw_json (p, + "invalid IAT expires_at value '" + v + + "': " + e.what ()); + } + catch (const system_error& e) + { + // Translate for simplicity. 
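+ //
+ // For illustration, a round-trip through the two conversion functions
+ // defined at the end of this file, using the expires_at value from the
+ // example JSON above:
+ //
+ //   timestamp t (gh_from_iso8601 ("2024-02-15T16:16:38Z"));
+ //   assert (gh_to_iso8601 (t) == "2024-02-15T16:16:38Z");
+ //
+ // gh_from_iso8601() throws invalid_argument on a malformed timestamp
+ // and system_error on underlying OS failures; this handler and the one
+ // above translate both to invalid_json_input.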
+ //
+ throw_json (p,
+ "unable to convert IAT expires_at value '" + v +
+ "': " + e.what ());
+ }
+ }
+ else p.next_expect_value_skip ();
+ }
+
+ if (!tk) missing_member (p, "gh_installation_access_token", "token");
+ if (!ea) missing_member (p, "gh_installation_access_token", "expires_at");
+ }
+
+ gh_installation_access_token::
+ gh_installation_access_token (string tk, timestamp ea)
+ : token (move (tk)), expires_at (ea)
+ {
+ }
+
+ ostream&
+ operator<< (ostream& os, const gh_installation_access_token& t)
+ {
+ os << "token: " << t.token << ", expires_at: ";
+ butl::operator<< (os, t.expires_at);
+
+ return os;
+ }
+
+ string
+ gh_to_iso8601 (timestamp t)
+ {
+ return butl::to_string (t,
+ "%Y-%m-%dT%TZ",
+ false /* special */,
+ false /* local */);
+ }
+
+ timestamp
+ gh_from_iso8601 (const string& s)
+ {
+ return butl::from_string (s.c_str (),
+ "%Y-%m-%dT%TZ",
+ false /* local */);
+ }
+}
diff --git a/mod/mod-ci-github-gh.hxx b/mod/mod-ci-github-gh.hxx
new file mode 100644
index 0000000..5f6e5b7
--- /dev/null
+++ b/mod/mod-ci-github-gh.hxx
@@ -0,0 +1,355 @@
+// file : mod/mod-ci-github-gh.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef MOD_MOD_CI_GITHUB_GH_HXX
+#define MOD_MOD_CI_GITHUB_GH_HXX
+
+#include <libbrep/types.hxx>
+#include <libbrep/utility.hxx>
+
+#include <libbrep/build.hxx>
+
+#include <mod/tenant-service.hxx> // build_hints
+
+namespace butl
+{
+ namespace json
+ {
+ class parser;
+ }
+}
+
+namespace brep
+{
+ using build_queued_hints = tenant_service_build_queued::build_queued_hints;
+
+ // GitHub request/response types (all start with gh_).
+ //
+ // Note that the GitHub REST and GraphQL APIs use different id types and
+ // values. In the REST API they are usually integers (but check the API
+ // reference for the object in question) whereas in GraphQL they are always
+ // strings (note: base64-encoded and opaque, not just the REST id value as a
+ // string).
+ //
+ // In both APIs the id field is called `id`, but REST responses and webhook
+ // events also contain the corresponding GraphQL object's id in the
+ // `node_id` field.
+ //
+ // The GraphQL API's ids are called "global node ids" by GitHub. We refer to
+ // them simply as node ids and we use them almost exclusively (over the
+ // REST/webhook ids).
+ //
+ // In the structures below, `id` always refers to the REST/webhook id and
+ // `node_id` always refers to the node id.
+ //
+ namespace json = butl::json;
+
+ // The check_suite member of a check_run webhook event (gh_check_run_event).
+ //
+ struct gh_check_suite
+ {
+ string node_id;
+ optional<string> head_branch;
+ string head_sha;
+
+ explicit
+ gh_check_suite (json::parser&);
+
+ gh_check_suite () = default;
+ };
+
+ // The check_suite member of a check_suite webhook event
+ // (gh_check_suite_event).
+ //
+ struct gh_check_suite_ex: gh_check_suite
+ {
+ size_t check_runs_count;
+ optional<string> conclusion;
+
+ uint64_t app_id;
+
+ explicit
+ gh_check_suite_ex (json::parser&);
+
+ gh_check_suite_ex () = default;
+ };
+
+ // The check_run object returned in response to GraphQL requests
+ // (e.g. create or update check run). Note that we specify the set of
+ // members to return in the GraphQL request.
+ //
+ struct gh_check_run
+ {
+ string node_id;
+ string name;
+ string status;
+
+ explicit
+ gh_check_run (json::parser&);
+
+ gh_check_run () = default;
+ };
+
+ // The check_run member of a check_run webhook event (gh_check_run_event).
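+ //
+ // Note that all of these types follow the same parsing convention: an
+ // explicit constructor that consumes one complete JSON object from a
+ // json::parser. A minimal sketch, assuming a response body stream `is`:
+ //
+ //   json::parser p (is, "<github-response>");
+ //   gh_check_run cr (p); // Throws invalid_json_input if malformed.
+ //
+ // Unknown members are skipped and a missing required member results in
+ // invalid_json_input (via missing_member() in the implementation).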
+ // + struct gh_check_run_ex: gh_check_run + { + string details_url; + gh_check_suite check_suite; + + uint64_t app_id; + + explicit + gh_check_run_ex (json::parser&); + + gh_check_run_ex () = default; + }; + + // The pull_request member of a pull_request webhook event + // (gh_pull_request_event). + // + struct gh_pull_request + { + string node_id; + unsigned int number; + + string base_path; // Repository path (<org>/<repo>) under github.com. + string base_ref; + string base_sha; + + string head_path; + string head_ref; + string head_sha; + + // Note: not received from GitHub but set from the app-id webhook query + // parameter instead. + // + // For some reason, unlike the check_suite and check_run webhooks, the + // pull_request webhook does not contain the app id. For the sake of + // simplicity we emulate check_suite and check_run by storing the app-id + // webhook query parameter here. + // + uint64_t app_id; + + explicit + gh_pull_request (json::parser&); + + gh_pull_request () = default; + }; + + // The repository member of various webhook events. + // + struct gh_repository + { + string node_id; + string path; // Repository path (<org>/<repo>) under github.com. + string clone_url; + + explicit + gh_repository (json::parser&); + + gh_repository () = default; + }; + + // The installation member of various webhook events. + // + struct gh_installation + { + string id; // Note: used for installation access token (REST API). + + explicit + gh_installation (json::parser&); + + gh_installation () = default; + }; + + // The check_suite webhook event. + // + struct gh_check_suite_event + { + string action; + gh_check_suite_ex check_suite; + gh_repository repository; + gh_installation installation; + + explicit + gh_check_suite_event (json::parser&); + + gh_check_suite_event () = default; + }; + + // The check_run webhook event. + // + struct gh_check_run_event + { + string action; + gh_check_run_ex check_run; + gh_repository repository; + gh_installation installation; + + explicit + gh_check_run_event (json::parser&); + + gh_check_run_event () = default; + }; + + // The pull_request webhook event. + // + struct gh_pull_request_event + { + string action; + + gh_pull_request pull_request; + + // The SHA of the previous commit on the head branch before the current + // one. Only present if action is "synchronize". + // + optional<string> before; + + gh_repository repository; + gh_installation installation; + + explicit + gh_pull_request_event (json::parser&); + + gh_pull_request_event () = default; + }; + + // The push webhook event. + // + struct gh_push_event + { + // The full git ref that was pushed. Example: refs/heads/main or + // refs/tags/v3.14.1. + // + string ref; + + // The SHA of the most recent commit on ref before the push. + // + // The GitHub API reference says this member is always present and + // non-null. Testing shows that an absent before commit is represented by + // a value of "0000000000000000000000000000000000000000". + // + string before; + + // The SHA of the most recent commit on ref after the push. + // + string after; + + // True if this was a forced push of the ref. I.e., history was + // overwritten. + // + bool forced; + + // True if this was a branch deletion. + // + bool deleted; + + gh_repository repository; + gh_installation installation; + + // Note: not received from GitHub but set from the app-id webhook query + // parameter instead. 
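+ //
+ // In other words, the webhook handler is expected to do something along
+ // these lines (a hypothetical sketch; the parameter access is not part
+ // of this interface):
+ //
+ //   gh_push_event e (p); // Parse the webhook request body.
+ //   e.app_id = app_id;   // From the app-id query parameter, not GitHub.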
+ // + // For some reason, unlike the check_suite and check_run webhooks, the + // push webhook does not contain the app id. For the sake of simplicity we + // emulate check_suite and check_run by storing the app-id webhook query + // parameter here. + // + uint64_t app_id; + + explicit + gh_push_event (json::parser&); + + gh_push_event () = default; + }; + + // Installation access token (IAT) returned when we authenticate as a GitHub + // app installation. + // + struct gh_installation_access_token + { + string token; + timestamp expires_at; + + explicit + gh_installation_access_token (json::parser&); + + gh_installation_access_token (string token, timestamp expires_at); + + gh_installation_access_token () = default; + }; + + // Return the GitHub check run status corresponding to a build_state. + // + string + gh_to_status (build_state); + + // Return the build_state corresponding to a GitHub check run status + // string. Throw invalid_argument if the passed status was invalid. + // + build_state + gh_from_status (const string&); + + // If warning_success is true, then map result_status::warning to `SUCCESS` + // and to `FAILURE` otherwise. + // + // Throw invalid_argument in case of unsupported result_status value + // (currently skip, interrupt). + // + string + gh_to_conclusion (result_status, bool warning_success); + + // Create a check_run name from a build. If the second argument is not + // NULL, return an abbreviated id if possible. + // + string + gh_check_run_name (const build&, const build_queued_hints* = nullptr); + + // Throw system_error if the conversion fails due to underlying operating + // system errors. + // + string + gh_to_iso8601 (timestamp); + + // Throw invalid_argument if the conversion fails due to the invalid + // argument and system_error if due to underlying operating system errors. + // + timestamp + gh_from_iso8601 (const string&); + + ostream& + operator<< (ostream&, const gh_check_suite&); + + ostream& + operator<< (ostream&, const gh_check_suite_ex&); + + ostream& + operator<< (ostream&, const gh_check_run&); + + ostream& + operator<< (ostream&, const gh_pull_request&); + + ostream& + operator<< (ostream&, const gh_repository&); + + ostream& + operator<< (ostream&, const gh_installation&); + + ostream& + operator<< (ostream&, const gh_check_suite_event&); + + ostream& + operator<< (ostream&, const gh_check_run_event&); + + ostream& + operator<< (ostream&, const gh_pull_request_event&); + + ostream& + operator<< (ostream&, const gh_push_event&); + + ostream& + operator<< (ostream&, const gh_installation_access_token&); +} + +#endif // MOD_MOD_CI_GITHUB_GH_HXX diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx new file mode 100644 index 0000000..763842c --- /dev/null +++ b/mod/mod-ci-github-gq.cxx @@ -0,0 +1,1284 @@ +// file : mod/mod-ci-github-gq.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci-github-gq.hxx> + +#include <libbutl/json/parser.hxx> +#include <libbutl/json/serializer.hxx> + +#include <mod/mod-ci-github-post.hxx> + +using namespace std; +using namespace butl; + +namespace brep +{ + // GraphQL serialization functions (see definitions and documentation at the + // bottom). 
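+ //
+ // For illustration, the kind of values these helpers produce (see the
+ // definitions at the bottom for the exact semantics):
+ //
+ //   gq_str ("hi")      -> "hi" (including the quotes; JSON-escaped)
+ //   gq_int (42)        -> 42
+ //   gq_bool (true)     -> true
+ //   gq_enum ("QUEUED") -> QUEUED (validated name, not quoted)
+ //   gq_name ("0cr")    -> throws invalid_argument (leading digit)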
+ // + static const string& gq_name (const string&); + static string gq_name (string&&); + static string gq_str (const string&); + static string gq_int (uint64_t); + static string gq_bool (bool); + static const string& gq_enum (const string&); + static string gq_enum (string&&); + + [[noreturn]] static void + throw_json (json::parser& p, const string& m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + m); + } + + // Parse a JSON-serialized GraphQL response. + // + // Throw runtime_error if the response indicated errors and + // invalid_json_input if the GitHub response contained invalid JSON. + // + // The parse_data function should not throw anything but invalid_json_input. + // + // The response format is defined in the GraphQL spec: + // https://spec.graphql.org/October2021/#sec-Response. + // + // Example response: + // + // { + // "data": {...}, + // "errors": {...} + // } + // + // The contents of `data`, including its opening and closing braces, are + // parsed by the `parse_data` function. + // + // If the `errors` field is present in the response, error(s) occurred + // before or during execution of the operation. + // + // If the `data` field is not present the errors are request errors which + // occur before execution and are typically the client's fault. + // + // If the `data` field is also present in the response the errors are field + // errors which occur during execution and are typically the GraphQL + // endpoint's fault, and some fields in `data` that should not be are likely + // to be null. + // + // Although the spec recommends that the errors field (if present) should + // come before the data field, GitHub places data before errors. Therefore + // we need to check that the errors field is not present before parsing the + // data field as it might contain nulls if errors is present. + // + static void + gq_parse_response (json::parser& p, + function<void (json::parser&)> parse_data) + { + using event = json::event; + + // True if the data/errors fields are present. + // + bool dat (false), err (false); + + // Because the errors field is likely to come before the data field, + // serialize data to a stringstream and only parse it later once we're + // sure there are no errors. + // + stringstream data; // The value of the data field. + + p.next_expect (event::begin_object); + + while (p.next_expect (event::name, event::end_object)) + { + if (p.name () == "data") + { + dat = true; + + // Serialize the data field to a string. + // + // Note that the JSON payload sent by GitHub is not pretty-printed so + // there is no need to worry about that. + // + json::stream_serializer s (data, 0 /* indentation */); + + try + { + for (event e: p) + { + if (!s.next (e, p.data ())) + break; // Stop if data object is complete. + } + } + catch (const json::invalid_json_output& e) + { + throw_json (p, + string ("serializer rejected response 'data' field: ") + + e.what ()); + } + } + else if (p.name () == "errors") + { + // Skip the errors object but don't stop parsing because the error + // semantics depends on whether or not `data` is present. + // + err = true; // Handled below. + + p.next_expect_value_skip (); + } + else + { + // The spec says the response will never contain any top-level fields + // other than data, errors, and extensions. 
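+ //
+ // For illustration, the two error shapes distinguished below, with
+ // hypothetical payloads:
+ //
+ //   {"errors": [...]}                          -- request error
+ //   {"data": {"node": null}, "errors": [...]}  -- field error(s)
+ //
+ // Both result in runtime_error; in the second case the partial data is
+ // discarded rather than parsed.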
+ // + if (p.name () != "extensions") + { + throw_json (p, + "unexpected top-level GraphQL response field: '" + + p.name () + '\''); + } + + p.next_expect_value_skip (); + } + } + + if (!err) + { + if (!dat) + throw runtime_error ("no data received from GraphQL endpoint"); + + // Parse the data field now that we know there are no errors. + // + json::parser dp (data, p.input_name); + + parse_data (dp); + } + else + { + if (dat) + { + throw runtime_error ("field error(s) received from GraphQL endpoint; " + "incomplete data received"); + } + else + throw runtime_error ("request error(s) received from GraphQL endpoint"); + } + } + + // Parse a response to a check_run GraphQL mutation such as `createCheckRun` + // or `updateCheckRun`. + // + // Throw invalid_json_input. + // + // Example response (only the part we need to parse here): + // + // { + // "cr0": { + // "checkRun": { + // "node_id": "CR_kwDOLc8CoM8AAAAFQ5GqPg", + // "name": "libb2/0.98.1+2/x86_64-linux-gnu/linux_debian_12-gcc_13.1-O3/default/dev/0.17.0-a.1", + // "status": "QUEUED" + // } + // }, + // "cr1": { + // "checkRun": { + // "node_id": "CR_kwDOLc8CoM8AAAAFQ5GqhQ", + // "name": "libb2/0.98.1+2/x86_64-linux-gnu/linux_debian_12-gcc_13.1/default/dev/0.17.0-a.1", + // "status": "QUEUED" + // } + // } + // } + // + static vector<gh_check_run> + gq_parse_mutate_check_runs_response (json::parser& p) + { + using event = json::event; + + vector<gh_check_run> r; + + gq_parse_response (p, [&r] (json::parser& p) + { + p.next_expect (event::begin_object); + + // Parse the "cr0".."crN" members (field aliases). + // + while (p.next_expect (event::name, event::end_object)) + { + // Parse `"crN": { "checkRun":`. + // + if (p.name () != "cr" + to_string (r.size ())) + throw_json (p, "unexpected field alias: '" + p.name () + '\''); + p.next_expect (event::begin_object); + p.next_expect_name ("checkRun"); + + r.emplace_back (p); // Parse the check_run object. + + p.next_expect (event::end_object); // Parse end of crN object. + } + }); + + // Our requests always operate on at least one check run so if there were + // none in the data field something went wrong. + // + if (r.empty ()) + throw_json (p, "data object is empty"); + + return r; + } + + // Serialize a query that fetches the most recent check runs on a commit. + // + static string + gq_query_get_check_runs (uint64_t ai, // App id + const string& ri, // Repository id + const string& ci, // Commit id + size_t cn) // Check run count + { + + ostringstream os; + + os << "query {" << '\n'; + + // Get the repository node. + // + os << "node(id: " << gq_str (ri) << ") {" << '\n' + << "... on Repository {" << '\n'; + + // Get the commit object. + // + os << " object(oid: " << gq_str (ci) << ") {" << '\n' + << " ... on Commit {" << '\n'; + + // Get the check suites on the commit, filtering by our app id. (Note that + // as a result there should never be more than one check suite; see + // below.) + // + os << " checkSuites(first: 1" << '\n' + << " filterBy: {appId: " << gq_int (ai) << "}) {" << '\n' + << " edges { node {" << '\n'; + + // Get the check suite's last N check runs (last:). + // + // Filter by App id because apparently an App can create check runs in + // another App's check suite. + // + // Also ask for the latest check runs only (checkType: LATEST) otherwise + // we could receive multiple check runs with the same name. Although this + // appears to be the default it's not documented anywhere so best make it + // explicit. 
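+ //
+ // For illustration, with hypothetical arguments the serialized query
+ // looks roughly like this (indentation simplified):
+ //
+ //   query {
+ //     node(id: "R_kgDOLc8CoA") { ... on Repository {
+ //       object(oid: "3f1bf89...") { ... on Commit {
+ //         checkSuites(first: 1 filterBy: {appId: 12345}) {
+ //           edges { node {
+ //             checkRuns(last: 2
+ //                       filterBy: {appId: 12345 checkType: LATEST}) {
+ //               edges { node { node_id: id name status } }
+ //   ... (closing braces omitted)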
+ //
+ // Note that the selection set (fields to be returned) must match that of
+ // the check run mutations (create/update) generated by
+ // gq_mutation_{create,update}_check_runs().
+ //
+ os << " checkRuns(last: " << gq_int (cn) << '\n'
+ << " filterBy: {appId: " << gq_int (ai) << '\n'
+ << " checkType: LATEST}) {" << '\n'
+ << " edges { node { node_id: id name status } }" << '\n'
+ << " }" /* checkRuns */ << '\n'
+ << " } }" /* node, edges */ << '\n'
+ << " }" /* checkSuites */ << '\n'
+ << " }" /* ... on Commit */ << '\n'
+ << " }" /* object */ << '\n'
+ << "}" /* ... on Repository */ << '\n'
+ << "}" /* node */ << '\n';
+
+ os << '}' /* query */ << '\n';
+
+ return os.str ();
+ }
+
+ // Parse a response to a "get check runs for repository/commit" GraphQL
+ // query as constructed by gq_query_get_check_runs().
+ //
+ // Note that there might be other check suites on this commit but they will
+ // all have been created by other apps (GitHub never creates more than one
+ // check suite per app). Therefore our query filters by app id and as a
+ // result there should never be more than one check suite in the response.
+ //
+ // Throw invalid_json_input.
+ //
+ // Example response (only the part we need to parse here):
+ //
+ // {
+ // "node": {
+ // "object":{
+ // "checkSuites":{
+ // "edges":[
+ // {"node":{
+ // "checkRuns":{
+ // "edges":[
+ // {"node":{"node_id":"CR_kwDOLc8CoM8AAAAImvJPfw",
+ // "name":"check_run0",
+ // "status":"QUEUED"}},
+ // {"node":{"node_id":"CR_kwDOLc8CoM8AAAAImvJP_Q",
+ // "name":"check_run1",
+ // "status":"QUEUED"}}
+ // ]
+ // }
+ // }
+ // }
+ // ]
+ // }
+ // }
+ // }
+ // }
+ //
+ static vector<gh_check_run>
+ gq_parse_get_check_runs_response (json::parser& p)
+ {
+ using event = json::event;
+
+ vector<gh_check_run> r;
+
+ gq_parse_response (p, [&r] (json::parser& p)
+ {
+ p.next_expect (event::begin_object); // Outermost {
+
+ p.next_expect_member_object ("node"); // Repository node
+ p.next_expect_member_object ("object"); // Commit
+ p.next_expect_member_object ("checkSuites");
+ p.next_expect_member_array ("edges"); // Check suites array
+ p.next_expect (event::begin_object); // Check suite outer {
+ p.next_expect_member_object ("node");
+ p.next_expect_member_object ("checkRuns");
+ p.next_expect_member_array ("edges"); // Check runs array
+
+ // Parse the check run elements of the `edges` array. E.g.:
+ //
+ // {
+ // "node":{
+ // "node_id":"CR_kwDOLc8CoM8AAAAIobBFlA",
+ // "name":"CONCLUSION",
+ // "status":"IN_PROGRESS"
+ // }
+ // }
+ //
+ while (p.next_expect (event::begin_object, event::end_array))
+ {
+ p.next_expect_name ("node");
+ r.emplace_back (p); // Parse check run: { members... }
+ p.next_expect (event::end_object);
+ }
+
+ p.next_expect (event::end_object); // checkRuns
+ p.next_expect (event::end_object); // Check suite node
+ p.next_expect (event::end_object); // Check suite outer }
+ p.next_expect (event::end_array); // Check suites edges
+ p.next_expect (event::end_object); // checkSuites
+ p.next_expect (event::end_object); // Commit
+ p.next_expect (event::end_object); // Repository node
+
+ p.next_expect (event::end_object); // Outermost }
+ });
+
+ return r;
+ }
+
+ // Serialize a GraphQL operation (query/mutation) into a GraphQL request.
+ //
+ // This is essentially a JSON object with a "query" string member containing
+ // the GraphQL operation. For example:
+ //
+ // { "query": "mutation { cr0:createCheckRun(...
}" }
+ static string
+ gq_serialize_request (const string& o)
+ {
+ string b;
+ json::buffer_serializer s (b);
+
+ s.begin_object ();
+ s.member ("query", o);
+ s.end_object ();
+
+ return b;
+ }
+
+ // Send a GraphQL mutation request `rq` that creates (if create_data is
+ // present) or updates (otherwise) one or more check runs. The requested
+ // build state is taken from each check_run object. Update the check runs
+ // in `crs` with the new data (state, node ID if unset, and state_synced).
+ // Return false and issue diagnostics if the request failed.
+ //
+ struct gq_create_data
+ {
+ uint64_t app_id;
+ reference_wrapper<const string> repository_id;
+ reference_wrapper<const string> head_sha;
+ };
+
+ static bool
+ gq_mutate_check_runs (const basic_mark& error,
+ check_runs::iterator crs_b,
+ check_runs::iterator crs_e,
+ const string& iat,
+ string rq,
+ const optional<gq_create_data>& create_data)
+ {
+ size_t crs_n (crs_e - crs_b);
+
+ const char* what (nullptr);
+ try
+ {
+ // Response type which parses a GraphQL response containing multiple
+ // check_run objects.
+ //
+ struct resp
+ {
+ vector<gh_check_run> check_runs; // Received check runs.
+
+ resp (json::parser& p)
+ : check_runs (gq_parse_mutate_check_runs_response (p)) {}
+
+ resp () = default;
+ } rs;
+
+ what = create_data ? "create" : "update";
+ uint16_t sc (github_post (rs,
+ "graphql", // API Endpoint.
+ strings {"Authorization: Bearer " + iat},
+ move (rq)));
+
+ // Turns out it's not uncommon to not get a reply from GitHub if the
+ // number of check runs being created in build_queued() is large. The
+ // symptom is a 502 (Bad gateway) reply from GitHub, the theory being
+ // that their load balancer drops the connection if the request is not
+ // handled within a certain time. Note that if the number of check runs
+ // is under 100, they seem to still be created on GitHub; we just don't
+ // get the reply (and thus their node ids). So we try to re-query that
+ // information.
+ //
+ optional<uint16_t> sc1;
+ if (sc == 502 && create_data)
+ {
+ what = "re-query";
+
+ // GraphQL query which fetches the most recently-created check runs.
+ //
+ string rq (gq_serialize_request (
+ gq_query_get_check_runs (create_data->app_id,
+ create_data->repository_id,
+ create_data->head_sha,
+ crs_n)));
+
+ // Type that parses the result of the above GraphQL query.
+ //
+ struct resp
+ {
+ vector<gh_check_run> check_runs; // Received check runs.
+
+ resp (json::parser& p)
+ : check_runs (gq_parse_get_check_runs_response (p)) {}
+
+ resp () = default;
+ } rs1;
+
+ sc1 = github_post (rs1,
+ "graphql", // API Endpoint.
+ strings {"Authorization: Bearer " + iat},
+ move (rq));
+
+ if (*sc1 == 200)
+ {
+ if (rs1.check_runs.size () == crs_n)
+ {
+ // It's possible GitHub did not create all the check runs we have
+ // requested, in which case it may return some unrelated check
+ // runs (for example, from before the re-request). So we verify
+ // that we got the expected ones.
+ //
+ size_t i (0);
+ for (; i != crs_n; ++i)
+ {
+ const check_run& cr (*(crs_b + i));
+ const gh_check_run& gcr (rs1.check_runs[i]);
+
+ if (cr.name != gcr.name ||
+ cr.state != gh_from_status (gcr.status))
+ break;
+ }
+
+ if (i == crs_n)
+ {
+ rs.check_runs = move (rs1.check_runs);
+
+ // Reduce to as-if the create request succeeded.
+ //
+ what = "create";
+ sc = 200;
+ }
+ }
+ }
+ }
+
+ if (sc == 200)
+ {
+ vector<gh_check_run>& rcrs (rs.check_runs);
+
+ if (rcrs.size () == crs_n)
+ {
+ for (size_t i (0); i != crs_n; ++i)
+ {
+ check_run& cr (*(crs_b + i));
+
+ // Validate the check run in the response against the build.
+ //
+ const gh_check_run& rcr (rcrs[i]); // Received check run.
+
+ build_state st (cr.state); // Requested state.
+ build_state rst (gh_from_status (rcr.status)); // Received state.
+
+ // Note that GitHub won't allow us to change a built check run to
+ // any other state (but all other transitions are allowed).
+ //
+ if (rst != st && rst != build_state::built)
+ {
+ error << "unexpected check_run status: received '" << rcr.status
+ << "' but expected '" << gh_to_status (st) << '\'';
+
+ return false; // Fail because something is clearly very wrong.
+ }
+
+ if (!cr.node_id)
+ cr.node_id = move (rcr.node_id);
+
+ cr.state = rst;
+ cr.state_synced = (rst == st);
+ }
+
+ return true;
+ }
+ else
+ error << "unexpected number of check_run objects in response";
+ }
+ else
+ {
+ diag_record dr (error);
+
+ dr << "failed to " << what << " check runs: error HTTP response status "
+ << sc;
+
+ if (sc1)
+ {
+ if (*sc1 != 200)
+ dr << error << "failed to re-query check runs: error HTTP "
+ << "response status " << *sc1;
+ else
+ dr << error << "unexpected number of check_run objects in "
+ << "re-query response";
+ }
+ }
+ }
+ catch (const json::invalid_json_input& e) // struct resp (via github_post())
+ {
+ // Note: e.name is the GitHub API endpoint.
+ //
+ error << "malformed JSON in " << what << " response from " << e.name
+ << ", line: " << e.line << ", column: " << e.column
+ << ", byte offset: " << e.position
+ << ", error: " << e;
+ }
+ catch (const invalid_argument& e) // github_post()
+ {
+ error << "malformed header(s) in " << what << " response: " << e;
+ }
+ catch (const system_error& e) // github_post()
+ {
+ error << "unable to " << what << " check runs (errno=" << e.code ()
+ << "): " << e.what ();
+ }
+ catch (const runtime_error& e) // gq_parse_{mutate,get}_check_runs_response()
+ {
+ // GitHub response contained error(s) (could be ours or theirs at this
+ // point).
+ //
+ error << "unable to " << what << " check runs: " << e;
+ }
+
+ return false;
+ }
+
+ // Serialize `createCheckRun` mutations for one or more builds to GraphQL.
+ //
+ // The check run parameters (names, build states, details_urls, etc.) are
+ // taken from each object in the [crs_b, crs_e) range.
+ //
+ // Note that build results are not supported because we never create
+ // multiple check runs in the built state.
+ //
+ // The details URLs (taken from each check_run object) can be absent for
+ // queued but not for the other states.
+ //
+ // Throw invalid_argument if any of the observed check run members are not
+ // valid GraphQL values (string, enum, etc).
+ //
+ static string
+ gq_mutation_create_check_runs (const string& ri, // Repository ID
+ const string& hs, // Head SHA
+ brep::check_runs::iterator crs_b,
+ brep::check_runs::iterator crs_e)
+ {
+ ostringstream os;
+
+ os << "mutation {" << '\n';
+
+ // Serialize a `createCheckRun` for each build.
+ //
+ for (brep::check_runs::iterator crs_i (crs_b); crs_i != crs_e; ++crs_i)
+ {
+ const check_run& cr (*crs_i);
+
+ assert (cr.state != build_state::built); // Not supported.
+
+ // Ensure details URL and output are non-empty if present.
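+ //
+ // For illustration, a single queued check run without details URL or
+ // output serializes to roughly (values hypothetical):
+ //
+ //   mutation {
+ //   cr0:createCheckRun(input: {
+ //     name: "libfoo/1.0.0/..."
+ //     repositoryId: "R_kgDOLc8CoA"
+ //     headSha: "3f1bf89..."
+ //     status: QUEUED})
+ //   { checkRun { node_id: id name status } }
+ //   }
+ //
+ // The crN field aliases are what allow multiple createCheckRun calls in
+ // one mutation and what the response parser above keys on.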
+ //
+ assert (!cr.details_url || !cr.details_url->empty ());
+ assert (!cr.description ||
+ (!cr.description->title.empty () &&
+ !cr.description->summary.empty ()));
+
+ string al ("cr" + to_string (crs_i - crs_b)); // Field alias.
+
+ os << gq_name (al) << ":createCheckRun(input: {" << '\n'
+ << " name: " << gq_str (cr.name) << '\n'
+ << " repositoryId: " << gq_str (ri) << '\n'
+ << " headSha: " << gq_str (hs) << '\n'
+ << " status: " << gq_enum (gh_to_status (cr.state));
+ if (cr.details_url)
+ {
+ os << '\n';
+ os << " detailsUrl: " << gq_str (*cr.details_url);
+ }
+ if (cr.description)
+ {
+ os << " output: {" << '\n'
+ << " title: " << gq_str (cr.description->title) << '\n'
+ << " summary: " << gq_str (cr.description->summary) << '\n'
+ << " }";
+ }
+ os << "})" << '\n'
+ // Specify the selection set (fields to be returned). Note that we
+ // rename `id` to `node_id` (using a field alias) for consistency with
+ // webhook events and REST API responses.
+ //
+ << "{" << '\n'
+ << " checkRun {" << '\n'
+ << " node_id: id" << '\n'
+ << " name" << '\n'
+ << " status" << '\n'
+ << " }" << '\n'
+ << "}" << '\n';
+ }
+
+ os << "}" << '\n';
+
+ return os.str ();
+ }
+
+ // Serialize a `createCheckRun` mutation for a build to GraphQL.
+ //
+ // The conclusion argument (`co`) is required if the check run status is
+ // completed because GitHub does not allow a check run status of completed
+ // without a conclusion.
+ //
+ // The details URL argument (`du`) can be empty for queued but not for the
+ // other states.
+ //
+ // Throw invalid_argument if any of the arguments or observed check run
+ // members are not valid GraphQL values (string, enum, etc).
+ //
+ static string
+ gq_mutation_create_check_run (const string& ri, // Repository ID
+ const string& hs, // Head SHA
+ const optional<string>& du, // Details URL.
+ const check_run& cr,
+ const string& st, // Check run status.
+ const string& ti, // Output title.
+ const string& su, // Output summary.
+ optional<string> co = nullopt) // Conclusion.
+ {
+ // Ensure details URL is non-empty if present.
+ //
+ assert (!du || !du->empty ());
+
+ // Ensure we have conclusion if the status is completed.
+ //
+ assert (st != "COMPLETED" || co);
+
+ ostringstream os;
+
+ os << "mutation {" << '\n';
+
+ // Serialize a `createCheckRun` for the build.
+ //
+ os << gq_name ("cr0") << ":createCheckRun(input: {" << '\n'
+ << " name: " << gq_str (cr.name) << '\n'
+ << " repositoryId: " << gq_str (ri) << '\n'
+ << " headSha: " << gq_str (hs) << '\n'
+ << " status: " << gq_enum (st);
+ if (du)
+ {
+ os << '\n';
+ os << " detailsUrl: " << gq_str (*du);
+ }
+ os << '\n';
+ if (co)
+ os << " conclusion: " << gq_enum (*co) << '\n';
+ os << " output: {" << '\n'
+ << " title: " << gq_str (ti) << '\n'
+ << " summary: " << gq_str (su) << '\n'
+ << " }";
+ os << "})" << '\n'
+ // Specify the selection set (fields to be returned). Note that we
+ // rename `id` to `node_id` (using a field alias) for consistency with
+ // webhook events and REST API responses.
+ //
+ << "{" << '\n'
+ << " checkRun {" << '\n'
+ << " node_id: id" << '\n'
+ << " name" << '\n'
+ << " status" << '\n'
+ << " }" << '\n'
+ << "}" << '\n';
+
+ os << "}" << '\n';
+
+ return os.str ();
+ }
+
+ // Serialize an `updateCheckRun` mutation for one build to GraphQL.
+ //
+ // The conclusion argument (`co`) is required if the check run status is
+ // completed because GitHub does not allow updating a check run to completed
+ // without a conclusion.
+ //
+ // Throw invalid_argument if any of the arguments are invalid values (of
+ // GraphQL types or otherwise).
+ //
+ static string
+ gq_mutation_update_check_run (const string& ri, // Repository ID.
+ const string& ni, // Node ID.
+ const string& st, // Check run status.
+ optional<timestamp> sa, // Started at.
+ const string& ti, // Output title.
+ const string& su, // Output summary.
+ optional<string> co = nullopt) // Conclusion.
+ {
+ // Ensure we have conclusion if the status is completed.
+ //
+ assert (st != "COMPLETED" || co);
+
+ ostringstream os;
+
+ os << "mutation {" << '\n'
+ << "cr0:updateCheckRun(input: {" << '\n'
+ << " checkRunId: " << gq_str (ni) << '\n'
+ << " repositoryId: " << gq_str (ri) << '\n'
+ << " status: " << gq_enum (st);
+ if (sa)
+ {
+ try
+ {
+ os << '\n';
+ os << " startedAt: " << gq_str (gh_to_iso8601 (*sa));
+ }
+ catch (const system_error& e)
+ {
+ // Translate for simplicity.
+ //
+ throw invalid_argument ("unable to convert started_at value " +
+ to_string (system_clock::to_time_t (*sa)) +
+ ": " + e.what ());
+ }
+ }
+ os << '\n';
+ if (co)
+ os << " conclusion: " << gq_enum (*co) << '\n';
+ os << " output: {" << '\n'
+ << " title: " << gq_str (ti) << '\n'
+ << " summary: " << gq_str (su) << '\n'
+ << " }";
+ os << "})" << '\n'
+ // Specify the selection set (fields to be returned). Note that we
+ // rename `id` to `node_id` (using a field alias) for consistency with
+ // webhook events and REST API responses.
+ //
+ << "{" << '\n'
+ << " checkRun {" << '\n'
+ << " node_id: id" << '\n'
+ << " name" << '\n'
+ << " status" << '\n'
+ << " }" << '\n'
+ << "}" << '\n'
+ << "}" << '\n';
+
+ return os.str ();
+ }
+
+ bool
+ gq_create_check_runs (const basic_mark& error,
+ brep::check_runs& crs,
+ const string& iat,
+ uint64_t ai,
+ const string& rid,
+ const string& hs)
+ {
+ // No support for result_status so state cannot be built.
+ //
+#ifndef NDEBUG
+ for (const check_run& cr: crs)
+ assert (cr.state != build_state::built);
+#endif
+
+ // Trying to create a large number of check runs at once does not work.
+ // There are two failure modes:
+ //
+ // 1. Between about 40 - 60 we may get 502 (bad gateway) but the check
+ // runs are still created on GitHub. We handle this case by re-querying
+ // the check runs (see gq_mutate_check_runs() for details).
+ //
+ // 2. Above about 60 GitHub may not create all the check runs (while still
+ // responding with 502). We handle this here by batching the creation.
+ //
+ size_t n (crs.size ());
+ size_t b (n / 60 + (n % 60 != 0 ? 1 : 0));
+ size_t bn (n / b);
+
+ auto i (crs.begin ());
+ for (size_t j (0); j != b; )
+ {
+ auto e (++j != b ? (i + bn): crs.end ());
+
+ string rq (
+ gq_serialize_request (
+ gq_mutation_create_check_runs (rid, hs, i, e)));
+
+ if (!gq_mutate_check_runs (error,
+ i, e,
+ iat,
+ move (rq),
+ gq_create_data {ai, rid, hs}))
+ return false;
+
+ i += bn;
+ }
+
+ return true;
+ }
+
+ bool
+ gq_create_check_run (const basic_mark& error,
+ check_run& cr,
+ const string& iat,
+ uint64_t ai,
+ const string& rid,
+ const string& hs,
+ const optional<string>& du,
+ build_state st,
+ string ti, string su)
+ {
+ // State cannot be built without a conclusion.
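+ //
+ // To illustrate the batching in gq_create_check_runs() above: n == 150
+ // gives b == 150/60 + 1 == 3 batches of bn == 150/3 == 50 check runs
+ // each, with the last batch extended to crs.end () to absorb any
+ // remainder.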
+ // + assert (st != build_state::built && !ti.empty () && !su.empty ()); + + string rq ( + gq_serialize_request ( + gq_mutation_create_check_run (rid, + hs, + du, + cr, + gh_to_status (st), + move (ti), move (su), + nullopt /* conclusion */))); + + brep::check_runs crs {move (cr)}; + crs[0].state = st; + + bool r (gq_mutate_check_runs (error, + crs.begin (), crs.end (), + iat, + move (rq), + gq_create_data {ai, rid, hs})); + + cr = move (crs[0]); + + return r; + } + + bool + gq_create_check_run (const basic_mark& error, + check_run& cr, + const string& iat, + uint64_t ai, + const string& rid, + const string& hs, + const optional<string>& du, + gq_built_result br) + { + string rq ( + gq_serialize_request ( + gq_mutation_create_check_run (rid, + hs, + du, + cr, + gh_to_status (build_state::built), + move (br.title), move (br.summary), + move (br.conclusion)))); + + brep::check_runs crs {move (cr)}; + crs[0].state = build_state::built; + + bool r (gq_mutate_check_runs (error, + crs.begin (), crs.end (), + iat, + move (rq), + gq_create_data {ai, rid, hs})); + + cr = move (crs[0]); + + return r; + } + + bool + gq_update_check_run (const basic_mark& error, + check_run& cr, + const string& iat, + const string& rid, + const string& nid, + build_state st, + string ti, string su) + { + // State cannot be built without a conclusion. + // + assert (st != build_state::built && !ti.empty () && !su.empty ()); + + // Set `startedAt` to current time if updating to building. + // + optional<timestamp> sa; + + if (st == build_state::building) + sa = system_clock::now (); + + string rq ( + gq_serialize_request ( + gq_mutation_update_check_run (rid, + nid, + gh_to_status (st), + sa, + move (ti), move (su), + nullopt /* conclusion */))); + + brep::check_runs crs {move (cr)}; + crs[0].state = st; + + bool r (gq_mutate_check_runs (error, + crs.begin (), crs.end (), + iat, + move (rq), + nullopt)); + + cr = move (crs[0]); + + return r; + } + + bool + gq_update_check_run (const basic_mark& error, + check_run& cr, + const string& iat, + const string& rid, + const string& nid, + gq_built_result br) + { + string rq ( + gq_serialize_request ( + gq_mutation_update_check_run (rid, + nid, + gh_to_status (build_state::built), + nullopt /* startedAt */, + move (br.title), move (br.summary), + move (br.conclusion)))); + + brep::check_runs crs {move (cr)}; + crs[0].state = build_state::built; + + bool r (gq_mutate_check_runs (error, + crs.begin (), crs.end (), + iat, + move (rq), + nullopt)); + + cr = move (crs[0]); + + return r; + } + + // Serialize a GraphQL query that fetches a pull request from GitHub. + // + // Throw invalid_argument if the node id is not a valid GraphQL string. + // + static string + gq_query_pr_mergeability (const string& nid) + { + ostringstream os; + + os << "query {" << '\n' + << " node(id:" << gq_str (nid) << ") {" << '\n' + << " ... on PullRequest {" << '\n' + << " headRefOid" << '\n' + << " mergeStateStatus" << '\n' + << " mergeable" << '\n' + << " potentialMergeCommit { oid }" << '\n' + << " }" << '\n' + << " }" << '\n' + << "}" << '\n'; + + return os.str (); + } + + optional<gq_pr_pre_check_info> + gq_fetch_pull_request_pre_check_info (const basic_mark& error, + const string& iat, + const string& nid) + { + // Let invalid_argument from gq_query_pr_mergeability() propagate. + // + string rq (gq_serialize_request (gq_query_pr_mergeability (nid))); + + try + { + // Response parser. + // + struct resp + { + // True if the pull request was found (i.e., the node ID was valid). 
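+ //
+ // For illustration, the response shapes this parser distinguishes
+ // (values hypothetical):
+ //
+ //   {"data":{"node":null}}                          -- PR not found
+ //
+ //   {"data":{"node":{"headRefOid":"3f1bf89...",
+ //                    "mergeStateStatus":"CLEAN",
+ //                    "mergeable":"MERGEABLE",
+ //                    "potentialMergeCommit":{"oid":"9a2c4e..."}}}}
+ //
+ // with mergeStateStatus BEHIND and mergeable CONFLICTING/UNKNOWN
+ // selecting the other branches below.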
+ // + bool found = false; + + // Non-fatal error message issued during the parse. + // + string parse_error; + + // The response value. Absent if the merge commit is still being + // generated. + // + optional<gq_pr_pre_check_info> r; + + resp (json::parser& p) + { + using event = json::event; + + gq_parse_response (p, [this] (json::parser& p) + { + p.next_expect (event::begin_object); + + if (p.next_expect_member_object_null ("node")) + { + found = true; + + string hs (p.next_expect_member_string ("headRefOid")); + string ms (p.next_expect_member_string ("mergeStateStatus")); + string ma (p.next_expect_member_string ("mergeable")); + + if (ms == "BEHIND") + { + // The PR head branch is not up to date with the PR base + // branch. + // + // Note that we can only get here if the head-not-behind + // protection rule is active on the PR base branch. + // + r = {move (hs), true, nullopt}; + } + else if (ma == "MERGEABLE") + { + p.next_expect_member_object ("potentialMergeCommit"); + string oid (p.next_expect_member_string ("oid")); + p.next_expect (event::end_object); + + r = {move (hs), false, move (oid)}; + } + else + { + if (ma == "CONFLICTING") + r = {move (hs), false, nullopt}; + else if (ma == "UNKNOWN") + ; // Still being generated; leave r absent. + else + throw_json (p, "unexpected mergeable value '" + ma + '\''); + } + + if (!r || !r->merge_commit_sha) + { + // Skip the merge commit ID if it has not yet been extracted + // (in which case it should be null). + // + p.next_expect_name ("potentialMergeCommit"); + p.next_expect_value_skip (); + } + + p.next_expect (event::end_object); // node + } + + p.next_expect (event::end_object); + }); + } + + resp () = default; + } rs; + + uint16_t sc (github_post (rs, + "graphql", // API Endpoint. + strings {"Authorization: Bearer " + iat}, + move (rq))); + + if (sc == 200) + { + if (!rs.found) + error << "pull request '" << nid << "' not found"; + else if (!rs.parse_error.empty ()) + error << rs.parse_error; + + return rs.r; + } + else + error << "failed to fetch pull request: error HTTP response status " + << sc; + } + catch (const json::invalid_json_input& e) // struct resp (via github_post()) + { + // Note: e.name is the GitHub API endpoint. + // + error << "malformed JSON in response from " << e.name << ", line: " + << e.line << ", column: " << e.column << ", byte offset: " + << e.position << ", error: " << e; + } + catch (const invalid_argument& e) // github_post() + { + error << "malformed header(s) in response: " << e; + } + catch (const system_error& e) // github_post() + { + error << "unable to fetch pull request (errno=" << e.code () << "): " + << e.what (); + } + catch (const runtime_error& e) // struct resp + { + // GitHub response contained error(s) (could be ours or theirs at this + // point). + // + error << "unable to fetch pull request: " << e; + } + + return nullopt; + } + + // GraphQL serialization functions. + // + // The GraphQL spec: + // https://spec.graphql.org/ + // + // The GitHub GraphQL API reference: + // https://docs.github.com/en/graphql/reference/ + // + + // Check that a string is a valid GraphQL name. + // + // GraphQL names can contain only alphanumeric characters and underscores + // and cannot begin with a digit (so basically a C identifier). + // + // Return the name or throw invalid_argument if it is invalid. 
+ // + static const string& + gq_name (const string& v) + { + if (v.empty () || digit (v[0])) + throw invalid_argument ("invalid GraphQL name: '" + v + '\''); + + for (char c: v) + { + if (!alnum (c) && c != '_') + { + throw invalid_argument ("invalid character in GraphQL name: '" + c + + '\''); + } + } + + return v; + } + + static string + gq_name (string&& v) + { + gq_name (v); + return move (v); + } + + // Serialize a string to GraphQL. + // + // Return the serialized string or throw invalid_argument if the string is + // invalid. + // + static string + gq_str (const string& v) + { + // GraphQL strings are the same as JSON strings so we use the JSON + // serializer. + // + string b; + json::buffer_serializer s (b); + + try + { + s.value (v); + } + catch (const json::invalid_json_output&) + { + throw invalid_argument ("invalid GraphQL string: '" + v + '\''); + } + + return b; + } + + // Serialize an int to GraphQL. + // + static string + gq_int (uint64_t v) + { + string b; + json::buffer_serializer s (b); + s.value (v); + return b; + } + + // Serialize a boolean to GraphQL. + // + static inline string + gq_bool (bool v) + { + return v ? "true" : "false"; + } + + // Check that a string is a valid GraphQL enum value. + // + // GraphQL enum values can be any GraphQL name except for `true`, `false`, + // or `null`. + // + // Return the enum value or throw invalid_argument if it is invalid. + // + static const string& + gq_enum (const string& v) + { + if (v == "true" || v == "false" || v == "null") + throw invalid_argument ("invalid GraphQL enum value: '" + v + '\''); + + return gq_name (v); + } + + static string + gq_enum (string&& v) + { + gq_enum (v); + return move (v); + } + +} diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx new file mode 100644 index 0000000..5fc75aa --- /dev/null +++ b/mod/mod-ci-github-gq.hxx @@ -0,0 +1,171 @@ +// file : mod/mod-ci-github-gq.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_CI_GITHUB_GQ_HXX +#define MOD_MOD_CI_GITHUB_GQ_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <libbrep/build.hxx> + +#include <mod/tenant-service.hxx> // build_hints + +#include <mod/mod-ci-github-gh.hxx> +#include <mod/mod-ci-github-service-data.hxx> + +namespace brep +{ + // GraphQL functions (all start with gq_). + // + + // Create a new check run on GitHub for each build with the build state, + // name, details_url, and output taken from each check_run object. Update + // `check_runs` with the new data (node id and state_synced). Return false + // and issue diagnostics if the request failed. Note that in this case some + // elements in check_runs may still have been updated (due to batching). + // + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. + // + // Note that creating a check_run named `foo` will effectively replace any + // existing check_runs with that name. They will still exist on the GitHub + // servers but GitHub will only consider the latest one (for display in the + // UI or in determining the mergeability of a PR). + // + bool + gq_create_check_runs (const basic_mark& error, + brep::check_runs& check_runs, + const string& installation_access_token, + uint64_t app_id, + const string& repository_id, + const string& head_sha); + + // Create a new check run on GitHub for a build in the queued or building + // state. Note that the state cannot be built because in that case a + // conclusion is required. 
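+ //
+ // A minimal, hypothetical call for the queued state (all identifiers
+ // are placeholders):
+ //
+ //   check_run cr;
+ //   cr.name = gh_check_run_name (b, &hints);
+ //
+ //   if (gq_create_check_run (error, cr, iat, app_id, repo_node_id,
+ //                            head_sha, nullopt /* details_url */,
+ //                            build_state::queued,
+ //                            "Build queued", "The build is queued."))
+ //   {
+ //     // cr.node_id and cr.state_synced now reflect GitHub's reply.
+ //   }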
+ // + // Update `cr` with the new data (node id, state, and state_synced). Return + // false and issue diagnostics if the request failed. + // + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. + // + // If the details_url is absent GitHub will use the app's homepage. Title + // and summary are required and cannot be empty. + // + bool + gq_create_check_run (const basic_mark& error, + check_run& cr, + const string& installation_access_token, + uint64_t app_id, + const string& repository_id, + const string& head_sha, + const optional<string>& details_url, + build_state, + string title, + string summary); + + // As above but create a check run in the built state (which requires a + // conclusion). + // + struct gq_built_result + { + string conclusion; + string title; + string summary; + }; + + bool + gq_create_check_run (const basic_mark& error, + check_run& cr, + const string& installation_access_token, + uint64_t app_id, + const string& repository_id, + const string& head_sha, + const optional<string>& details_url, + gq_built_result); + + // Update a check run on GitHub to the queued or building state. Note that + // the state cannot be built because in that case a conclusion is required. + // + // Update `cr` with the new data (state and state_synced). Return false and + // issue diagnostics if the request failed. + // + // Throw invalid_argument if the passed data is invalid, missing, or + // inconsistent. + // + // Title and summary are required and cannot be empty. + // + bool + gq_update_check_run (const basic_mark& error, + check_run& cr, + const string& installation_access_token, + const string& repository_id, + const string& node_id, + build_state, + string title, + string summary); + + // As above but update a check run to the built state (which requires a + // conclusion). + // + // Note that GitHub allows any state transitions except from built (but + // built to built is allowed). The latter case is signalled by setting the + // check_run state_synced member to false and the state member to built. + // + bool + gq_update_check_run (const basic_mark& error, + check_run& cr, + const string& installation_access_token, + const string& repository_id, + const string& node_id, + gq_built_result); + + // Fetch pre-check information for a pull request from GitHub. This + // information is used to decide whether or not to CI the PR and is + // comprised of the PR's head commit SHA, whether its head branch is behind + // its base branch, and its mergeability and test merge commit SHA. + // + // Return absent value if the merge commit is still being generated (which + // means PR head branch behindness is not yet known either). See the + // gq_pr_pre_check struct's member comments for non-absent return value + // semantics. + // + // Issue diagnostics and return absent if the request failed (which means it + // will be treated by the caller as still being generated). + // + // Throw invalid_argument if the node id is invalid. + // + // Note that the first request causes GitHub to start preparing the test + // merge commit. + // + // For details regarding the test merge commit and how to check/poll for PR + // mergeability see + // https://docs.github.com/en/rest/pulls/pulls?#get-a-pull-request and + // https://docs.github.com/en/rest/guides/using-the-rest-api-to-interact-with-your-git-database?#checking-mergeability-of-pull-requests + // + struct gq_pr_pre_check_info + { + // The PR head commit id. 
+ //
+ string head_sha;
+
+ // True if the PR's head branch is behind its base branch.
+ //
+ bool behind;
+
+ // The commit id of the test merge commit. Absent if behind or the PR is
+ // not auto-mergeable.
+ //
+ optional<string> merge_commit_sha;
+ };
+
+ optional<gq_pr_pre_check_info>
+ gq_fetch_pull_request_pre_check_info (
+ const basic_mark& error,
+ const string& installation_access_token,
+ const string& node_id);
+}
+
+#endif // MOD_MOD_CI_GITHUB_GQ_HXX
diff --git a/mod/mod-ci-github-post.hxx b/mod/mod-ci-github-post.hxx new file mode 100644 index 0000000..d278ae0 --- /dev/null +++ b/mod/mod-ci-github-post.hxx @@ -0,0 +1,161 @@ +// file : mod/mod-ci-github-post.hxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#ifndef MOD_MOD_CI_GITHUB_POST_HXX
+#define MOD_MOD_CI_GITHUB_POST_HXX
+
+#include <libbrep/types.hxx>
+#include <libbrep/utility.hxx>
+
+#include <libbutl/curl.hxx>
+
+namespace brep
+{
+ // Send a POST request to the GitHub API endpoint `ep`, parse GitHub's JSON
+ // response into `rs` (only for 2xx status codes, in which case T must be
+ // constructible from json::parser), and return the HTTP status code.
+ //
+ // The endpoint `ep` should not have a leading slash.
+ //
+ // Pass additional HTTP headers in `hdrs`. For example:
+ //
+ // "HeaderName: header value"
+ //
+ // Throw invalid_argument if unable to parse the response headers,
+ // invalid_json_input (derived from invalid_argument) if unable to parse the
+ // response body, and system_error in other cases.
+ //
+ template <typename T>
+ uint16_t
+ github_post (T& rs,
+ const string& ep,
+ const strings& hdrs,
+ const string& body = "")
+ {
+ using namespace butl;
+
+ // Convert the header values to curl header option/value pairs.
+ //
+ strings hdr_opts;
+
+ for (const string& h: hdrs)
+ {
+ hdr_opts.push_back ("--header");
+ hdr_opts.push_back (h);
+ }
+
+ // Run curl.
+ //
+ try
+ {
+ // Pass --include to print the HTTP status line (followed by the response
+ // headers) so that we can get the response status code.
+ //
+ // Suppress the --fail option which causes curl to exit with status 22
+ // in case of an error HTTP response status code (>= 400); otherwise we
+ // can't get the status code.
+ //
+ // Note that butl::curl also adds --location to make curl follow redirects
+ // (which is recommended by GitHub).
+ //
+ // The API version `2022-11-28` is the only one currently supported. If
+ // the X-GitHub-Api-Version header is not passed this version will be
+ // chosen by default.
+ //
+ fdpipe errp (fdopen_pipe ()); // stderr pipe.
+
+ curl c (path ("-"), // Read input from curl::out.
+ path ("-"), // Write response to curl::in.
+ process::pipe (errp.in.get (), move (errp.out)),
+ curl::post,
+ curl::flags::no_fail,
+ "https://api.github.com/" + ep,
+ "--no-fail", // Don't fail if response status code >= 400.
+ "--include", // Output response headers for status code.
+ "--header", "Accept: application/vnd.github+json",
+ "--header", "X-GitHub-Api-Version: 2022-11-28",
+ move (hdr_opts));
+
+ ifdstream err (move (errp.in));
+
+ // Parse the HTTP response.
+ //
+ uint16_t sc; // Status code.
+ try
+ {
+ // Note: re-open in/out so that they get automatically closed on
+ // exception.
+ //
+ ifdstream in (c.in.release (), fdstream_mode::skip);
+ ofdstream out (c.out.release ());
+
+ // Write request body to out.
+ //
+ if (!body.empty ())
+ out << body;
+ out.close ();
+
+ sc = curl::read_http_status (in).code; // May throw invalid_argument.
+
+ // Parse the response body if the status code is in the 2xx range.
+ //
+ if (sc >= 200 && sc < 300)
+ {
+ // Use endpoint name as input name (useful to have it propagated
+ // in exceptions).
+ //
+ json::parser p (in, ep /* name */);
+ rs = T (p);
+ }
+
+ in.close ();
+ }
+ catch (const io_error& e)
+ {
+ // If the process exits with non-zero status, assume the IO error is due
+ // to that and fall through.
+ //
+ if (c.wait ())
+ {
+ throw_generic_error (
+ e.code ().value (),
+ (string ("unable to read curl stdout: ") + e.what ()).c_str ());
+ }
+ }
+ catch (const json::invalid_json_input&)
+ {
+ // If the process exits with non-zero status, assume the JSON error is
+ // due to that and fall through.
+ //
+ if (c.wait ())
+ throw;
+ }
+
+ if (!c.wait ())
+ {
+ string et (err.read_text ());
+ throw_generic_error (EINVAL,
+ ("non-zero curl exit status: " + et).c_str ());
+ }
+
+ err.close ();
+
+ return sc;
+ }
+ catch (const process_error& e)
+ {
+ throw_generic_error (
+ e.code ().value (),
+ (string ("unable to execute curl: ") + e.what ()).c_str ());
+ }
+ catch (const io_error& e)
+ {
+ // Unable to read diagnostics from stderr.
+ //
+ throw_generic_error (
+ e.code ().value (),
+ (string ("unable to read curl stderr: ") + e.what ()).c_str ());
+ }
+ }
+}
+
+#endif // MOD_MOD_CI_GITHUB_POST_HXX
diff --git a/mod/mod-ci-github-service-data.cxx b/mod/mod-ci-github-service-data.cxx new file mode 100644 index 0000000..aa2e619 --- /dev/null +++ b/mod/mod-ci-github-service-data.cxx @@ -0,0 +1,331 @@ +// file : mod/mod-ci-github-service-data.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <mod/mod-ci-github-service-data.hxx>
+
+#include <libbutl/json/parser.hxx>
+#include <libbutl/json/serializer.hxx>
+
+namespace brep
+{
+ using event = json::event;
+
+ [[noreturn]] static void
+ throw_json (json::parser& p, const string& m)
+ {
+ throw json::invalid_json_input (
+ p.input_name,
+ p.line (), p.column (), p.position (),
+ m);
+ }
+
+ service_data::
+ service_data (const string& json)
+ {
+ json::parser p (json.data (), json.size (), "service_data");
+
+ p.next_expect (event::begin_object);
+
+ // Throw if the schema version is not supported.
+ //
+ version = p.next_expect_member_number<uint64_t> ("version");
+ if (version != 1)
+ {
+ throw invalid_argument ("unsupported service_data schema version: " +
+ to_string (version));
+ }
+
+ {
+ string v (p.next_expect_member_string ("kind"));
+
+ if (v == "local") kind = local;
+ else if (v == "remote") kind = remote;
+ else
+ throw_json (p, "invalid service data kind: '" + v + '\'');
+ }
+
+ pre_check = p.next_expect_member_boolean<bool> ("pre_check");
+ re_request = p.next_expect_member_boolean<bool> ("re_request");
+
+ warning_success = p.next_expect_member_boolean<bool> ("warning_success");
+
+ // Installation access token (IAT).
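+ // (A JSON object with `token` and `expires_at` members; see the json()
+ // serializer below for the exact layout.)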
+ // + p.next_expect_name ("installation_access"); + installation_access = gh_installation_access_token (p); + + app_id = p.next_expect_member_number<uint64_t> ("app_id"); + installation_id = p.next_expect_member_string ("installation_id"); + + repository_node_id = p.next_expect_member_string ("repository_node_id"); + repository_clone_url = p.next_expect_member_string ("repository_clone_url"); + + { + string* s (p.next_expect_member_string_null ("pr_node_id")); + if (s != nullptr) + pr_node_id = *s; + } + + pr_number = p.next_expect_member_number_null<uint32_t> ("pr_number"); + + check_sha = p.next_expect_member_string ("check_sha"); + report_sha = p.next_expect_member_string ("report_sha"); + + p.next_expect_member_array ("check_runs"); + while (p.next_expect (event::begin_object, event::end_array)) + { + string bid (p.next_expect_member_string ("build_id")); + string nm (p.next_expect_member_string ("name")); + + optional<string> nid; + { + string* v (p.next_expect_member_string_null ("node_id")); + if (v != nullptr) + nid = *v; + } + + build_state s; + try + { + s = to_build_state (p.next_expect_member_string ("state")); + } + catch (const invalid_argument& e) + { + throw_json (p, e.what ()); + } + + bool ss (p.next_expect_member_boolean<bool> ("state_synced")); + + optional<result_status> rs; + { + string* v (p.next_expect_member_string_null ("status")); + if (v != nullptr) + { + try + { + rs = bbot::to_result_status (*v); + } + catch (const invalid_argument& e) + { + throw_json (p, e.what ()); + } + assert (s == build_state::built); + } + } + + check_runs.push_back ( + check_run {move (bid), + move (nm), + move (nid), + s, + ss, + rs, + nullopt, /* details_url */ + nullopt /* description */}); + + p.next_expect (event::end_object); + } + + completed = p.next_expect_member_boolean<bool> ("completed"); + + { + string* s (p.next_expect_member_string_null ("conclusion_node_id")); + if (s != nullptr) + conclusion_node_id = *s; + } + + p.next_expect (event::end_object); + } + + // check_suite constructor. + // + service_data:: + service_data (bool ws, + string iat_tok, + timestamp iat_ea, + uint64_t aid, + string iid, + string rid, + string rcu, + kind_type k, + bool pc, + bool rr, + string cs, + string rs) + : kind (k), pre_check (pc), re_request (rr), + warning_success (ws), + installation_access (move (iat_tok), iat_ea), + app_id (aid), + installation_id (move (iid)), + repository_node_id (move (rid)), + repository_clone_url (move (rcu)), + check_sha (move (cs)), + report_sha (move (rs)), + completed (false) + { + } + + // pull_request constructor. 
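+ // (Same as the check_suite constructor above except that it also
+ // initializes the pr_node_id and pr_number members.)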
+ // + service_data:: + service_data (bool ws, + string iat_tok, + timestamp iat_ea, + uint64_t aid, + string iid, + string rid, + string rcu, + kind_type k, + bool pc, + bool rr, + string cs, + string rs, + string pid, + uint32_t prn) + : kind (k), pre_check (pc), re_request (rr), + warning_success (ws), + installation_access (move (iat_tok), iat_ea), + app_id (aid), + installation_id (move (iid)), + repository_node_id (move (rid)), + repository_clone_url (move (rcu)), + pr_node_id (move (pid)), + pr_number (prn), + check_sha (move (cs)), + report_sha (move (rs)), + completed (false) + { + } + + string service_data:: + json () const + { + string b; + json::buffer_serializer s (b); + + s.begin_object (); + + s.member ("version", 1); + + s.member_name ("kind"); + switch (kind) + { + case local: s.value ("local"); break; + case remote: s.value ("remote"); break; + } + + s.member ("pre_check", pre_check); + s.member ("re_request", re_request); + + s.member ("warning_success", warning_success); + + // Installation access token (IAT). + // + s.member_begin_object ("installation_access"); + s.member ("token", installation_access.token); + + // IAT expires_at timestamp. + // + { + string v; + try + { + v = gh_to_iso8601 (installation_access.expires_at); + } + catch (const system_error& e) + { + // Translate for simplicity. + // + throw invalid_argument ("unable to convert IAT expires_at value " + + to_string (system_clock::to_time_t ( + installation_access.expires_at))); + } + s.member ("expires_at", move (v)); + } + + s.end_object (); + + s.member ("app_id", app_id); + s.member ("installation_id", installation_id); + s.member ("repository_node_id", repository_node_id); + s.member ("repository_clone_url", repository_clone_url); + + s.member_name ("pr_node_id"); + if (pr_node_id) + s.value (*pr_node_id); + else + s.value (nullptr); + + s.member_name ("pr_number"); + if (pr_number) + s.value (*pr_number); + else + s.value (nullptr); + + s.member ("check_sha", check_sha); + s.member ("report_sha", report_sha); + + s.member_begin_array ("check_runs"); + for (const check_run& cr: check_runs) + { + s.begin_object (); + s.member ("build_id", cr.build_id); + s.member ("name", cr.name); + + s.member_name ("node_id"); + if (cr.node_id) + s.value (*cr.node_id); + else + s.value (nullptr); + + s.member ("state", to_string (cr.state)); + s.member ("state_synced", cr.state_synced); + + s.member_name ("status"); + if (cr.status) + { + assert (cr.state == build_state::built); + s.value (to_string (*cr.status)); // Doesn't throw. 
+ } + else + s.value (nullptr); + + s.end_object (); + } + s.end_array (); + + s.member ("completed", completed); + + s.member_name ("conclusion_node_id"); + if (conclusion_node_id) + s.value (*conclusion_node_id); + else + s.value (nullptr); + + s.end_object (); + + return b; + } + + check_run* service_data:: + find_check_run (const string& bid) + { + for (check_run& cr: check_runs) + { + if (cr.build_id == bid) + return &cr; + } + return nullptr; + } + + ostream& + operator<< (ostream& os, const check_run& cr) + { + os << "node_id: " << cr.node_id.value_or ("null") + << ", build_id: " << cr.build_id + << ", name: " << cr.name + << ", state: " << cr.state_string (); + + return os; + } +} diff --git a/mod/mod-ci-github-service-data.hxx b/mod/mod-ci-github-service-data.hxx new file mode 100644 index 0000000..9aa512a --- /dev/null +++ b/mod/mod-ci-github-service-data.hxx @@ -0,0 +1,208 @@ +// file : mod/mod-ci-github-service-data.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_CI_GITHUB_SERVICE_DATA_HXX +#define MOD_MOD_CI_GITHUB_SERVICE_DATA_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <mod/mod-ci-github-gh.hxx> + +namespace brep +{ + // Service data associated with the tenant (corresponds to GH check suite). + // + // It is always a top-level JSON object and the first member is always the + // schema version. + + // Unsynchronized state means we were unable to (conclusively) notify + // GitHub about the last state transition (e.g., due to a transient + // network error). The "conclusively" part means that the notification may + // or may not have gone through. Note: node_id can be absent for the same + // reason. + // + struct check_run + { + string build_id; // Full build id. + string name; // Potentially shortened build id. + optional<string> node_id; // GitHub id. + + build_state state; + bool state_synced; + + optional<result_status> status; // Only if state is built & synced. + + // Note: these are never serialized (only used to pass information to the + // GraphQL functions). + // + struct description_type + { + string title; + string summary; + }; + + optional<string> details_url; + optional<description_type> description; + + string + state_string () const + { + string r (to_string (state)); + if (!state_synced) + r += "(unsynchronized)"; + return r; + } + }; + + using check_runs = vector<check_run>; + + // We have two kinds of service data that correspond to the following two + // typical scenarios (until/unless we add support for merge queues): + // + // 1. Branch push (via check_suite) plus zero or more local PRs (via + // pull_request) that share the same head commit id. + // + // 2. One or more remote PRs (via pull_request) that share the same head + // commit id (from a repository in another organization). + // + // Plus, for PRs, the service data may be in the pre-check phase while we + // are in the process of requesting the test merge commit and making sure it + // can be created and is not behind base. We do all this before we actually + // create the CI tenant. + // + // Note that the above two cases are typical but not the only possible + // scenarios. Specifically, it is possible to have a mixture of all three + // kinds (branch push, local PR, and remote PR) since the same head commit + // id can be present in both local and remote branches. There is no way to + // handle this case perfectly and we do the best we can (see + // build_unloaded_pre_check() for details). 
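+ //
+ // For illustration only, an abbreviated sketch of the serialized form of
+ // this data (all member values are hypothetical; see the json() member
+ // function for the authoritative layout):
+ //
+ // {
+ //   "version": 1,
+ //   "kind": "local",
+ //   "pre_check": false,
+ //   "re_request": false,
+ //   "warning_success": true,
+ //   "installation_access": {"token": "...", "expires_at": "..."},
+ //   "app_id": 12345,
+ //   "installation_id": "67890",
+ //   "repository_node_id": "...",
+ //   "repository_clone_url": "...",
+ //   "pr_node_id": null,
+ //   "pr_number": null,
+ //   "check_sha": "...",
+ //   "report_sha": "...",
+ //   "check_runs": [{"build_id": "...", "name": "...", "node_id": null,
+ //                   "state": "queued", "state_synced": true,
+ //                   "status": null}],
+ //   "completed": false,
+ //   "conclusion_node_id": null
+ // }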
+ // + struct service_data + { + // The data schema version. Note: must be first member in the object. + // + uint64_t version = 1; + + // Kind and phase. + // + enum kind_type {local, remote /*, queue */} kind; + bool pre_check; + bool re_request; // Re-requested (rebuild). + + // Check suite settings. + // + bool warning_success; // See gh_to_conclusion(). + + // Check suite-global data. + // + gh_installation_access_token installation_access; + + uint64_t app_id; + string installation_id; // @@ TMP Also actually an integer + + string repository_node_id; // GitHub-internal opaque repository id. + + string repository_clone_url; + + // The following two are only used for pull requests. + // + // @@ TODO/LATER: maybe put them in a struct, if more members? + // + optional<string> pr_node_id; + optional<uint32_t> pr_number; + + // The commit ID the branch push or pull request (and its check runs) are + // building. This will be the head commit for the branch push as well as + // local pull requests and the test merge commit for remote pull requests. + // + string check_sha; + + // The commit ID the branch push or pull request (and its check runs) are + // reporting to. Note that in the case of a pull request this will be the + // head commit (`pull_request.head.sha`) as opposed to the test merge + // commit. + // + string report_sha; + + brep::check_runs check_runs; + + // Flag indicating that all the elements in check_runs are built and this + // check suite is completed. + // + bool completed; + + // The GitHub ID of the synthetic conclusion check run or absent if it + // hasn't been created yet. + // + optional<string> conclusion_node_id; + + // Return the check run with the specified build ID or nullptr if not + // found. + // + check_run* + find_check_run (const string& build_id); + + // Construct from JSON. + // + // Throw invalid_argument if the schema version is not supported. + // + // Throw invalid_argument (invalid_json_input) in case of malformed JSON + // or any invalid values. + // + explicit + service_data (const string& json); + + // The check_suite constructor. + // + // Note that check_sha and report_sha are both the SHA of the + // check_suite's head commit. + // + service_data (bool warning_success, + string iat_token, + timestamp iat_expires_at, + uint64_t app_id, + string installation_id, + string repository_node_id, + string repository_clone_url, + kind_type kind, + bool pre_check, + bool re_request, + string check_sha, + string report_sha); + + // The pull_request constructor. + // + service_data (bool warning_success, + string iat_token, + timestamp iat_expires_at, + uint64_t app_id, + string installation_id, + string repository_node_id, + string repository_clone_url, + kind_type kind, + bool pre_check, + bool re_request, + string check_sha, + string report_sha, + string pr_node_id, + uint32_t pr_number); + + service_data () = default; + + // Serialize to JSON. + // + // Throw invalid_argument if any values are invalid. + // + // May also throw invalid_json_output but that would be a programming + // error. 
+ // + string + json () const; + }; + + ostream& + operator<< (ostream&, const check_run&); +} + +#endif // MOD_MOD_CI_GITHUB_SERVICE_DATA_HXX diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx new file mode 100644 index 0000000..cd45c4c --- /dev/null +++ b/mod/mod-ci-github.cxx @@ -0,0 +1,3460 @@ +// file : mod/mod-ci-github.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci-github.hxx> + +#include <libbutl/json/parser.hxx> + +#include <web/xhtml/serialization.hxx> +#include <web/server/mime-url-encoding.hxx> // mime_url_encode() + +#include <mod/jwt.hxx> +#include <mod/hmac.hxx> +#include <mod/build.hxx> // build_log_url() +#include <mod/module-options.hxx> + +#include <mod/mod-ci-github-gq.hxx> +#include <mod/mod-ci-github-post.hxx> +#include <mod/mod-ci-github-service-data.hxx> + +#include <cerrno> +#include <cstdlib> // strtoull() +#include <stdexcept> + +// Resources: +// +// Creating an App: +// https://docs.github.com/en/apps/creating-github-apps/about-creating-github-apps/best-practices-for-creating-a-github-app +// +// Webhooks: +// https://docs.github.com/en/webhooks/using-webhooks/best-practices-for-using-webhooks +// https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries +// +// REST API: +// All docs: https://docs.github.com/en/rest#all-docs +// Best practices: https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api +// +// GraphQL API: +// Reference: https://docs.github.com/en/graphql/reference +// + +using namespace std; +using namespace butl; +using namespace web; +using namespace brep::cli; + +namespace brep +{ + ci_github:: + ci_github (tenant_service_map& tsm) + : tenant_service_map_ (tsm) + { + } + + ci_github:: + ci_github (const ci_github& r, tenant_service_map& tsm) + : database_module (r), + ci_start (r), + options_ (r.initialized_ ? r.options_ : nullptr), + tenant_service_map_ (tsm) + { + } + + void ci_github:: + init (scanner& s) + { + HANDLER_DIAG; + + { + shared_ptr<tenant_service_base> ts ( + dynamic_pointer_cast<tenant_service_base> (shared_from_this ())); + + assert (ts != nullptr); // By definition. + + tenant_service_map_["ci-github"] = move (ts); + } + + options_ = make_shared<options::ci_github> ( + s, unknown_mode::fail, unknown_mode::fail); + + // Prepare for the CI requests handling, if configured. + // + if (options_->ci_github_app_webhook_secret_specified ()) + { + if (!options_->build_config_specified ()) + fail << "package building functionality must be enabled"; + + if (!options_->ci_github_app_id_private_key_specified ()) + fail << "no app id/private key mappings configured"; + + for (const auto& pr: options_->ci_github_app_id_private_key ()) + { + if (pr.second.relative ()) + fail << "ci-github-app-id-private-key path must be absolute"; + } + + // Read the webhook secret from the configured path. + // + { + const path& p (options_->ci_github_app_webhook_secret ()); + + if (p.relative ()) + fail << "ci-github-app-webhook-secret path must be absolute"; + + try + { + ifdstream is (p); + getline (is, webhook_secret_, '\0'); + + // Trim leading/trailing whitespaces (presumably GitHub does the + // same in its web UI). 
+ // + if (trim (webhook_secret_).empty ()) + fail << "empty webhook secret in " << p; + } + catch (const io_error& e) + { + fail << "unable to read webhook secret from " << p << ": " << e; + } + } + + ci_start::init (make_shared<options::ci_start> (*options_)); + + database_module::init (*options_, options_->build_db_retry ()); + } + } + + bool ci_github:: + handle (request& rq, response&) + { + using namespace bpkg; + + HANDLER_DIAG; + + if (build_db_ == nullptr) + throw invalid_request (501, "GitHub CI submission not implemented"); + + // Process headers. + // + string event; // Webhook event. + string hmac; // Received HMAC. + try + { + bool content_type (false); + + for (const name_value& h: rq.headers ()) + { + // HMAC authenticating this request. Note that it won't be present + // unless a webhook secret has been set in the GitHub app's settings. + // + if (icasecmp (h.name, "x-hub-signature-256") == 0) + { + if (!h.value) + throw invalid_request (400, "missing x-hub-signature-256 value"); + + // Parse the x-hub-signature-256 header value. For example: + // + // sha256=5e82258... + // + // Check for the presence of the "sha256=" prefix and then strip it + // to leave only the HMAC value. + // + if (h.value->find ("sha256=", 0, 7) == string::npos) + throw invalid_request (400, "invalid x-hub-signature-256 value"); + + hmac = h.value->substr (7); + } + // This event's UUID. + // + else if (icasecmp (h.name, "x-github-delivery") == 0) + { + // @@ TODO Check that delivery UUID has not been received before + // (replay attack). + } + else if (icasecmp (h.name, "content-type") == 0) + { + if (!h.value) + throw invalid_request (400, "missing content-type value"); + + if (icasecmp (*h.value, "application/json") != 0) + { + throw invalid_request (400, + "invalid content-type value: '" + *h.value + + '\''); + } + + content_type = true; + } + // The webhook event. + // + else if (icasecmp (h.name, "x-github-event") == 0) + { + if (!h.value) + throw invalid_request (400, "missing x-github-event value"); + + event = *h.value; + } + } + + if (!content_type) + throw invalid_request (400, "missing content-type header"); + + if (event.empty ()) + throw invalid_request (400, "missing x-github-event header"); + + if (hmac.empty ()) + throw invalid_request (400, "missing x-hub-signature-256 header"); + } + catch (const invalid_request& e) + { + error << "request header error: " << e.content; + throw; + } + + // Read the entire request body into a buffer because we need to compute + // an HMAC over it and then parse it as JSON. The alternative of reading + // from the stream twice works out to be more complicated (see also a TODO + // item in web/server/module.hxx). + // + string body; + { + // Note that even though we may not need caching right now, we may later + // (e.g., to support cancel) so let's just enable it right away. + // + size_t limit (128 * 1024); + + istream& is (rq.content (limit, limit)); + + try + { + getline (is, body, '\0'); + } + catch (const io_error& e) + { + fail << "unable to read request body: " << e; + } + } + + // Verify the received HMAC. + // + // Compute the HMAC value over the request body using the configured + // webhook secret as key and compare it to the received HMAC. 
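+ //
+ // (Note: the received HMAC is the hex-encoded SHA-256 HMAC of the body;
+ // it was extracted from the x-hub-signature-256 header by stripping the
+ // "sha256=" prefix above.)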
+ //
+ try
+ {
+ string h (compute_hmac (*options_,
+ body.data (),
+ body.size (),
+ webhook_secret_.c_str ()));
+
+ if (icasecmp (h, hmac) != 0)
+ {
+ string m ("computed HMAC does not match received HMAC");
+
+ error << m;
+
+ throw invalid_request (400, move (m));
+ }
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to compute request HMAC: " << e;
+ }
+
+ // Process the `app-id` and `warning` webhook request query parameters.
+ //
+ uint64_t app_id;
+ bool warning_success;
+ {
+ const name_values& rps (rq.parameters (1024, true /* url_only */));
+
+ bool ai (false), wa (false);
+
+ auto badreq = [] (const string& m)
+ {
+ throw invalid_request (400, m);
+ };
+
+ for (const name_value& rp: rps)
+ {
+ if (rp.name == "app-id")
+ {
+ if (!rp.value)
+ badreq ("missing 'app-id' webhook query parameter value");
+
+ ai = true;
+
+ // Parse the app id value.
+ //
+ const char* b (rp.value->c_str ());
+ char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
+ app_id = strtoull (b, &e, 10);
+ if (errno == ERANGE || e == b || *e != '\0')
+ {
+ badreq ("invalid 'app-id' webhook query parameter value: '" +
+ *rp.value + '\'');
+ }
+ }
+ else if (rp.name == "warning")
+ {
+ if (!rp.value)
+ badreq ("missing 'warning' webhook query parameter value");
+
+ wa = true;
+ const string& v (*rp.value);
+
+ if (v == "success") warning_success = true;
+ else if (v == "failure") warning_success = false;
+ else
+ badreq ("invalid 'warning' webhook query parameter value: '" + v +
+ '\'');
+ }
+ }
+
+ if (!ai) badreq ("missing 'app-id' webhook query parameter");
+ if (!wa) badreq ("missing 'warning' webhook query parameter");
+ }
+
+ // There is a webhook event (specified in the x-github-event header) and
+ // each event contains a bunch of actions (specified in the JSON request
+ // body).
+ //
+ // Note: "GitHub continues to add new event types and new actions to
+ // existing event types." As a result we ignore known actions that we are
+ // not interested in and log and ignore unknown actions. The thinking here
+ // is that we want to be "notified" of new actions at which point we can
+ // decide whether to ignore them or to handle them.
+ //
+ if (event == "check_suite")
+ {
+ gh_check_suite_event cs;
+ try
+ {
+ json::parser p (body.data (), body.size (), "check_suite event");
+
+ cs = gh_check_suite_event (p);
+ }
+ catch (const json::invalid_json_input& e)
+ {
+ string m ("malformed JSON in " + e.name + " request body");
+
+ error << m << ", line: " << e.line << ", column: " << e.column
+ << ", byte offset: " << e.position << ", error: " << e;
+
+ throw invalid_request (400, move (m));
+ }
+
+ if (cs.check_suite.app_id != app_id)
+ {
+ fail << "webhook check_suite app.id " << cs.check_suite.app_id
+ << " does not match app-id query parameter " << app_id;
+ }
+
+ if (cs.action == "requested")
+ {
+ // Branch pushes are handled in handle_branch_push() so ignore this
+ // event.
+ //
+ return true;
+ }
+ else if (cs.action == "rerequested")
+ {
+ // Someone manually requested to re-run all the check runs in this
+ // check suite. Treat as a new request.
+ //
+ return handle_check_suite_rerequest (move (cs), warning_success);
+ }
+ else if (cs.action == "completed")
+ {
+ // GitHub thinks that "all the check runs in this check suite have
+ // completed and a conclusion is available". Check with our own
+ // bookkeeping and log an error if there is a mismatch.
+ // + return handle_check_suite_completed (move (cs), warning_success); + } + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. + // + error << "unknown action '" << cs.action << "' in check_suite event"; + + return true; + } + } + else if (event == "check_run") + { + gh_check_run_event cr; + try + { + json::parser p (body.data (), body.size (), "check_run event"); + + cr = gh_check_run_event (p); + } + catch (const json::invalid_json_input& e) + { + string m ("malformed JSON in " + e.name + " request body"); + + error << m << ", line: " << e.line << ", column: " << e.column + << ", byte offset: " << e.position << ", error: " << e; + + throw invalid_request (400, move (m)); + } + + if (cr.check_run.app_id != app_id) + { + fail << "webhook check_run app.id " << cr.check_run.app_id + << " does not match app-id query parameter " << app_id; + } + + if (cr.action == "rerequested") + { + // Someone manually requested to re-run a specific check run. + // + return handle_check_run_rerequest (move (cr), warning_success); + } +#if 0 + // It looks like we shouldn't be receiving these since we are not + // subscribed to them. + // + else if (cr.action == "created" || + cr.action == "completed" || + cr.action == "requested_action") + { + } +#endif + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. + // + error << "unknown action '" << cr.action << "' in check_run event"; + + return true; + } + } + else if (event == "pull_request") + { + gh_pull_request_event pr; + try + { + json::parser p (body.data (), body.size (), "pull_request event"); + + pr = gh_pull_request_event (p); + } + catch (const json::invalid_json_input& e) + { + string m ("malformed JSON in " + e.name + " request body"); + + error << m << ", line: " << e.line << ", column: " << e.column + << ", byte offset: " << e.position << ", error: " << e; + + throw invalid_request (400, move (m)); + } + + // Store the app-id webhook query parameter in the gh_pull_request_event + // object (see gh_pull_request for an explanation). + // + // When we receive the other webhooks we do check that the app ids in + // the payload and query match but here we have to assume it is valid. + // + pr.pull_request.app_id = app_id; + + if (pr.action == "opened" || + pr.action == "synchronize") + { + // opened + // A pull request was opened. + // + // synchronize + // A pull request's head branch was updated from the base branch or + // new commits were pushed to the head branch. (Note that there is + // no equivalent event for the base branch.) + // + // Note that both cases are handled similarly: we start a new CI + // request which will be reported on the new commit id. + // + return handle_pull_request (move (pr), warning_success); + } + else if (pr.action == "edited") + { + // PR base branch changed (to a different branch) besides other + // irrelevant changes (title, body, etc). + // + // This is in a sense a special case of the base branch moving. In + // that case we don't do anything (due to the head sharing problem) + // relying instead on the branch protection rule. So it makes sense + // to do the same here. + // + return true; + } + else if (pr.action == "closed") + { + // PR has been closed (as merged or not; see merged member). Also + // apparently received if base branch is deleted (and the same + // for head branch). See also the reopened event below. 
+ // + // While it may seem natural to cancel the CI for the closed PR, it + // might actually be useful to have a completed CI record. GitHub + // doesn't prevent us from publishing CI results for the closed PR + // (even if both base and head branches were deleted). And if such a + // PR is reopened, the CI results remain. + // + return true; + } + else if (pr.action == "reopened") + { + // Previously closed PR has been reopened. + // + // Since we don't cancel the CI for a closed PR, there is nothing + // to do if it is reopened. + // + return true; + } + else if (pr.action == "assigned" || + pr.action == "auto_merge_disabled" || + pr.action == "auto_merge_enabled" || + pr.action == "converted_to_draft" || + pr.action == "demilestoned" || + pr.action == "dequeued" || + pr.action == "enqueued" || + pr.action == "labeled" || + pr.action == "locked" || + pr.action == "milestoned" || + pr.action == "ready_for_review" || + pr.action == "review_request_removed" || + pr.action == "review_requested" || + pr.action == "unassigned" || + pr.action == "unlabeled" || + pr.action == "unlocked") + { + // These have no relation to CI. + // + return true; + } + else + { + // Ignore unknown actions by sending a 200 response with empty body + // but also log as an error since we want to notice new actions. + // + error << "unknown action '" << pr.action << "' in pull_request event"; + + return true; + } + } + else if (event == "push") + { + // Push events are triggered by branch pushes, branch creation, and + // branch deletion. + // + gh_push_event ps; + try + { + json::parser p (body.data (), body.size (), "push event"); + + ps = gh_push_event (p); + } + catch (const json::invalid_json_input& e) + { + string m ("malformed JSON in " + e.name + " request body"); + + error << m << ", line: " << e.line << ", column: " << e.column + << ", byte offset: " << e.position << ", error: " << e; + + throw invalid_request (400, move (m)); + } + + // Store the app-id webhook query parameter in the gh_push_event + // object (see gh_push_event for an explanation). + // + // When we receive the other webhooks we do check that the app ids in + // the payload and query match but here we have to assume it is valid. + // + ps.app_id = app_id; + + // Note that the push request event has no action. + // + return handle_branch_push (move (ps), warning_success); + } + // Ignore marketplace_purchase events (sent by the GitHub Marketplace) by + // sending a 200 response with empty body. We offer a free plan only and + // do not support user accounts so there is nothing to be done. + // + else if (event == "marketplace_purchase") + { + return true; + } + // Ignore GitHub App installation events by sending a 200 response with + // empty body. These are triggered when a user installs a GitHub App in a + // repository or organization. + // + else if (event == "installation") + { + return true; + } + // Ignore ping events by sending a 200 response with empty body. This + // event occurs when you create a new webhook. The ping event is a + // confirmation from GitHub that you configured the webhook correctly. One + // of its triggers is listing an App on the GitHub Marketplace. + // + else if (event == "ping") + { + return true; + } + else + { + // Log to investigate. + // + error << "unexpected event '" << event << "'"; + + throw invalid_request (400, "unexpected event: '" + event + "'"); + } + } + + // Let's capitalize the synthetic conclusion check run name to make it + // easier to distinguish from the regular ones. 
+ // + static const string conclusion_check_run_name ("CONCLUSION"); + + // Yellow circle. + // + static const string conclusion_building_title ("\U0001F7E1 IN PROGRESS"); + static const string conclusion_building_summary ( + "Waiting for all the builds to complete."); + + // "Medium white" circle. + // + static const string check_run_queued_title ("\U000026AA QUEUED"); + static const string check_run_queued_summary ( + "Waiting for the build to start."); + + // Yellow circle. + // + static const string check_run_building_title ("\U0001F7E1 BUILDING"); + static const string check_run_building_summary ( + "Waiting for the build to complete."); + + // Return the colored circle corresponding to a result_status. + // + // Note: the rest of the title is produced by to_string(result_status). + // + static string + circle (result_status rs) + { + switch (rs) + { + case result_status::success: return "\U0001F7E2"; // Green circle. + case result_status::warning: return "\U0001F7E0"; // Orange circle. + case result_status::error: + case result_status::abort: + case result_status::abnormal: return "\U0001F534"; // Red circle. + + // Valid values we should never encounter. + // + case result_status::skip: + case result_status::interrupt: + throw invalid_argument ("unexpected result_status value: " + + to_string (rs)); + } + + return ""; // Should never reach. + } + + bool ci_github:: + handle_branch_push (gh_push_event ps, bool warning_success) + { + HANDLER_DIAG; + + l3 ([&]{trace << "push event { " << ps << " }";}); + + // Cancel the CI tenant associated with the overwritten/deleted previous + // head commit if this is a forced push or a branch deletion. + // + if (ps.forced || ps.deleted) + { + // Service id that will uniquely identify the CI tenant. + // + string sid (ps.repository.node_id + ':' + ps.before); + + // Note that it's possible this commit still exists in another branch so + // we do refcount-aware cancel. + // + if (optional<tenant_service> ts = cancel (error, warn, + verb_ ? &trace : nullptr, + *build_db_, retry_, + "ci-github", sid, + true /* ref_count */)) + { + l3 ([&]{trace << (ps.forced ? "forced push " + ps.after + " to " + : "deletion of ") + << ps.ref << ": attempted to cancel CI of previous" + << " head commit with tenant_service id " << sid + << " (ref_count: " << ts->ref_count << ')';}); + } + else + { + // It's possible that there was no CI for the previous commit for + // various reasons (e.g., CI was not enabled). + // + l3 ([&]{trace << (ps.forced ? "forced push " + ps.after + " to " + : "deletion of ") + << ps.ref << ": failed to cancel CI of previous" + << " head commit with tenant_service id " << sid;}); + } + } + + if (ps.deleted) + return true; // Do nothing further if this was a branch deletion. + + // While we don't need the installation access token in this request, + // let's obtain it to flush out any permission issues early. Also, it is + // valid for an hour so we will most likely make use of it. 
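+ // (The token is also cached in the service data and refreshed once it
+ // expires; see handle_check_run_rerequest() below.)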
+ // + optional<string> jwt (generate_jwt (ps.app_id, trace, error)); + if (!jwt) + throw server_error (); + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (ps.installation.id, + move (*jwt), + error)); + if (!iat) + throw server_error (); + + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + // While it would have been nice to cancel CIs of PRs with this branch as + // base not to waste resources, there are complications: Firstly, we can + // only do this for remote PRs (since local PRs will most likely share the + // result with branch push). Secondly, we try to do our best even if the + // branch protection rule for head behind is not enabled. In this case, it + // would be good to complete the CI. So maybe/later. See also the head + // case in handle_pull_request(), where we do cancel remote PRs that are + // not shared. + + // Service id that uniquely identifies the CI tenant. + // + string sid (ps.repository.node_id + ':' + ps.after); + + service_data sd (warning_success, + iat->token, + iat->expires_at, + ps.app_id, + ps.installation.id, + move (ps.repository.node_id), + move (ps.repository.clone_url), + service_data::local, + false /* pre_check */, + false /* re_requested */, + ps.after /* check_sha */, + ps.after /* report_sha */); + + // Create an unloaded CI tenant, doing nothing if one already exists + // (which could've been created by handle_pull_request() or by us as a + // result of a push to another branch). Note that the tenant's reference + // count is incremented in all cases. + // + // Note: use no delay since we need to (re)create the synthetic conclusion + // check run as soon as possible. + // + // Note that we use the create() API instead of start() since duplicate + // management is not available in start(). + // + // After this call we will start getting the build_unloaded() + // notifications until (1) we load the tenant, (2) we cancel it, or (3) + // it gets archived after some timeout. + // + if (!create (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + tenant_service (sid, "ci-github", sd.json ()), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */, + duplicate_tenant_mode::ignore)) + { + fail << "push " + ps.after + " to " + ps.ref + << ": unable to create unloaded CI tenant"; + } + + return true; + } + + // Miscellaneous pull request facts + // + // - Although some of the GitHub documentation makes it sound like they + // expect check runs to be added to both the PR head commit and the merge + // commit, the PR UI does not react to the merge commit's check runs + // consistently. It actually seems to be quite broken. The only thing it + // does seem to do reliably is blocking the PR merge if the merge commit's + // check runs are not successful (i.e, overriding the PR head commit's + // check runs). But the UI looks quite messed up generally in this state. + // + // - When new commits are added to a PR base branch, pull_request.base.sha + // does not change, but the test merge commit will be updated to include + // the new commits to the base branch. + // + // - When new commits are added to a PR head branch, pull_request.head.sha + // gets updated with the head commit's SHA and check_suite.pull_requests[] + // will contain all PRs with this branch as head. 
+ // + bool ci_github:: + handle_pull_request (gh_pull_request_event pr, bool warning_success) + { + HANDLER_DIAG; + + l3 ([&]{trace << "pull_request event { " << pr << " }";}); + + // While we don't need the installation access token in this request, + // let's obtain it to flush out any permission issues early. Also, it is + // valid for an hour so we will most likely make use of it. + // + optional<string> jwt (generate_jwt (pr.pull_request.app_id, trace, error)); + if (!jwt) + throw server_error (); + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (pr.installation.id, + move (*jwt), + error)); + if (!iat) + throw server_error (); + + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + // Distinguish between local and remote PRs by comparing the head and base + // repositories' paths. + // + service_data::kind_type kind ( + pr.pull_request.head_path == pr.pull_request.base_path + ? service_data::local + : service_data::remote); + + // Note that similar to the branch push case above, while it would have + // been nice to cancel the previous CI job once the PR head moves (the + // "synchronize" event), due to the head sharing problem the previous CI + // job might actually still be relevant (in both local and remote PR + // cases). So we only do it for the remote PRs and only if the head is not + // shared (via tenant reference counting). + // + if (kind == service_data::remote && pr.action == "synchronize") + { + if (pr.before) + { + // Service id that will uniquely identify the CI tenant. + // + string sid (pr.repository.node_id + ':' + *pr.before); + + if (optional<tenant_service> ts = cancel (error, warn, + verb_ ? &trace : nullptr, + *build_db_, retry_, + "ci-github", sid, + true /* ref_count */)) + { + l3 ([&]{trace << "pull request " << pr.pull_request.node_id + << ": attempted to cancel CI of previous head commit" + << " (ref_count: " << ts->ref_count << ')';}); + } + else + { + // It's possible that there was no CI for the previous commit for + // various reasons (e.g., CI was not enabled). + // + l3 ([&]{trace << "pull request " << pr.pull_request.node_id + << ": failed to cancel CI of previous head commit " + << "with tenant_service id " << sid;}); + } + } + else + { + error << "pull request " << pr.pull_request.node_id + << ": before commit is missing in synchronize event"; + } + } + + // Note: for remote PRs the check_sha will be set later, in + // build_unloaded_pre_check(), to test merge commit id. + // + string check_sha (kind == service_data::local + ? pr.pull_request.head_sha + : ""); + + // Note that PR rebuilds (re-requested) are handled by + // handle_check_suite_rerequest(). + // + // Note that, in the case of a remote PR, GitHub will copy the PR head + // commit from the head (forked) repository into the base repository. So + // the check runs must always be added to the base repository, whether the + // PR is local or remote. The head commit refs are located at + // refs/pull/<PR-number>/head. + // + service_data sd (warning_success, + move (iat->token), + iat->expires_at, + pr.pull_request.app_id, + pr.installation.id, + move (pr.repository.node_id), + move (pr.repository.clone_url), + kind, true /* pre_check */, false /* re_request */, + move (check_sha), + move (pr.pull_request.head_sha) /* report_sha */, + pr.pull_request.node_id, + pr.pull_request.number); + + // Create an unloaded CI tenant for the pre-check phase (during which we + // wait for the PR's merge commit and behindness to become available). 
+ // + // Create with an empty service id so that the generated tenant id is used + // instead during the pre-check phase (so as not to clash with a proper + // service id for this head commit, potentially created in + // handle_branch_push() or as another PR). + // + tenant_service ts ("", "ci-github", sd.json ()); + + // Note: use no delay since we need to start the actual CI (which in turn + // (re)creates the synthetic conclusion check run) as soon as possible. + // + // After this call we will start getting the build_unloaded() + // notifications -- which will be routed to build_unloaded_pre_check() -- + // until we cancel the tenant or it gets archived after some timeout. + // (Note that we never actually load this request, we always cancel it; + // see build_unloaded_pre_check() for details.) + // + if (!create (error, + warn, + verb_ ? &trace : nullptr, + *build_db_, retry_, + move (ts), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */)) + { + fail << "pull request " << pr.pull_request.node_id + << ": unable to create unloaded pre-check tenant"; + } + + return true; + } + + bool ci_github:: + handle_check_suite_rerequest (gh_check_suite_event cs, bool warning_success) + { + HANDLER_DIAG; + + l3 ([&]{trace << "check_suite event { " << cs << " }";}); + + assert (cs.action == "rerequested"); + + // While we don't need the installation access token in this request, + // let's obtain it to flush out any permission issues early. Also, it is + // valid for an hour so we will most likely make use of it. + // + optional<string> jwt (generate_jwt (cs.check_suite.app_id, trace, error)); + if (!jwt) + throw server_error (); + + optional<gh_installation_access_token> iat ( + obtain_installation_access_token (cs.installation.id, + move (*jwt), + error)); + if (!iat) + throw server_error (); + + l3 ([&]{trace << "installation_access_token { " << *iat << " }";}); + + // Service id that uniquely identifies the CI tenant. + // + string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha); + + // If the user requests a rebuild of the (entire) PR, then this manifests + // as the check_suite rather than pull_request event. Specifically: + // + // - For a local PR, this event is shared with the branch push and all we + // need to do is restart the CI for the head commit. + // + // - For a remote PR, this event will have no gh_check_suite::head_branch. + // In this case we need to load the existing service data for this head + // commit, extract the test merge commit, and restart the CI for that. + // + // Note that it's possible the base branch has moved in the meantime and + // ideally we would want to re-request the test merge commit, etc. + // However, this will only be necessary if the user does not follow our + // recommendation of enabling the head-behind-base protection. And it + // seems all this extra complexity would not be warranted. + // + string check_sha; + service_data::kind_type kind; + + if (!cs.check_suite.head_branch) + { + // Rebuild of remote PR. + // + kind = service_data::remote; + + if (optional<tenant_data> d = find (*build_db_, "ci-github", sid)) + { + tenant_service& ts (d->service); + + try + { + service_data sd (*ts.data); + check_sha = move (sd.check_sha); // Test merge commit. 
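+
+ // (For remote PRs the check_sha saved by build_unloaded_pre_check()
+ // is the test merge commit id; see handle_pull_request() above.)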
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "failed to parse service data: " << e;
+ }
+ }
+ else
+ {
+ error << "check suite " << cs.check_suite.node_id
+ << " for remote pull request:"
+ << " re-requested but tenant_service with id " << sid
+ << " did not exist";
+ return true;
+ }
+ }
+ else
+ {
+ // Rebuild of branch push or local PR.
+ //
+ kind = service_data::local;
+ check_sha = cs.check_suite.head_sha;
+ }
+
+ service_data sd (warning_success,
+ iat->token,
+ iat->expires_at,
+ cs.check_suite.app_id,
+ cs.installation.id,
+ move (cs.repository.node_id),
+ move (cs.repository.clone_url),
+ kind, false /* pre_check */, true /* re_requested */,
+ move (check_sha),
+ move (cs.check_suite.head_sha) /* report_sha */);
+
+ // Replace the existing CI tenant if it exists.
+ //
+ // Note that GitHub UI does not allow re-running the entire check suite
+ // until all the check runs are completed.
+ //
+
+ // Create an unloaded CI tenant.
+ //
+ // Note: use no delay since we need to (re)create the synthetic conclusion
+ // check run as soon as possible.
+ //
+ // Note that we use the create() API instead of start() since duplicate
+ // management is not available in start().
+ //
+ // After this call we will start getting the build_unloaded()
+ // notifications until (1) we load the tenant, (2) we cancel it, or (3)
+ // it gets archived after some timeout.
+ //
+ auto pr (create (error,
+ warn,
+ verb_ ? &trace : nullptr,
+ *build_db_, retry_,
+ tenant_service (sid, "ci-github", sd.json ()),
+ chrono::seconds (30) /* interval */,
+ chrono::seconds (0) /* delay */,
+ duplicate_tenant_mode::replace));
+
+ if (!pr)
+ {
+ fail << "check suite " << cs.check_suite.node_id
+ << ": unable to create unloaded CI tenant";
+ }
+
+ if (pr->second == duplicate_tenant_result::created)
+ {
+ error << "check suite " << cs.check_suite.node_id
+ << ": re-requested but tenant_service with id " << sid
+ << " did not exist";
+ return true;
+ }
+
+ return true;
+ }
+
+ bool ci_github::
+ handle_check_suite_completed (gh_check_suite_event cs, bool warning_success)
+ {
+ // The plan is as follows:
+ //
+ // 1. Load the service data.
+ //
+ // 2. Verify it is completed.
+ //
+ // 3. Verify the check run counts match.
+ //
+ // 4. Verify (like in build_built()) that all the check runs are
+ // completed.
+ //
+ // 5. Verify the result matches what GitHub thinks it is.
+
+ HANDLER_DIAG;
+
+ l3 ([&]{trace << "check_suite event { " << cs << " }";});
+
+ // Service id that uniquely identifies the CI tenant.
+ //
+ string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha);
+
+ // The common log entry subject.
+ //
+ string sub ("check suite " + cs.check_suite.node_id + '/' + sid);
+
+ // Load the service data.
+ //
+ service_data sd;
+
+ if (optional<tenant_data> d = find (*build_db_, "ci-github", sid))
+ {
+ try
+ {
+ sd = service_data (*d->service.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "failed to parse service data: " << e;
+ }
+ }
+ else
+ {
+ error << sub << ": tenant_service does not exist";
+ return true;
+ }
+
+ // Verify the completed flag and the number of check runs.
+ //
+ if (!sd.completed)
+ {
+ error << sub << ": service data completed flag is false";
+ return true;
+ }
+
+ // Received count will be one higher because we don't store the conclusion
+ // check run.
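+ // (Only its node id is stored, in conclusion_node_id; see service_data
+ // for details.)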
+ //
+ size_t check_runs_count (sd.check_runs.size () + 1);
+
+ if (check_runs_count == 1)
+ {
+ error << sub << ": no check runs in service data";
+ return true;
+ }
+
+ if (cs.check_suite.check_runs_count != check_runs_count)
+ {
+ error << sub << ": check runs count " << cs.check_suite.check_runs_count
+ << " does not match service data count " << check_runs_count;
+ return true;
+ }
+
+ // Verify that all the check runs are built and compute the summary
+ // conclusion.
+ //
+ result_status conclusion (result_status::success);
+
+ for (const check_run& cr: sd.check_runs)
+ {
+ if (cr.state == build_state::built)
+ {
+ assert (cr.status.has_value ());
+ conclusion |= *cr.status;
+ }
+ else
+ {
+ error << sub << ": unbuilt check run in service data";
+ return true;
+ }
+ }
+
+ // Verify the conclusion.
+ //
+ if (!cs.check_suite.conclusion)
+ {
+ error << sub << ": absent conclusion in completed check suite";
+ return true;
+ }
+
+ // Note that the case mismatch is due to GraphQL (gh_conclusion())
+ // requiring uppercase conclusion values while the received webhook values
+ // are lower case.
+ //
+ string gh_conclusion (gh_to_conclusion (conclusion, warning_success));
+
+ if (icasecmp (*cs.check_suite.conclusion, gh_conclusion) != 0)
+ {
+ error << sub << ": conclusion " << *cs.check_suite.conclusion
+ << " does not match service data conclusion " << gh_conclusion;
+ return true;
+ }
+
+ return true;
+ }
+
+ // Make a check run summary from a CI start_result.
+ //
+ static string
+ to_check_run_summary (const optional<ci_start::start_result>& r)
+ {
+ string s;
+
+ s = "```\n";
+ if (r) s += r->message;
+ else s += "Internal service error";
+ s += "\n```";
+
+ return s;
+ }
+
+ // Create a gq_built_result.
+ //
+ // Throw invalid_argument in case of invalid result_status.
+ //
+ static gq_built_result
+ make_built_result (result_status rs, bool warning_success, string message)
+ {
+ string title (circle (rs == result_status::warning && !warning_success
+ ? result_status::error
+ : rs));
+ title += ' ';
+ title += ucase (to_string (rs));
+
+ return {gh_to_conclusion (rs, warning_success),
+ move (title),
+ move (message)};
+ }
+
+ // Parse a check run details URL into a build_id.
+ //
+ // Return nullopt if the URL is invalid.
+ //
+ static optional<build_id>
+ parse_details_url (const string& details_url);
+
+ // Note that GitHub always posts a message to their GUI saying "You have
+ // successfully requested <check_run_name> be rerun", regardless of what
+ // HTTP status code we respond with. However we do return error status codes
+ // when there is no better option (like failing the conclusion) in case they
+ // start handling them someday.
+ //
+ bool ci_github::
+ handle_check_run_rerequest (const gh_check_run_event& cr,
+ bool warning_success)
+ {
+ HANDLER_DIAG;
+
+ l3 ([&]{trace << "check_run event { " << cr << " }";});
+
+ // The overall plan is as follows:
+ //
+ // 1. Load service data.
+ //
+ // 2. If the tenant is archived, then fail (re-create) both the check run
+ // and the conclusion with appropriate diagnostics.
+ //
+ // 3. If the check run is in the queued state, then do nothing.
+ //
+ // 4. Re-create the check run in the queued state and the conclusion in
+ // the building state. Note: do in a single request to make sure we
+ // either "win" or "lose" the potential race for both (important
+ // for #7).
+ //
+ // 5. Call the rebuild() function to attempt to schedule a rebuild. Pass
+ // the update function that does the following (if called):
+ //
+ // a.
+ //
+ // b. Update the check run state (may also not exist).
+ //
+ // c. Clear the completed flag if true.
+ //
+ // 6. If the result of rebuild() indicates the tenant is archived, then
+ // fail (update) both the check run and conclusion with appropriate
+ // diagnostics.
+ //
+ // 7. If the original state is queued (no rebuild was scheduled), then
+ // fail (update) both the check run and the conclusion.
+ //
+ // Note that while conceptually we are updating existing check runs, in
+ // practice we have to re-create them as new check runs in order to
+ // replace the existing ones because GitHub does not allow transitioning
+ // out of the built state.
+
+ // Get a new installation access token.
+ //
+ auto get_iat = [this, &trace, &error, &cr] ()
+ -> optional<gh_installation_access_token>
+ {
+ optional<string> jwt (generate_jwt (cr.check_run.app_id, trace, error));
+ if (!jwt)
+ return nullopt;
+
+ optional<gh_installation_access_token> iat (
+ obtain_installation_access_token (cr.installation.id,
+ move (*jwt),
+ error));
+
+ if (iat)
+ l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
+
+ return iat;
+ };
+
+ const string& repo_node_id (cr.repository.node_id);
+ const string& head_sha (cr.check_run.check_suite.head_sha);
+
+ // Prepare the build and conclusion check runs. They are sent to GitHub in
+ // a single request (unless something goes wrong) so store them together
+ // from the outset.
+ //
+ brep::check_runs check_runs (2);
+ check_run& bcr (check_runs[0]); // Build check run
+ check_run& ccr (check_runs[1]); // Conclusion check run
+
+ ccr.name = conclusion_check_run_name;
+
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ // Load the service data, failing the check runs if the tenant has been
+ // archived.
+ //
+ service_data sd;
+ string tenant_id;
+ {
+ // Service id that uniquely identifies the CI tenant.
+ //
+ string sid (repo_node_id + ':' + head_sha);
+
+ optional<tenant_data> d (find (*build_db_, "ci-github", sid));
+ if (!d)
+ {
+ // No such tenant.
+ //
+ fail << "check run " << cr.check_run.node_id
+ << " re-requested but tenant_service with id " << sid
+ << " does not exist";
+ }
+
+ tenant_service& ts (d->service);
+
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "failed to parse service data: " << e;
+ }
+
+ if (!sd.conclusion_node_id)
+ fail << "no conclusion node id for check run " << cr.check_run.node_id;
+
+ tenant_id = d->tenant_id;
+
+ // Get a new IAT if the one from the service data has expired.
+ //
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if ((new_iat = get_iat ()))
+ iat = &*new_iat;
+ else
+ throw server_error ();
+ }
+ else
+ iat = &sd.installation_access;
+
+ if (d->archived) // Tenant is archived
+ {
+ // Fail (update) the check runs.
+ //
+ gq_built_result br (
+ make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has "
+ "been archived"));
+
+ // Try to update the conclusion check run even if the first update
+ // fails.
+ //
+ bool f (false); // Failed.
+ + if (gq_update_check_run (error, bcr, iat->token, + repo_node_id, cr.check_run.node_id, + br)) + { + l3 ([&]{trace << "updated check_run { " << bcr << " }";}); + } + else + { + error << "check_run " << cr.check_run.node_id + << ": unable to update check run"; + f = true; + } + + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *sd.conclusion_node_id, + move (br))) + { + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); + } + else + { + error << "check_run " << cr.check_run.node_id + << ": unable to update conclusion check run"; + f = true; + } + + // Fail the handler if either of the check runs could not be + // updated. + // + if (f) + throw server_error (); + + return true; + } + } + + // Fail if it's the conclusion check run that is being re-requested. + // + // Expect that if the user selects re-run all failed checks we will + // receive multiple check runs, one of which will be the conclusion. And + // if we fail it while it happens to arrive last, then we will end up in + // the wrong overall state (real check run is building while conclusion is + // failed). It seems the best we can do is to ignore it: if the user did + // request a rebuild of the conclusion check run explicitly, there will be + // no change, which is not ideal but is still an indication that this + // operation is not supported. + // + if (cr.check_run.name == conclusion_check_run_name) + { + l3 ([&]{trace << "re-requested conclusion check_run";}); + +#if 0 + if (!sd.conclusion_node_id) + fail << "no conclusion node id for check run " << cr.check_run.node_id; + + gq_built_result br ( + make_built_result (result_status::error, warning_success, + "Conclusion check run cannot be rebuilt")); + + // Fail (update) the conclusion check run. + // + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *sd.conclusion_node_id, + move (br))) + { + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); + } + else + { + fail << "check run " << cr.check_run.node_id + << ": unable to update conclusion check run " + << *sd.conclusion_node_id; + } +#endif + + return true; + } + + // Parse the check_run's details_url to extract build id. + // + // While this is a bit hackish, there doesn't seem to be a better way + // (like associating custom data with a check run). Note that the GitHub + // UI only allows rebuilding completed check runs, so the details URL + // should be there. + // + optional<build_id> bid (parse_details_url (cr.check_run.details_url)); + if (!bid) + { + fail << "check run " << cr.check_run.node_id + << ": failed to extract build id from details_url"; + } + + // Initialize the check run (`bcr`) with state from the service data. + // + { + // Search for the check run in the service data. + // + // Note that we look by name in case node id got replaced by a racing + // re-request (in which case we ignore this request). + // + auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (), + [&cr] (const check_run& scr) + { + return scr.name == cr.check_run.name; + })); + + if (i == sd.check_runs.end ()) + fail << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "re-requested but does not exist in service data"; + + // Do nothing if node ids don't match. 
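+ // (A node id mismatch means a racing re-request has already re-created
+ // this check run; the update_sd callback below performs the analogous
+ // check before updating the service data.)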
+ // + if (i->node_id && *i->node_id != cr.check_run.node_id) + { + l3 ([&]{trace << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "node id has changed in service data";}); + return true; + } + + // Do nothing if the build is already queued. + // + if (i->state == build_state::queued) + { + l3 ([&]{trace << "ignoring already-queued check run";}); + return true; + } + + bcr.name = i->name; + bcr.build_id = i->build_id; + bcr.state = i->state; + } + + // Transition the build and conclusion check runs out of the built state + // (or any other state) by re-creating them. + // + bcr.state = build_state::queued; + bcr.state_synced = false; + bcr.details_url = cr.check_run.details_url; + bcr.description = {check_run_queued_title, check_run_queued_summary}; + + ccr.state = build_state::building; + ccr.state_synced = false; + ccr.details_url = details_url (tenant_id); + ccr.description = {conclusion_building_title, + conclusion_building_summary}; + + if (gq_create_check_runs (error, check_runs, iat->token, + cr.check_run.app_id, repo_node_id, head_sha)) + { + assert (bcr.state == build_state::queued); + assert (ccr.state == build_state::building); + + l3 ([&]{trace << "created check_run { " << bcr << " }";}); + l3 ([&]{trace << "created conclusion check_run { " << ccr << " }";}); + } + else + { + fail << "check run " << cr.check_run.node_id + << ": unable to re-create build and conclusion check runs"; + } + + // Request the rebuild and update service data. + // + bool race (false); + + // Callback function called by rebuild() to update the service data (but + // only if the build is actually restarted). + // + auto update_sd = [&error, &new_iat, &race, + tenant_id = move (tenant_id), + &cr, &bcr, &ccr] (const string& ti, + const tenant_service& ts, + build_state) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + race = false; // Reset. + + if (tenant_id != ti) + { + // The tenant got replaced since we loaded it but we managed to + // trigger a rebuild in the new tenant. Who knows whose check runs are + // visible, so let's fail ours similar to the cases below. + // + race = true; + return nullopt; + } + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullopt; + } + + // Note that we again look by name in case node id got replaced by a + // racing re-request. In this case, however, it's impossible to decide + // who won that race, so let's fail the check suite to be on the safe + // side (in a sense, similar to the rebuild() returning queued below). + // + auto i (find_if ( + sd.check_runs.begin (), sd.check_runs.end (), + [&cr] (const check_run& scr) + { + return scr.name == cr.check_run.name; + })); + + if (i == sd.check_runs.end ()) + { + error << "check_run " << cr.check_run.node_id + << " (" << cr.check_run.name << "): " + << "re-requested but does not exist in service data"; + return nullopt; + } + + if (i->node_id && *i->node_id != cr.check_run.node_id) + { + // Keep the old conclusion node id to make sure any further state + // transitions are ignored. A bit of a hack. + // + race = true; + return nullopt; + } + + *i = bcr; // Update with new node_id, state, state_synced. + + sd.conclusion_node_id = ccr.node_id; + sd.completed = false; + + // Save the IAT if we created a new one. 
+ // + if (new_iat) + sd.installation_access = *new_iat; + + return sd.json (); + }; + + optional<build_state> bs (rebuild (*build_db_, retry_, *bid, update_sd)); + + // If the build has been archived or re-enqueued since we loaded the + // service data, fail (by updating) both the build check run and the + // conclusion check run. Otherwise the build has been successfully + // re-enqueued so do nothing further. + // + if (!race && bs && *bs != build_state::queued) + return true; + + gq_built_result br; // Built result for both check runs. + + if (race || bs) // Race or re-enqueued. + { + // The re-enqueued case: this build has been re-enqueued since we first + // loaded the service data. This could happen if the user clicked + // "re-run" multiple times and another handler won the rebuild() race. + // + // However the winner of the check runs race cannot be determined. + // + // Best case the other handler won the check runs race as well and + // thus everything will proceed normally. Our check runs will be + // invisible and disregarded. + // + // Worst case we won the check runs race and the other handler's check + // runs -- the ones that will be updated by the build_*() notifications + // -- are no longer visible, leaving things quite broken. + // + // Either way, we fail our check runs. In the best case scenario it + // will have no effect; in the worst case scenario it lets the user + // know something has gone wrong. + // + br = make_built_result (result_status::error, warning_success, + "Unable to rebuild, try again"); + } + else // Archived. + { + // The build has expired since we loaded the service data. Most likely + // the tenant has been archived. + // + br = make_built_result ( + result_status::error, warning_success, + "Unable to rebuild individual configuration: build has been archived"); + } + + // Try to update the conclusion check run even if the first update fails. + // + bool f (false); // Failed. + + // Fail the build check run. + // + if (gq_update_check_run (error, bcr, iat->token, + repo_node_id, *bcr.node_id, + br)) + { + l3 ([&]{trace << "updated check_run { " << bcr << " }";}); + } + else + { + error << "check run " << cr.check_run.node_id + << ": unable to update (replacement) check run " + << *bcr.node_id; + f = true; + } + + // Fail the conclusion check run. + // + if (gq_update_check_run (error, ccr, iat->token, + repo_node_id, *ccr.node_id, + move (br))) + { + l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";}); + } + else + { + error << "check run " << cr.check_run.node_id + << ": unable to update conclusion check run " << *ccr.node_id; + f = true; + } + + // Fail the handler if either of the check runs could not be updated. + // + if (f) + throw server_error (); + + return true; + } + + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded (const string& ti, + tenant_service&& ts, + const diag_epilogue& log_writer) const noexcept + { + // NOTE: this function is noexcept and should not throw. + + NOTIFICATION_DIAG (log_writer); + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullptr; + } + + return sd.pre_check + ? 
build_unloaded_pre_check (move (ts), move (sd), log_writer)
+ : build_unloaded_load (ti, move (ts), move (sd), log_writer);
+ }
+
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_unloaded_pre_check (tenant_service&& ts,
+ service_data&& sd,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+ //
+ // In a few places where invalid_argument is unlikely to be thrown and/or
+ // would indicate that things are seriously broken we let it propagate to
+ // the function catch block where the pre-check tenant will be canceled
+ // (otherwise we could end up in an infinite loop, e.g., because the
+ // problematic arguments won't change).
+
+ NOTIFICATION_DIAG (log_writer);
+
+ // We get here for PRs only (but both local and remote). The overall
+ // plan is as follows:
+ //
+ // 1. Ask for the mergeability/behind status/test merge commit.
+ //
+ // 2. If not ready, get called again.
+ //
+ // 3. If not mergeable, behind, or different head (head changed while
+ // waiting for merge commit and thus differs from what's in the
+ // service_data), cancel the pre-check tenant and do nothing.
+ //
+ // 4. Otherwise, create an unloaded CI tenant and cancel ourselves. Note
+ // that all re-requested cases are handled elsewhere.
+ //
+ // Note that in the mixed local/remote case, whether we CI the head
+ // commit or test merge commit will be racy and there is nothing we can
+ // do about it (the purely local case can get "upgraded" to mixed after
+ // we have started the CI job).
+ //
+
+ // Request PR pre-check info (triggering the generation of the test merge
+ // commit on GitHub's side).
+ //
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ optional<gq_pr_pre_check_info> pc (
+ gq_fetch_pull_request_pre_check_info (error,
+ sd.installation_access.token,
+ *sd.pr_node_id));
+
+ if (!pc)
+ {
+ // Test merge commit not available yet: get called again to retry.
+ //
+ return nullptr;
+ }
+
+ // Create the CI tenant if nothing is wrong, otherwise issue diagnostics.
+ //
+ if (pc->behind)
+ {
+ l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+ << ": head is behind base";});
+ }
+ else if (!pc->merge_commit_sha)
+ {
+ l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+ << ": not auto-mergeable";});
+ }
+ else if (pc->head_sha != sd.report_sha)
+ {
+ l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+ << ": head commit has changed";});
+ }
+ else
+ {
+ // Create the CI tenant by reusing the pre-check service data.
+ //
+ sd.pre_check = false;
+
+ // Set the service data's check_sha if this is a remote PR. The test
+ // merge commit refs are located at refs/pull/<PR-number>/merge.
+ //
+ if (sd.kind == service_data::remote)
+ sd.check_sha = *pc->merge_commit_sha;
+
+ // Service id that will uniquely identify the CI tenant.
+ //
+ string sid (sd.repository_node_id + ':' + sd.report_sha);
+
+ // Create an unloaded CI tenant, doing nothing if one already exists
+ // (which could've been created by a head branch push or another PR
+ // sharing the same head commit). Note that the tenant's reference count
+ // is incremented in all cases.
+ //
+ // Note: use no delay since we need to (re)create the synthetic
+ // conclusion check run as soon as possible.
+ //
+ // Note that we use the create() API instead of start() since duplicate
+ // management is not available in start().
+ // + // After this call we will start getting the build_unloaded() + // notifications until (1) we load the tenant, (2) we cancel it, or (3) + // it gets archived after some timeout. + // + try + { + if (auto pr = create (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + tenant_service (sid, "ci-github", sd.json ()), + chrono::seconds (30) /* interval */, + chrono::seconds (0) /* delay */, + duplicate_tenant_mode::ignore)) + { + if (pr->second == duplicate_tenant_result::ignored) + { + // This PR is sharing a head commit with something else. + // + // If this is a local PR then it's probably the branch push, which + // is expected, so do nothing. + // + // If this is a remote PR then it could be anything (branch push, + // local PR, or another remote PR) which in turn means the CI + // result may end up being for head, not merge commit. There is + // nothing we can do about it on our side (the user can enable the + // head-behind-base protection on their side). + // + if (sd.kind == service_data::remote) + { + l3 ([&]{trace << "remote pull request " << *sd.pr_node_id + << ": CI tenant already exists for " << sid;}); + } + } + } + else + { + error << "pull request " << *sd.pr_node_id + << ": failed to create unloaded CI tenant " + << "with tenant_service id " << sid; + + // Fall through to cancel. + } + } + catch (const runtime_error& e) // Database retries exhausted. + { + error << "pull request " << *sd.pr_node_id + << ": failed to create unloaded CI tenant " + << "with tenant_service id " << sid + << ": " << e.what (); + + // Fall through to cancel. + } + } + + // Cancel the pre-check tenant. + // + try + { + if (!cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + ts.type, + ts.id)) + { + // Should never happen (no such tenant). + // + error << "pull request " << *sd.pr_node_id + << ": failed to cancel pre-check tenant with tenant_service id " + << ts.id; + } + } + catch (const runtime_error& e) // Database retries exhausted. + { + error << "pull request " << *sd.pr_node_id + << ": failed to cancel pre-check tenant with tenant_service id " + << ts.id << ": " << e.what (); + } + + return nullptr; + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + error << "pull request " << *sd.pr_node_id + << ": unhandled exception: " << e.what (); + + // Cancel the pre-check tenant otherwise we could end up in an infinite + // loop (see top of function). + // + try + { + if (cancel (error, warn, verb_ ? &trace : nullptr, + *build_db_, retry_, + ts.type, + ts.id)) + l3 ([&]{trace << "canceled pre-check tenant " << ts.id;}); + } + catch (const runtime_error& e) // Database retries exhausted. + { + l3 ([&]{trace << "failed to cancel pre-check tenant " << ts.id << ": " + << e.what ();}); + } + + return nullptr; + } + + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_unloaded_load (const string& tenant_id, + tenant_service&& ts, + service_data&& sd, + const diag_epilogue& log_writer) const noexcept + try + { + // NOTE: this function is noexcept and should not throw. + // + // In a few places where invalid_argument is unlikely to be thrown and/or + // would indicate that things are seriously broken we let it propagate to + // the function catch block where the tenant will be canceled (otherwise + // we could end up in an infinite loop, e.g., because the problematic + // arguments won't change). 
+
+ NOTIFICATION_DIAG (log_writer);
+
+ // Load the tenant, which is essentially the same for both branch push and
+ // PR. The overall plan is as follows:
+ //
+ // - Create a synthetic conclusion check run with the in-progress state.
+ // If unable to, get called again to retry.
+ //
+ // - Load the tenant. If unable to, fail the conclusion check run.
+ //
+ // - Update service data.
+ //
+
+ // Get a new installation access token if the current one has expired.
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if (optional<string> jwt = generate_jwt (sd.app_id, trace, error))
+ {
+ new_iat = obtain_installation_access_token (sd.installation_id,
+ move (*jwt),
+ error);
+ if (new_iat)
+ iat = &*new_iat;
+ }
+ }
+ else
+ iat = &sd.installation_access;
+
+ if (iat == nullptr)
+ return nullptr; // Try again on the next call.
+
+ // Create a synthetic check run with an in-progress state. Return the
+ // check run on success or nullopt on failure.
+ //
+ auto create_synthetic_cr = [&tenant_id,
+ iat,
+ &sd,
+ &error,
+ this] (string name,
+ const string& title,
+ const string& summary)
+ -> optional<check_run>
+ {
+ check_run cr;
+ cr.name = move (name);
+
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ if (gq_create_check_run (error,
+ cr,
+ iat->token,
+ sd.app_id,
+ sd.repository_node_id,
+ sd.report_sha,
+ details_url (tenant_id),
+ build_state::building,
+ title, summary))
+ {
+ return cr;
+ }
+ else
+ return nullopt;
+ };
+
+ // Update a synthetic check run with success or failure. Return the check
+ // run on success or nullopt on failure.
+ //
+ auto update_synthetic_cr = [iat,
+ &sd,
+ &error] (const string& node_id,
+ const string& name,
+ result_status rs,
+ string summary) -> optional<check_run>
+ {
+ assert (!node_id.empty ());
+
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ gq_built_result br (
+ make_built_result (rs, sd.warning_success, move (summary)));
+
+ check_run cr;
+ cr.name = name; // For display purposes only.
+
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ if (gq_update_check_run (error,
+ cr,
+ iat->token,
+ sd.repository_node_id,
+ node_id,
+ move (br)))
+ {
+ assert (cr.state == build_state::built);
+ return cr;
+ }
+ else
+ return nullopt;
+ };
+
+ // (Re)create the synthetic conclusion check run first in order to convert
+ // a potentially completed check suite to building as early as possible.
+ //
+ // Note that there is a window between receipt of a check_suite or
+ // pull_request event and the first bot/worker asking for a task, which
+ // could be substantial. We could probably (also) try to (re)create the
+ // conclusion check run in the webhook handler. @@ Maybe/later.
+ //
+ string conclusion_node_id; // Conclusion check run node ID.
+
+ if (!sd.conclusion_node_id)
+ {
+ if (auto cr = create_synthetic_cr (conclusion_check_run_name,
+ conclusion_building_title,
+ conclusion_building_summary))
+ {
+ l3 ([&]{trace << "created check_run { " << *cr << " }";});
+
+ conclusion_node_id = move (*cr->node_id);
+ }
+ }
+
+ const string& effective_conclusion_node_id (
+ sd.conclusion_node_id
+ ? *sd.conclusion_node_id
+ : conclusion_node_id);
+
+ // Load the CI tenant if the conclusion check run was created.
+ //
+ if (!effective_conclusion_node_id.empty ())
+ {
+ string ru; // Repository URL.
+
+ // CI the test merge commit for remote PRs and the head commit for
+ // everything else (branch push or local PRs).
+ //
+ if (sd.kind == service_data::remote)
+ {
+ // E.g. #pull/28/merge@1b6c9a361086ed93e6f1e67189e82d52de91c49b
+ //
+ ru = sd.repository_clone_url + "#pull/" + to_string (*sd.pr_number) +
+ "/merge@" + sd.check_sha;
+ }
+ else
+ ru = sd.repository_clone_url + '#' + sd.check_sha;
+
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ repository_location rl (move (ru), repository_type::git);
+
+ try
+ {
+ optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_,
+ move (ts),
+ move (rl)));
+
+ if (!r || r->status != 200)
+ {
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ if (auto cr = update_synthetic_cr (effective_conclusion_node_id,
+ conclusion_check_run_name,
+ result_status::error,
+ to_check_run_summary (r)))
+ {
+ l3 ([&]{trace << "updated check_run { " << *cr << " }";});
+ }
+ else
+ {
+ // There is really nothing we can do in this case since we will not
+ // receive any further notifications. Log the error as a last
+ // resort.
+
+ error << "failed to load CI tenant " << ts.id
+ << " and unable to update conclusion";
+ }
+
+ return nullptr; // No need to update service data in this case.
+ }
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ error << "failed to load CI tenant " << ts.id << ": " << e.what ();
+
+ // Fall through to retry on next call.
+ }
+ }
+
+ if (!new_iat && conclusion_node_id.empty ())
+ return nullptr; // Nothing to save (but potentially retry on next call).
+
+ return [&error,
+ tenant_id,
+ iat = move (new_iat),
+ cni = move (conclusion_node_id)]
+ (const string& ti,
+ const tenant_service& ts) -> optional<string>
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to
+ // transaction being aborted) and so should not move out of its
+ // captures.
+
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullopt;
+ }
+
+ if (iat)
+ sd.installation_access = *iat;
+
+ if (!cni.empty ())
+ sd.conclusion_node_id = cni;
+
+ return sd.json ();
+ };
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+ error << "CI tenant " << ts.id << ": unhandled exception: " << e.what ();
+
+ // Cancel the tenant, otherwise we could end up in an infinite loop (see
+ // the top of the function).
+ //
+ try
+ {
+ if (cancel (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_, ts.type, ts.id))
+ l3 ([&]{trace << "canceled CI tenant " << ts.id;});
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ l3 ([&]{trace << "failed to cancel CI tenant " << ts.id
+ << ": " << e.what ();});
+ }
+
+ return nullptr;
+ }
+
+ // Build state change notifications (see tenant-services.hxx for
+ // background). Mapping our state transitions to GitHub poses multiple
+ // problems:
+ //
+ // 1. In our model we have the building->queued (interrupted) and
+ // built->queued (rebuild) transitions. We are going to ignore both of
+ // them when notifying GitHub. The first is not important (we expect the
+ // state to go back to building shortly). The second should normally not
+ // happen and would mean that a completed check suite may go back on its
+ // conclusion (which would be pretty confusing for the user). Note that
+ // the ->queued state transition of a check run rebuild triggered by
+ // us is handled directly in handle_check_run_rerequest().
+ //
+ // So, for GitHub notifications, we only have the following linear
+ // transition sequence:
+ //
+ // -> queued -> building -> built
+ //
+ // Note, however, that because we ignore certain transitions, we can now
+ // observe "degenerate" state changes that we need to ignore:
+ //
+ // building -> [queued] -> building
+ // built -> [queued] -> ...
+ //
+ // 2. As mentioned in tenant-services.hxx, we may observe the notifications
+ // as arriving in the wrong order. Unfortunately, GitHub provides no
+ // mechanisms to help with that. In fact, GitHub does not even prevent
+ // the creation of multiple check runs with the same name (it will always
+ // use the last created instance, regardless of the status, timestamps,
+ // etc). As a result, we cannot, for example, rely on the failure to
+ // create a new check run in response to the queued notification as an
+ // indication of a subsequent notification (e.g., building) having
+ // already occurred.
+ //
+ // The only aid in this area that GitHub provides is that it prevents
+ // updating a check run in the built state to a former state (queued or
+ // building). But one can still create a new check run with the same name
+ // and a former state.
+ //
+ // (Note that we should also be careful if trying to take advantage of
+ // this "check run override" semantics: each created check run gets a new
+ // URL and while the GitHub UI will always point to the last created one
+ // when showing the list of check runs, if the user is already on the
+ // previous check run's URL, nothing will automatically cause them to be
+ // redirected to the new URL. And so the user may sit on the abandoned
+ // check run waiting forever for it to be completed.)
+ //
+ // As a result, we will deal with the out of order problem differently
+ // depending on the notification:
+ //
+ // queued Skip if there is already a check run in service data,
+ // otherwise create new.
+ //
+ // building Skip if there is no check run in service data or it's
+ // not in the queued state, otherwise update.
+ //
+ // built Update if there is a check run in service data unless its
+ // state is built, otherwise create new.
+ //
+ // The rationale for this semantics is as follows: the building
+ // notification is a "nice to have" and can be skipped if things are not
+ // going normally. In contrast, the built notification cannot be skipped
+ // and we must either update the existing check run or create a new one
+ // (hopefully overriding the one created previously, if any). Note that
+ // the likelihood of the built notification being performed at the same
+ // time as queued/building is quite low (unlike queued and building).
+ //
+ // Note also that with this semantics it's unlikely but possible that we
+ // attempt to update the service data in the wrong order. Specifically, it
+ // feels like this should not be possible in the ->building transition
+ // since we skip the building notification unless the check run in the
+ // service data is already in the queued state. But it is theoretically
+ // possible in the ->built transition. For example, we may be updating
+ // the service data for the queued notification after it has already been
+ // updated by the built notification. In such cases we should not be
+ // overriding the latter state (built) with the former (queued).
+ //
+ // 3. 
We may not be able to "conclusively" notify GitHub, for example, due + // to a transient network error. The "conclusively" part means that the + // notification may or may not have gone through (though it feels the + // common case will be the inability to send the request rather than + // receive the reply). + // + // In such cases, we record in the service data that the notification was + // not synchronized and in subsequent notifications we do the best we can: + // if we have node_id, then we update, otherwise, we create (potentially + // overriding the check run created previously). + // + function<optional<string> (const string&, const tenant_service&)> ci_github:: + build_queued (const string& tenant_id, + const tenant_service& ts, + const vector<build>& builds, + optional<build_state> istate, + const build_queued_hints& hs, + const diag_epilogue& log_writer) const noexcept + try + { + // NOTE: this function is noexcept and should not throw. + + NOTIFICATION_DIAG (log_writer); + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullptr; + } + + // Ignore attempts to add new builds to a completed check suite. This can + // happen, for example, if a new build configuration is added before + // the tenant is archived. + // + if (sd.completed) + return nullptr; + + // The builds for which we will be creating check runs. + // + vector<reference_wrapper<const build>> bs; + brep::check_runs crs; // Parallel to bs. + + // Exclude the builds for which we won't be creating check runs. + // + for (const build& b: builds) + { + string bid (gh_check_run_name (b)); // Full build id. + + if (const check_run* scr = sd.find_check_run (bid)) + { + // Another notification has already stored this check run. + // + if (!istate) + { + // Out of order queued notification. + // + warn << "check run " << bid << ": out of order queued " + << "notification; existing state: " << scr->state_string (); + } + else if (*istate == build_state::built) + { + // Unexpected built->queued transition (rebuild). + // + // Note that handle_check_run_rerequest() may trigger an "expected" + // rebuild, in which case our state should be set to queued. + // + if (scr->state != build_state::queued || !scr->state_synced) + warn << "check run " << bid << ": unexpected rebuild"; + } + else + { + // Ignore interrupted. + // + assert (*istate == build_state::building); + } + } + else + { + // No stored check run for this build so prepare to create one. + // + bs.push_back (b); + + crs.push_back ( + check_run {move (bid), + gh_check_run_name (b, &hs), + nullopt, /* node_id */ + build_state::queued, + false /* state_synced */, + nullopt /* status */, + details_url (b), + check_run::description_type {check_run_queued_title, + check_run_queued_summary}}); + } + } + + if (bs.empty ()) // Nothing to do. + return nullptr; + + // Get a new installation access token if the current one has expired. 
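+ // (The same refresh pattern recurs in the notification handlers below;
+ // the new token, if any, is saved to the service data by the returned
+ // update function.)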
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if (optional<string> jwt = generate_jwt (sd.app_id, trace, error))
+ {
+ new_iat = obtain_installation_access_token (sd.installation_id,
+ move (*jwt),
+ error);
+ if (new_iat)
+ iat = &*new_iat;
+ }
+ }
+ else
+ iat = &sd.installation_access;
+
+ // Note: we treat the failure to obtain the installation access token the
+ // same as the failure to notify GitHub (state is updated but not marked
+ // synced).
+ //
+ if (iat != nullptr)
+ {
+ // Create a check_run for each build, all in a single request.
+ //
+ // Let unlikely invalid_argument propagate.
+ //
+ if (gq_create_check_runs (error,
+ crs,
+ iat->token,
+ sd.app_id,
+ sd.repository_node_id,
+ sd.report_sha))
+ {
+ for (const check_run& cr: crs)
+ {
+ // We can only create a check run in the queued state.
+ //
+ assert (cr.state == build_state::queued);
+ l3 ([&]{trace << "created check_run { " << cr << " }";});
+ }
+ }
+ }
+
+ return [tenant_id,
+ bs = move (bs),
+ iat = move (new_iat),
+ crs = move (crs),
+ error = move (error),
+ warn = move (warn)] (const string& ti,
+ const tenant_service& ts) -> optional<string>
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to transaction
+ // being aborted) and so should not move out of its captures.
+
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullopt;
+ }
+
+ if (iat)
+ sd.installation_access = *iat;
+
+ for (size_t i (0); i != bs.size (); ++i)
+ {
+ const check_run& cr (crs[i]);
+
+ // Note that this service data may not be the same as what we observed
+ // in the build_queued() function above. For example, some check runs
+ // that we have queued may have already transitioned to built. So we
+ // skip any check runs that are already present.
+ //
+ if (const check_run* scr = sd.find_check_run (cr.build_id))
+ {
+ // It doesn't look like printing the new/existing check run node_id
+ // will be of any help.
+ //
+ warn << "check run " << cr.build_id << ": out of order queued "
+ << "notification service data update; existing state: "
+ << scr->state_string ();
+ }
+ else
+ sd.check_runs.push_back (cr);
+ }
+
+ return sd.json ();
+ };
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+
+ error << "CI tenant " << ts.id << ": unhandled exception: " << e.what ();
+
+ return nullptr;
+ }
+
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_building (const string& tenant_id,
+ const tenant_service& ts,
+ const build& b,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullptr;
+ }
+
+ // Similar to build_queued(), ignore attempts to add new builds to a
+ // completed check suite.
+ //
+ if (sd.completed)
+ return nullptr;
+
+ optional<check_run> cr; // Updated check run.
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ if (check_run* scr = sd.find_check_run (bid)) // Stored check run.
+ { + // Update the check run if it exists on GitHub and the queued + // notification updated the service data, otherwise do nothing. + // + if (scr->state == build_state::queued) + { + if (scr->node_id) + { + cr = move (*scr); + cr->state_synced = false; + } + else + { + // Network error during queued notification (state unsynchronized), + // ignore. + // + l3 ([&]{trace << "unsynchronized check run " << bid;}); + } + } + else + { + // Ignore interrupted (building -> queued -> building transition). + // + if (scr->state != build_state::building) + { + warn << "check run " << bid << ": out of order building " + << "notification; existing state: " << scr->state_string (); + } + } + } + else + warn << "check run " << bid << ": out of order building " + << "notification; no check run state in service data"; + + if (!cr) + return nullptr; + + // Get a new installation access token if the current one has expired. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } + } + else + iat = &sd.installation_access; + + // Note: we treat the failure to obtain the installation access token the + // same as the failure to notify GitHub (state is updated but not marked + // synced). + // + if (iat != nullptr) + { + // Let unlikely invalid_argument propagate. + // + if (gq_update_check_run (error, + *cr, + iat->token, + sd.repository_node_id, + *cr->node_id, + build_state::building, + check_run_building_title, + check_run_building_summary)) + { + // Do nothing further if the state was already built on GitHub (note + // that this is based on the above-mentioned special GitHub semantics + // of preventing changes to the built status). + // + if (cr->state == build_state::built) + { + warn << "check run " << bid << ": already in built state on GitHub"; + return nullptr; + } + + assert (cr->state == build_state::building); + l3 ([&]{trace << "updated check_run { " << *cr << " }";}); + } + } + + return [tenant_id, + iat = move (new_iat), + cr = move (*cr), + error = move (error), + warn = move (warn)] (const string& ti, + const tenant_service& ts) -> optional<string> + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + if (tenant_id != ti) + return nullopt; // Do nothing if the tenant has been replaced. + + service_data sd; + try + { + sd = service_data (*ts.data); + } + catch (const invalid_argument& e) + { + error << "failed to parse service data: " << e; + return nullopt; + } + + if (iat) + sd.installation_access = *iat; + + // Update the check run only if it is in the queued state. + // + if (check_run* scr = sd.find_check_run (cr.build_id)) + { + if (scr->state == build_state::queued) + *scr = cr; + else + { + warn << "check run " << cr.build_id << ": out of order building " + << "notification service data update; existing state: " + << scr->state_string (); + } + } + else + warn << "check run " << cr.build_id << ": service data state has " + << "disappeared"; + + return sd.json (); + }; + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + string bid (gh_check_run_name (b)); // Full build id. 
+
+ error << "check run " << bid << ": unhandled exception: " << e.what ();
+
+ return nullptr;
+ }
+
+ function<pair<optional<string>, bool> (const string&,
+ const tenant_service&)> ci_github::
+ build_built (const string& tenant_id,
+ const tenant_service& ts,
+ const build& b,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
+ // @@ TODO Include ts.id in diagnostics? Check run build ids alone seem
+ // kind of meaningless. Log lines get pretty long this way however.
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullptr;
+ }
+
+ // Similar to build_queued(), ignore attempts to add new builds to a
+ // completed check suite.
+ //
+ if (sd.completed)
+ return nullptr;
+
+ // Here we only update the state of this check run. If there are no more
+ // unbuilt ones, then the synthetic conclusion check run will be updated
+ // in build_completed(). Note that determining whether we have no more
+ // unbuilt ones would be racy here so instead we do it in the service
+ // data update function that we return.
+
+ check_run cr; // Updated check run.
+ {
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ if (check_run* scr = sd.find_check_run (bid))
+ {
+ if (scr->state != build_state::building)
+ {
+ warn << "check run " << bid << ": out of order built notification; "
+ << "existing state: " << scr->state_string ();
+ }
+
+ // Do nothing if already built (e.g., rebuild).
+ //
+ if (scr->state == build_state::built)
+ return nullptr;
+
+ cr = move (*scr);
+ }
+ else
+ {
+ warn << "check run " << bid << ": out of order built notification; "
+ << "no check run state in service data";
+
+ // Note that we have no hints here and so have to use the full build
+ // id for name.
+ //
+ cr.build_id = move (bid);
+ cr.name = cr.build_id;
+ }
+
+ cr.state_synced = false;
+ }
+
+ // Get a new installation access token if the current one has expired.
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if (optional<string> jwt = generate_jwt (sd.app_id, trace, error))
+ {
+ new_iat = obtain_installation_access_token (sd.installation_id,
+ move (*jwt),
+ error);
+ if (new_iat)
+ iat = &*new_iat;
+ }
+ }
+ else
+ iat = &sd.installation_access;
+
+ // Note: we treat the failure to obtain the installation access token the
+ // same as the failure to notify GitHub (state is updated but not marked
+ // synced).
+ //
+ if (iat != nullptr)
+ {
+ // Prepare the check run's summary field (the build information in an
+ // XHTML table).
+ //
+ string sm; // Summary.
+ {
+ using namespace web::xhtml;
+
+ // Note: let all serialization exceptions propagate. The XML
+ // serialization code can throw bad_alloc or xml::serialization in
+ // case of I/O failures, but we're serializing to a string stream so
+ // both exceptions are unlikely.
+ //
+ ostringstream os;
+ xml::serializer s (os, "check_run_summary");
+
+ // This hack is required to disable XML element name prefixes (which
+ // GitHub does not like). Note that this adds an xmlns declaration for
+ // the XHTML namespace which for now GitHub appears to ignore. If that
+ // ever becomes a problem, then we should redo this with raw XML
+ // serializer calls.
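+ // (Specifically, the overridden start() below starts the element in
+ // the XHTML namespace and then declares that namespace with an empty
+ // prefix, which is what suppresses the prefixes.)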
+ // + struct table: element + { + table (): element ("table") {} + + void + start (xml::serializer& s) const override + { + s.start_element (xmlns, name); + s.namespace_decl (xmlns, ""); + } + } TABLE; + + // Serialize a result row (colored circle, result text, log URL) for + // an operation and result_status. + // + auto tr_result = [this, &b] (xml::serializer& s, + const string& op, + result_status rs) + { + // The log URL. + // + string lu (build_log_url (options_->host (), + options_->root (), + b, + op != "result" ? &op : nullptr)); + + s << TR + << TD << EM << op << ~EM << ~TD + << TD + << circle (rs) << ' ' + << CODE << to_string (rs) << ~CODE + << " (" << A << HREF << lu << ~HREF << "log" << ~A << ')' + << ~TD + << ~TR; + }; + + // Serialize the summary to an XHTML table. + // + s << TABLE + << TBODY; + + tr_result (s, "result", *b.status); + + s << TR + << TD << EM << "package" << ~EM << ~TD + << TD << CODE << b.package_name << ~CODE << ~TD + << ~TR + << TR + << TD << EM << "version" << ~EM << ~TD + << TD << CODE << b.package_version << ~CODE << ~TD + << ~TR + << TR + << TD << EM << "toolchain" << ~EM << ~TD + << TD + << CODE + << b.toolchain_name << '-' << b.toolchain_version.string () + << ~CODE + << ~TD + << ~TR + << TR + << TD << EM << "target" << ~EM << ~TD + << TD << CODE << b.target.string () << ~CODE << ~TD + << ~TR + << TR + << TD << EM << "target config" << ~EM << ~TD + << TD << CODE << b.target_config_name << ~CODE << ~TD + << ~TR + << TR + << TD << EM << "package config" << ~EM << ~TD + << TD << CODE << b.package_config_name << ~CODE << ~TD + << ~TR; + + for (const operation_result& r: b.results) + tr_result (s, r.operation, r.status); + + s << ~TBODY + << ~TABLE; + + sm = os.str (); + } + + gq_built_result br ( + make_built_result (*b.status, sd.warning_success, move (sm))); + + if (cr.node_id) + { + // Update existing check run to built. Let unlikely invalid_argument + // propagate. + // + if (gq_update_check_run (error, + cr, + iat->token, + sd.repository_node_id, + *cr.node_id, + move (br))) + { + assert (cr.state == build_state::built); + l3 ([&]{trace << "updated check_run { " << cr << " }";}); + } + } + else + { + // Create new check run. Let unlikely invalid_argument propagate. + // + // Note that we don't have build hints so will be creating this check + // run with the full build id as name. In the unlikely event that an + // out of order build_queued() were to run before we've saved this + // check run to the service data it will create another check run with + // the shortened name which will never get to the built state. + // + if (gq_create_check_run (error, + cr, + iat->token, + sd.app_id, + sd.repository_node_id, + sd.report_sha, + details_url (b), + move (br))) + { + assert (cr.state == build_state::built); + l3 ([&]{trace << "created check_run { " << cr << " }";}); + } + } + + if (cr.state_synced) + { + // Check run was created/updated successfully to built (with status we + // specified). + // + cr.status = b.status; + } + } + + return [tenant_id, + iat = move (new_iat), + cr = move (cr), + error = move (error), + warn = move (warn)] (const string& ti, + const tenant_service& ts) + { + // NOTE: this lambda may be called repeatedly (e.g., due to transaction + // being aborted) and so should not move out of its captures. + + // Do nothing if the tenant has been replaced. 
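+ // (The bool in the returned pair indicates whether the check suite is
+ // now completed; see the end of this lambda.)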
+ //
+ if (tenant_id != ti)
+ return make_pair (optional<string> (), false);
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return make_pair (optional<string> (), false);
+ }
+
+ // Feels like this could potentially happen in case of an out of order
+ // notification (see above).
+ //
+ if (sd.completed)
+ {
+ // @@ Perhaps this should be a warning but let's try error for now (we
+ // essentially missed a build, which could have failed).
+ //
+ error << "built notification for completed check suite";
+ return make_pair (optional<string> (), false);
+ }
+
+ if (iat)
+ sd.installation_access = *iat;
+
+ // Only update the check_run state in service data if it matches the
+ // state (specifically, status) on GitHub.
+ //
+ if (cr.state_synced)
+ {
+ if (check_run* scr = sd.find_check_run (cr.build_id))
+ {
+ // This will most commonly generate a duplicate warning (see above).
+ // We could save the old state and only warn if it differs but let's
+ // not complicate things for now.
+ //
+#if 0
+ if (scr->state != build_state::building)
+ {
+ warn << "check run " << cr.build_id << ": out of order built "
+ << "notification service data update; existing state: "
+ << scr->state_string ();
+ }
+#endif
+ *scr = cr; // Note: also updates node id if created.
+ }
+ else
+ sd.check_runs.push_back (cr);
+
+ // Determine whether this check suite is completed.
+ //
+ sd.completed = find_if (sd.check_runs.begin (), sd.check_runs.end (),
+ [] (const check_run& scr)
+ {
+ return scr.state != build_state::built;
+ }) == sd.check_runs.end ();
+ }
+
+ return make_pair (optional<string> (sd.json ()), sd.completed);
+ };
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ error << "check run " << bid << ": unhandled exception: " << e.what ();
+
+ return nullptr;
+ }
+
+ void ci_github::
+ build_completed (const string& /* tenant_id */,
+ const tenant_service& ts,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return;
+ }
+
+ // This could have been reset by handle_check_run_rerequest().
+ //
+ if (!sd.completed)
+ return;
+
+ assert (!sd.check_runs.empty ());
+
+ // Here we need to update the state of the synthetic conclusion check run.
+ //
+ result_status result (result_status::success);
+
+ // Conclusion check run summary. Will include the success/warning/failure
+ // count breakdown.
+ //
+ string summary;
+ {
+ // The success/warning/failure counts.
+ //
+ // Note that the warning count will be included in the success or
+ // failure count (depending on the value of sd.warning_success).
+ //
+ size_t succ_count (0), warn_count (0), fail_count (0);
+
+ // Count a result_status under the appropriate category.
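+ // (For illustration: with hypothetical counts and warning_success being
+ // false, the summary constructed below would read along the lines of
+ // `2 failed (1 due to warnings), 3 succeeded, 5 total`.)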
+ // + auto count = [&succ_count, + &warn_count, + &fail_count, + ws = sd.warning_success] (result_status rs) + { + switch (rs) + { + case result_status::success: ++succ_count; break; + + case result_status::error: + case result_status::abort: + case result_status::abnormal: ++fail_count; break; + + case result_status::warning: + { + ++warn_count; + + if (ws) + ++succ_count; + else + ++fail_count; + + break; + } + + case result_status::skip: + case result_status::interrupt: + { + assert (false); + } + } + }; + + for (const check_run& cr: sd.check_runs) + { + assert (cr.state == build_state::built && cr.status); + + result |= *cr.status; + count (*cr.status); + } + + // Construct the conclusion check run summary. + // + ostringstream os; + + // Note: the warning count has already been included in the success or + // failure count. + // + os << fail_count << " failed"; + if (!sd.warning_success && warn_count != 0) + os << " (" << warn_count << " due to warnings)"; + + os << ", " << succ_count << " succeeded"; + if (sd.warning_success && warn_count != 0) + os << " (" << warn_count << " with warnings)"; + + os << ", " << (succ_count + fail_count) << " total"; + + summary = os.str (); + } + + // Get a new installation access token if the current one has expired + // (unlikely since we just returned from build_built()). Note also that we + // are not saving the new token in the service data. + // + const gh_installation_access_token* iat (nullptr); + optional<gh_installation_access_token> new_iat; + + if (system_clock::now () > sd.installation_access.expires_at) + { + if (optional<string> jwt = generate_jwt (sd.app_id, trace, error)) + { + new_iat = obtain_installation_access_token (sd.installation_id, + move (*jwt), + error); + if (new_iat) + iat = &*new_iat; + } + } + else + iat = &sd.installation_access; + + // Note: we treat the failure to obtain the installation access token the + // same as the failure to notify GitHub. + // + if (iat != nullptr) + { + // Update the conclusion check run if all check runs are now built. + // + assert (sd.conclusion_node_id); + + gq_built_result br ( + make_built_result (result, sd.warning_success, move (summary))); + + check_run cr; + + // Set some fields for display purposes. + // + cr.node_id = *sd.conclusion_node_id; + cr.name = conclusion_check_run_name; + + // Let unlikely invalid_argument propagate. + // + if (gq_update_check_run (error, + cr, + iat->token, + sd.repository_node_id, + *sd.conclusion_node_id, + move (br))) + { + assert (cr.state == build_state::built); + l3 ([&]{trace << "updated conclusion check_run { " << cr << " }";}); + } + else + { + // Nothing we can do here except log the error. + // + error << "tenant_service id " << ts.id + << ": unable to update conclusion check run " + << *sd.conclusion_node_id; + } + } + } + catch (const std::exception& e) + { + NOTIFICATION_DIAG (log_writer); + + error << "unhandled exception: " << e.what (); + } + + string ci_github:: + details_url (const build& b) const + { + // This code is based on build_force_url() in mod/build.cxx. 
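+ //
+ // For example (hypothetical host, tenant, and build values):
+ //
+ // https://ci.example.org/@d2586f57-21dc-40b7-beb2-6517ad7917dd
+ // ?builds=libhello&pv=1.2.3&tg=x86_64-linux-gnu
+ // &tc=linux_debian_12&pc=default&th=public-0.17.0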
+ // + return + options_->host () + + tenant_dir (options_->root (), b.tenant).string () + + "?builds=" + mime_url_encode (b.package_name.string ()) + + "&pv=" + mime_url_encode (b.package_version.string ()) + + "&tg=" + mime_url_encode (b.target.string ()) + + "&tc=" + mime_url_encode (b.target_config_name) + + "&pc=" + mime_url_encode (b.package_config_name) + + "&th=" + mime_url_encode (b.toolchain_name) + '-' + + b.toolchain_version.string (); + } + + string ci_github:: + details_url (const string& t) const + { + return + options_->host () + + tenant_dir (options_->root (), t).string () + + "?builds"; + } + + static optional<build_id> + parse_details_url (const string& details_url) + try + { + // See details_url() above for an idea of what the URL looks like. + + url u (details_url); + + build_id r; + + // Extract the tenant from the URL path. + // + // Example paths: + // + // @d2586f57-21dc-40b7-beb2-6517ad7917dd (37 characters) + // <brep-root>/@d2586f57-21dc-40b7-beb2-6517ad7917dd + // + if (!u.path) + return nullopt; + + { + size_t p (u.path->find ('@')); + if (p == string::npos || u.path->size () - p != 37) + return nullopt; // Tenant not found or too short. + + r.package.tenant = u.path->substr (p + 1); + } + + // Extract the rest of the build_id members from the URL query. + // + if (!u.query) + return nullopt; + + bool pn (false), pv (false), tg (false), tc (false), pc (false), + th (false); + + // This URL query parsing code is based on + // web::apache::request::parse_url_parameters(). + // + for (const char* qp (u.query->c_str ()); qp != nullptr; ) + { + const char* vp (strchr (qp, '=')); + const char* ep (strchr (qp, '&')); + + if (vp == nullptr || (ep != nullptr && ep < vp)) + return nullopt; // Missing value. + + string n (mime_url_decode (qp, vp)); // Name. + + ++vp; // Skip '=' + + const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end. + + // Get the value as-is or URL-decode it. + // + auto rawval = [vp, ve] () { return string (vp, ve); }; + auto decval = [vp, ve] () { return mime_url_decode (vp, ve); }; + + auto make_version = [] (string&& v) + { + return canonical_version (brep::version (move (v))); + }; + + auto c = [&n] (bool& b, const char* s) + { + return n == s ? (b = true) : false; + }; + + if (c (pn, "builds")) r.package.name = package_name (decval ()); + else if (c (pv, "pv")) r.package.version = make_version (decval ()); + else if (c (tg, "tg")) r.target = target_triplet (decval ()); + else if (c (tc, "tc")) r.target_config_name = decval (); + else if (c (pc, "pc")) r.package_config_name = decval (); + else if (c (th, "th")) + { + // Toolchain name and version. E.g. "public-0.17.0" + + string v (rawval ()); + + // Note: parsing code based on mod/mod-builds.cxx. + // + size_t p (v.find ('-')); + if (p == string::npos || p >= v.size () - 1) + return nullopt; // Invalid format. + + r.toolchain_name = v.substr (0, p); + r.toolchain_version = make_version (v.substr (p + 1)); + } + + qp = ep != nullptr ? ep + 1 : nullptr; + } + + if (!pn || !pv || !tg || !tc || !pc || !th) + return nullopt; // Fail if any query parameters are absent. + + return r; + } + catch (const invalid_argument&) // Invalid url, brep::version, etc. + { + return nullopt; + } + + optional<string> ci_github:: + generate_jwt (uint64_t app_id, + const basic_mark& trace, + const basic_mark& error) const + { + string jwt; + try + { + // Look up the private key path for the app id and fail if not found. 
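+ // (The JWT is the standard GitHub App token: RS256-signed, with the app
+ // id serving as the `iss` claim and the `iat` claim backdated as
+ // described below.)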
+ //
+ const map<uint64_t, dir_path>& pks (
+ options_->ci_github_app_id_private_key ());
+
+ auto pk (pks.find (app_id));
+ if (pk == pks.end ())
+ {
+ error << "unable to generate JWT: "
+ << "no private key configured for app id " << app_id;
+ return nullopt;
+ }
+
+ // Set token's "issued at" time 60 seconds in the past to combat clock
+ // drift (as recommended by GitHub).
+ //
+ jwt = brep::generate_jwt (
+ *options_,
+ pk->second, to_string (app_id),
+ chrono::seconds (options_->ci_github_jwt_validity_period ()),
+ chrono::seconds (60));
+
+ l3 ([&]{trace << "JWT: " << jwt;});
+ }
+ catch (const system_error& e)
+ {
+ error << "unable to generate JWT (errno=" << e.code () << "): " << e;
+ return nullopt;
+ }
+
+ return jwt;
+ }
+
+ // There are three types of GitHub API authentication:
+ //
+ // 1) Authenticating as an app. Used to access parts of the API concerning
+ // the app itself such as getting the list of installations. (Need to
+ // authenticate as an app as part of authenticating as an app
+ // installation.)
+ //
+ // 2) Authenticating as an app installation (on a user or organisation
+ // account). Used to access resources belonging to the user/repository
+ // or organisation the app is installed in.
+ //
+ // 3) Authenticating as a user. Used to perform actions as the user.
+ //
+ // We need to authenticate as an app installation (2).
+ //
+ // How to authenticate as an app installation
+ //
+ // Reference:
+ // https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app/authenticating-as-a-github-app-installation
+ //
+ // The final authentication token we need is an installation access token
+ // (IAT), valid for one hour, which we will pass in the `Authorization`
+ // header of our GitHub API requests:
+ //
+ // Authorization: Bearer <INSTALLATION_ACCESS_TOKEN>
+ //
+ // To generate an IAT:
+ //
+ // - Generate a JSON Web Token (JWT).
+ //
+ // - Get the installation ID. This will be included in the webhook request
+ // in our case.
+ //
+ // - Send a POST to /app/installations/<INSTALLATION_ID>/access_tokens which
+ // includes the JWT (`Authorization: Bearer <JWT>`). The response will
+ // include the IAT. We can pass the name of the repository included in the
+ // webhook request to restrict access; otherwise we get access to all
+ // repositories covered by the installation (if, for example, the app is
+ // installed on an organisation).
+ //
+ optional<gh_installation_access_token> ci_github::
+ obtain_installation_access_token (const string& iid,
+ string jwt,
+ const basic_mark& error) const
+ {
+ gh_installation_access_token iat;
+ try
+ {
+ // API endpoint.
+ //
+ string ep ("app/installations/" + iid + "/access_tokens");
+
+ uint16_t sc (
+ github_post (iat, ep, strings {"Authorization: Bearer " + jwt}));
+
+ // Possible response status codes from the access_tokens endpoint:
+ //
+ // 201 Created
+ // 401 Requires authentication
+ // 403 Forbidden
+ // 404 Resource not found
+ // 422 Validation failed, or the endpoint has been spammed.
+ //
+ // Note that the payloads of non-201 status codes are undocumented.
+ //
+ if (sc != 201)
+ {
+ error << "unable to get installation access token: error HTTP "
+ << "response status " << sc;
+ return nullopt;
+ }
+
+ // Create a clock drift safety window.
+ //
+ iat.expires_at -= chrono::minutes (5);
+ }
+ // gh_installation_access_token (via github_post())
+ //
+ catch (const json::invalid_json_input& e)
+ {
+ // Note: e.name is the GitHub API endpoint.
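+ // (For this call that is the app/installations/<id>/access_tokens
+ // endpoint constructed above.)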
+ // + error << "malformed JSON in response from " << e.name << ", line: " + << e.line << ", column: " << e.column << ", byte offset: " + << e.position << ", error: " << e; + return nullopt; + } + catch (const invalid_argument& e) // github_post() + { + error << "malformed header(s) in response: " << e; + return nullopt; + } + catch (const system_error& e) // github_post() + { + error << "unable to get installation access token (errno=" << e.code () + << "): " << e.what (); + return nullopt; + } + + return iat; + } +} diff --git a/mod/mod-ci-github.hxx b/mod/mod-ci-github.hxx new file mode 100644 index 0000000..0c90bb1 --- /dev/null +++ b/mod/mod-ci-github.hxx @@ -0,0 +1,164 @@ +// file : mod/mod-ci-github.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_CI_GITHUB_HXX +#define MOD_MOD_CI_GITHUB_HXX + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <mod/module-options.hxx> +#include <mod/database-module.hxx> + +#include <mod/ci-common.hxx> +#include <mod/tenant-service.hxx> + +#include <mod/mod-ci-github-gh.hxx> + +namespace brep +{ + struct service_data; + + class ci_github: public database_module, + private ci_start, + public tenant_service_build_unloaded, + public tenant_service_build_queued, + public tenant_service_build_building, + public tenant_service_build_built + { + public: + explicit + ci_github (tenant_service_map&); + + // Create a shallow copy (handling instance) if initialized and a deep + // copy (context exemplar) otherwise. + // + explicit + ci_github (const ci_github&, tenant_service_map&); + + virtual bool + handle (request&, response&); + + virtual const cli::options& + cli_options () const {return options::ci_github::description ();} + + virtual function<optional<string> (const string&, const tenant_service&)> + build_unloaded (const string& tenant_id, + tenant_service&&, + const diag_epilogue& log_writer) const noexcept override; + + function<optional<string> (const string&, const tenant_service&)> + build_unloaded_pre_check (tenant_service&&, + service_data&&, + const diag_epilogue&) const noexcept; + + function<optional<string> (const string&, const tenant_service&)> + build_unloaded_load (const string& tenant_id, + tenant_service&&, + service_data&&, + const diag_epilogue&) const noexcept; + + virtual function<optional<string> (const string&, const tenant_service&)> + build_queued (const string& tenant_id, + const tenant_service&, + const vector<build>&, + optional<build_state> initial_state, + const build_queued_hints&, + const diag_epilogue& log_writer) const noexcept override; + + virtual function<optional<string> (const string&, const tenant_service&)> + build_building (const string& tenant_id, + const tenant_service&, + const build&, + const diag_epilogue& log_writer) const noexcept override; + + virtual function<pair<optional<string>, bool> (const string&, + const tenant_service&)> + build_built (const string& tenant_id, + const tenant_service&, + const build&, + const diag_epilogue& log_writer) const noexcept override; + + virtual void + build_completed (const string& tenant_id, + const tenant_service& ts, + const diag_epilogue& log_writer) const noexcept override; + + private: + virtual void + init (cli::scanner&); + + // Handle push events (branch push). + // + // If warning_success is true, then map result_status::warning to SUCCESS + // and to FAILURE otherwise. 
+ // + bool + handle_branch_push (gh_push_event, bool warning_success); + + // Handle the pull_request event `opened` and `synchronize` actions. + // + // If warning_success is true, then map result_status::warning to SUCCESS + // and to FAILURE otherwise. + // + bool + handle_pull_request (gh_pull_request_event, bool warning_success); + + // Handle the check_suite event `rerequested` action. + // + // If warning_success is true, then map result_status::warning to SUCCESS + // and to FAILURE otherwise. + // + bool + handle_check_suite_rerequest (gh_check_suite_event, bool warning_success); + + // Handle the check_suite event `completed` action. + // + // If warning_success is true, then map result_status::warning to SUCCESS + // and to FAILURE otherwise. + // + bool + handle_check_suite_completed (gh_check_suite_event, bool warning_success); + + // Handle the check_run event `rerequested` action. + // + // If warning_success is true, then map result_status::warning to SUCCESS + // and to FAILURE otherwise. + // + bool + handle_check_run_rerequest (const gh_check_run_event&, bool warning_success); + + // Build a check run details_url for a build. + // + string + details_url (const build&) const; + + // Build a check run details_url for a tenant. + // + string + details_url (const string& tenant) const; + + optional<string> + generate_jwt (uint64_t app_id, + const basic_mark& trace, + const basic_mark& error) const; + + // Authenticate to GitHub as an app installation. Return the installation + // access token (IAT). Issue diagnostics and return nullopt if something + // goes wrong. + // + optional<gh_installation_access_token> + obtain_installation_access_token (const string& install_id, + string jwt, + const basic_mark& error) const; + + private: + shared_ptr<options::ci_github> options_; + + tenant_service_map& tenant_service_map_; + + string webhook_secret_; + }; +} + +#endif // MOD_MOD_CI_GITHUB_HXX diff --git a/mod/mod-ci.cxx b/mod/mod-ci.cxx index 8c47bc4..85c00c6 100644 --- a/mod/mod-ci.cxx +++ b/mod/mod-ci.cxx @@ -105,17 +105,17 @@ init (scanner& s) fail << "unable to read ci-form file '" << ci_form << "': " << e; } } - } #ifdef BREP_CI_TENANT_SERVICE_UNLOADED - if (!options_->build_config_specified ()) - fail << "package building functionality must be enabled"; + if (!options_->build_config_specified ()) + fail << "package building functionality must be enabled"; - database_module::init (*options_, options_->build_db_retry ()); + database_module::init (*options_, options_->build_db_retry ()); #endif - if (options_->root ().empty ()) - options_->root (dir_path ("/")); + if (options_->root ().empty ()) + options_->root (dir_path ("/")); + } } bool brep::ci:: @@ -131,8 +131,6 @@ handle (request& rq, response& rs) HANDLER_DIAG; - const dir_path& root (options_->root ()); - // We will respond with the manifest to the CI request submission protocol // violations and with a plain text message on the internal errors. In the // latter case we will always respond with the same neutral message for @@ -180,6 +178,8 @@ handle (request& rq, response& rs) if (!options_->ci_data_specified ()) return respond_manifest (404, "CI request submission disabled"); + const dir_path& root (options_->root ()); + // Parse the request form data and verify the submission size limit. // // Note that the submission may include the overrides upload that we don't @@ -387,18 +387,19 @@ handle (request& rq, response& rs) optional<start_result> r; - if (optional<string> ref = create (error, - warn, - verb_ ? 
&trace : nullptr, - *build_db_, - tenant_service ("", "ci", rl.string ()), - chrono::seconds (40), - chrono::seconds (10))) + if (optional<pair<string, duplicate_tenant_result>> ref = + create (error, + warn, + verb_ ? &trace : nullptr, + *build_db_, retry_, + tenant_service ("", "ci", rl.string ()), + chrono::seconds (40), + chrono::seconds (10))) { string msg ("unloaded CI request is created: " + - options_->host () + tenant_dir (root, *ref).string ()); + options_->host () + tenant_dir (root, ref->first).string ()); - r = start_result {200, move (msg), move (*ref), {}}; + r = start_result {200, move (msg), move (ref->first), {}}; } #endif @@ -422,8 +423,10 @@ handle (request& rq, response& rs) } #ifdef BREP_CI_TENANT_SERVICE -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_queued (const tenant_service&, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_queued (const string& /*tenant_id*/, + const tenant_service&, const vector<build>& bs, optional<build_state> initial_state, const build_queued_hints& hints, @@ -437,7 +440,8 @@ build_queued (const tenant_service&, << hints.single_package_version << ' ' << hints.single_package_config;}); - return [&bs, initial_state] (const tenant_service& ts) + return [&bs, initial_state] (const string& tenant_id, + const tenant_service& ts) { optional<string> r (ts.data); @@ -446,6 +450,7 @@ build_queued (const tenant_service&, string s ((!initial_state ? "queued " : "queued " + to_string (*initial_state) + ' ') + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + @@ -467,14 +472,18 @@ build_queued (const tenant_service&, }; } -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_building (const tenant_service&, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_building (const string& /*tenant_id*/, + const tenant_service&, const build& b, const diag_epilogue&) const noexcept { - return [&b] (const tenant_service& ts) + return [&b] (const string& tenant_id, + const tenant_service& ts) { string s ("building " + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + @@ -487,14 +496,17 @@ build_building (const tenant_service&, }; } -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_built (const tenant_service&, +function<pair<optional<string>, bool> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_built (const string& /*tenant_id*/, + const tenant_service&, const build& b, const diag_epilogue&) const noexcept { - return [&b] (const tenant_service& ts) + return [&b] (const string& tenant_id, const tenant_service& ts) { string s ("built " + + tenant_id + '/' + b.package_name.string () + '/' + b.package_version.string () + '/' + b.target.string () + '/' + @@ -503,13 +515,16 @@ build_built (const tenant_service&, b.toolchain_name + '/' + b.toolchain_version.string ()); - return ts.data ? *ts.data + ", " + s : s; + return make_pair ( + optional<string> (ts.data ? 
*ts.data + ", " + s : s), false); }; } #ifdef BREP_CI_TENANT_SERVICE_UNLOADED -function<optional<string> (const brep::tenant_service&)> brep::ci:: -build_unloaded (tenant_service&& ts, +function<optional<string> (const string& tenant_id, + const brep::tenant_service&)> brep::ci:: +build_unloaded (const string& /* tenant_id */, + tenant_service&& ts, const diag_epilogue& log_writer) const noexcept { NOTIFICATION_DIAG (log_writer); @@ -521,7 +536,7 @@ build_unloaded (tenant_service&& ts, repository_location rl (*ts.data); if (!load (error, warn, verb_ ? &trace : nullptr, - *build_db_, + *build_db_, retry_, move (ts), rl)) return nullptr; // The diagnostics is already issued. @@ -534,7 +549,10 @@ build_unloaded (tenant_service&& ts, return nullptr; } - return [] (const tenant_service& ts) {return "loaded " + *ts.data;}; + return [] (const string& tenant_id, const tenant_service& ts) + { + return "loaded " + tenant_id + ' ' + *ts.data; + }; } #endif #endif @@ -590,7 +608,10 @@ handle (request& rq, response& rs) if (tid.empty ()) throw invalid_request (400, "invalid CI request id"); - if (!cancel (error, warn, verb_ ? &trace : nullptr, reason, *build_db_, tid)) + if (!cancel (error, warn, verb_ ? &trace : nullptr, + reason, + *build_db_, retry_, + tid)) throw invalid_request (400, "unknown CI request id"); // We have all the data, so don't buffer the response content. diff --git a/mod/mod-ci.hxx b/mod/mod-ci.hxx index bd91e99..54532e6 100644 --- a/mod/mod-ci.hxx +++ b/mod/mod-ci.hxx @@ -40,7 +40,7 @@ namespace brep public tenant_service_build_building, public tenant_service_build_built #ifdef BREP_CI_TENANT_SERVICE_UNLOADED - , tenant_service_build_unloaded + , public tenant_service_build_unloaded #endif #endif { @@ -71,26 +71,34 @@ namespace brep cli_options () const override {return options::ci::description ();} #ifdef BREP_CI_TENANT_SERVICE - virtual function<optional<string> (const tenant_service&)> - build_queued (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_queued (const string& tenant_id, + const tenant_service&, const vector<build>&, optional<build_state> initial_state, const build_queued_hints&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_building (const tenant_service&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_building (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept override; - virtual function<optional<string> (const tenant_service&)> - build_built (const tenant_service&, + virtual function<pair<optional<string>, bool> (const string& tenant_id, + const tenant_service&)> + build_built (const string& tenant_id, + const tenant_service&, const build&, const diag_epilogue& log_writer) const noexcept override; #ifdef BREP_CI_TENANT_SERVICE_UNLOADED - virtual function<optional<string> (const tenant_service&)> - build_unloaded (tenant_service&&, + virtual function<optional<string> (const string& tenant_id, + const tenant_service&)> + build_unloaded (const string& tenant_id, + tenant_service&&, const diag_epilogue& log_writer) const noexcept override; #endif #endif diff --git a/mod/mod-repository-root.cxx b/mod/mod-repository-root.cxx index 165302d..b0d5e0e 100644 --- a/mod/mod-repository-root.cxx +++ b/mod/mod-repository-root.cxx @@ -15,6 +15,7 @@ #include <mod/module-options.hxx> #include <mod/mod-ci.hxx> +#include 
<mod/mod-ci-github.hxx> #include <mod/mod-submit.hxx> #include <mod/mod-upload.hxx> #include <mod/mod-builds.hxx> @@ -136,6 +137,7 @@ namespace brep ci_ (make_shared<ci> ()), #endif ci_cancel_ (make_shared<ci_cancel> ()), + ci_github_ (make_shared<ci_github> (*tenant_service_map_)), upload_ (make_shared<upload> ()) { } @@ -212,6 +214,10 @@ namespace brep r.initialized_ ? r.ci_cancel_ : make_shared<ci_cancel> (*r.ci_cancel_)), + ci_github_ ( + r.initialized_ + ? r.ci_github_ + : make_shared<ci_github> (*r.ci_github_, *tenant_service_map_)), upload_ ( r.initialized_ ? r.upload_ @@ -244,6 +250,7 @@ namespace brep append (r, submit_->options ()); append (r, ci_->options ()); append (r, ci_cancel_->options ()); + append (r, ci_github_->options ()); append (r, upload_->options ()); return r; } @@ -292,6 +299,7 @@ namespace brep sub_init (*submit_, "submit"); sub_init (*ci_, "ci"); sub_init (*ci_cancel_, "ci-cancel"); + sub_init (*ci_github_, "ci_github"); sub_init (*upload_, "upload"); // Parse own configuration options. @@ -319,7 +327,8 @@ namespace brep "build-configs", "about", "submit", - "ci"}); + "ci", + "ci-github"}); if (find (vs.begin (), vs.end (), v) == vs.end ()) fail << what << " value '" << v << "' is invalid"; @@ -508,6 +517,13 @@ namespace brep return handle ("ci-cancel", param); } + else if (func == "ci-github") + { + if (handler_ == nullptr) + handler_.reset (new ci_github (*ci_github_)); + + return handle ("ci_github", param); + } else if (func == "upload") { if (handler_ == nullptr) diff --git a/mod/mod-repository-root.hxx b/mod/mod-repository-root.hxx index 5a57403..38f6adc 100644 --- a/mod/mod-repository-root.hxx +++ b/mod/mod-repository-root.hxx @@ -27,6 +27,7 @@ namespace brep class submit; class ci; class ci_cancel; + class ci_github; class upload; class repository_root: public handler @@ -78,6 +79,7 @@ namespace brep shared_ptr<submit> submit_; shared_ptr<ci> ci_; shared_ptr<ci_cancel> ci_cancel_; + shared_ptr<ci_github> ci_github_; shared_ptr<upload> upload_; shared_ptr<options::repository_root> options_; diff --git a/mod/mod-submit.cxx b/mod/mod-submit.cxx index 5ee358a..6c767cb 100644 --- a/mod/mod-submit.cxx +++ b/mod/mod-submit.cxx @@ -93,10 +93,10 @@ init (scanner& s) if (options_->submit_handler_specified () && options_->submit_handler ().relative ()) fail << "submit-handler path must be absolute"; - } - if (options_->root ().empty ()) - options_->root (dir_path ("/")); + if (options_->root ().empty ()) + options_->root (dir_path ("/")); + } } bool brep::submit:: @@ -109,8 +109,6 @@ handle (request& rq, response& rs) HANDLER_DIAG; - const dir_path& root (options_->root ()); - // We will respond with the manifest to the submission protocol violations // and with a plain text message on the internal errors. In the latter case // we will always respond with the same neutral message for security reason, @@ -163,6 +161,8 @@ handle (request& rq, response& rs) if (!options_->submit_data_specified ()) return respond_manifest (404, "submission disabled"); + const dir_path& root (options_->root ()); + // Parse the request form data and verify the submission size limit. 
//
// Note that if it is exceeded, then there are parameters and this is the
diff --git a/mod/module.cli b/mod/module.cli
index ccfe032..57a5f31 100644
--- a/mod/module.cli
+++ b/mod/module.cli
@@ -845,11 +845,33 @@ namespace brep
{
};
- class ci_github: ci_start, build, build_db, handler
+ class ci_github: ci_start, ci_cancel, repository_url
{
- // GitHub CI-specific options (e.g., request timeout when invoking
- // GitHub APIs).
+ // GitHub CI-specific options.
//
+
+ path ci-github-app-webhook-secret
+ {
+ "<path>",
+ "The GitHub App's configured webhook secret. If not set, then the
+ GitHub CI service is disabled. Note that the path must be absolute.
+ Note: make sure to choose a strong (random) secret."
+ }
+
+ std::map<uint64_t, dir_path> ci-github-app-id-private-key
+ {
+ "<id>=<path>",
+ "The private key used during GitHub API authentication for the
+ specified GitHub App ID. Both values are found in the GitHub App's
+ settings. Note that the paths must be absolute."
+ }
+
+ uint16_t ci-github-jwt-validity-period = 600
+ {
+ "<seconds>",
+ "The number of seconds a JWT (authentication token) should be valid for.
+ The maximum allowed by GitHub is 10 minutes."
+ }
};
class upload: build, build_db, build_upload, repository_email, handler
@@ -975,6 +997,13 @@ namespace brep
// Only consider tenants with this interactive build mode.
//
bbot::interactive_mode interactive = bbot::interactive_mode::both;
+
+ // Only consider tenants which have third-party services of any of these
+ // types. The special empty type value denotes tenants without the
+ // associated service. If this parameter is absent, then consider all
+ // tenant types.
+ //
+ vector<string> tenant_service_type | t;
};
class build_result
diff --git a/mod/module.hxx b/mod/module.hxx
index f3e062e..ba8958a 100644
--- a/mod/module.hxx
+++ b/mod/module.hxx
@@ -41,6 +41,8 @@ namespace brep
diag_data data;
server_error (diag_data&& d): data (move (d)) {}
+
+ server_error () = default;
};
// Every handler member function that needs to produce any diagnostics
@@ -93,6 +95,7 @@ namespace brep
template <class F> void l1 (const F& f) const {if (verb_ >= 1) f ();}
template <class F> void l2 (const F& f) const {if (verb_ >= 2) f ();}
+ template <class F> void l3 (const F& f) const {if (verb_ >= 3) f ();}
// Set to true when the handler is successfully initialized.
//
diff --git a/mod/tenant-service.cxx b/mod/tenant-service.cxx
new file mode 100644
index 0000000..2c1f3bc
--- /dev/null
+++ b/mod/tenant-service.cxx
@@ -0,0 +1,18 @@
+// file : mod/tenant-service.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <mod/tenant-service.hxx>
+
+namespace brep
+{
+ void tenant_service_build_built::
+ build_completed (const string& /* tenant_id */,
+ const tenant_service&,
+ const diag_epilogue& /* log_writer */) const noexcept
+ {
+ // If this notification is requested, then this function needs to be
+ // overridden by the tenant service implementation.
+ //
+ assert (false);
+ }
+}
diff --git a/mod/tenant-service.hxx b/mod/tenant-service.hxx
index b7f5c02..d909eaa 100644
--- a/mod/tenant-service.hxx
+++ b/mod/tenant-service.hxx
@@ -39,16 +39,20 @@ namespace brep
// While the implementation tries to make sure the notifications arrive in
// the correct order, this is currently done by imposing delays (some
// natural, such as building->built, and some artificial, such as
- // queued->building). 
As result, it is unlikely but possible to be notified
- // about the state transitions in the wrong order, especially if the
- // notifications take a long time. To minimize the chance of this happening,
- // the service implementation should strive to batch the queued state
- // notifications (or which there could be hundreds) in a single request if
+ // queued->building). As a result, it is unlikely but possible to observe the
+ // state transition notifications in the wrong order, especially if
+ // processing notifications can take a long time. For example, while
+ // processing the queued notification, the building notification may arrive
+ // in a different thread. To minimize the chance of this happening, the
+ // service implementation should strive to batch the queued state
+ // notifications (of which there could be hundreds) in a single request if
+ // at all possible. Also, if supported by the third-party API, it makes
// sense for the implementation to protect against overwriting later states
// with earlier. For example, if it's possible to place a condition on a
// notification, it makes sense to only set the state to queued if none of
- // the later states (e.g., building) are already in effect.
+ // the later states (e.g., building) are already in effect. See also
+ // ci_start::rebuild() for additional details on the build->queued
+ // transition.
//
// Note also that it's possible for the build to get deleted at any stage
// without any further notifications. This can happen, for example, due to
@@ -70,8 +74,11 @@ namespace brep
// If the returned function is not NULL, it is called to update the
// service data. It should return the new data or nullopt if no update is
// necessary. Note: tenant_service::data passed to the callback and to the
- // returned function may not be the same. Also, the returned function may
- // be called multiple times (on transaction retries).
+ // returned function may not be the same. Furthermore, tenant_ids may not
+ // be the same either, in case the tenant was replaced. Also, the returned
+ // function may be called multiple times (on transaction retries). Note
+ // that the passed log_writer is valid during the calls to the returned
+ // function.
//
// The passed initial_state indicates the logical initial state and is
// either absent, `building` (interrupted), or `built` (rebuild). Note
@@ -96,8 +103,10 @@ namespace brep
bool single_package_config;
};
- virtual function<optional<string> (const tenant_service&)>
- build_queued (const tenant_service&,
+ virtual function<optional<string> (const string& tenant_id,
+ const tenant_service&)>
+ build_queued (const string& tenant_id,
+ const tenant_service&,
const vector<build>&,
optional<build_state> initial_state,
const build_queued_hints&,
@@ -107,8 +116,10 @@ namespace brep
class tenant_service_build_building: public virtual tenant_service_base
{
public:
- virtual function<optional<string> (const tenant_service&)>
- build_building (const tenant_service&,
+ virtual function<optional<string> (const string& tenant_id,
+ const tenant_service&)>
+ build_building (const string& tenant_id,
+ const tenant_service&,
const build&,
const diag_epilogue& log_writer) const noexcept = 0;
};
@@ -116,25 +127,39 @@ namespace brep
class tenant_service_build_built: public virtual tenant_service_base
{
public:
- virtual function<optional<string> (const tenant_service&)>
- build_built (const tenant_service&,
+ // The second half of the pair signals whether to call the
+ // build_completed() notification.
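+ // For example, a minimal sketch of a callback that neither updates the
+ // service data (nullopt) nor requests build_completed() (false):
+ //
+ // return [] (const string& /*tenant_id*/, const tenant_service&)
+ // {
+ //   return make_pair (optional<string> (), false);
+ // };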
+ //
+ virtual function<pair<optional<string>, bool> (const string& tenant_id,
+ const tenant_service&)>
+ build_built (const string& tenant_id,
+ const tenant_service&,
const build&,
const diag_epilogue& log_writer) const noexcept = 0;
+
+ virtual void
+ build_completed (const string& tenant_id,
+ const tenant_service&,
+ const diag_epilogue& log_writer) const noexcept;
};
// This notification is only made on unloaded CI requests created with the
// ci_start::create() call and until they are loaded with ci_start::load()
- // or, alternatively, abandoned with ci_start::abandon().
+ // or, alternatively, abandoned with ci_start::cancel() (in which case the
+ // returned callback should be NULL).
//
// Note: make sure the implementation of this notification does not take
- // too long (currently 40 seconds) to avoid nested notifications. Note
- // also that the first notification is delayed (currently 10 seconds).
+ // longer than the notify_interval argument of ci_start::create() to
+ // avoid nested notifications. The first notification can be delayed with
+ // the notify_delay argument.
//
class tenant_service_build_unloaded: public virtual tenant_service_base
{
public:
- virtual function<optional<string> (const tenant_service&)>
- build_unloaded (tenant_service&&,
+ virtual function<optional<string> (const string& tenant_id,
+ const tenant_service&)>
+ build_unloaded (const string& tenant_id,
+ tenant_service&&,
+ const diag_epilogue& log_writer) const noexcept = 0;
};