diff options
-rw-r--r-- | etc/brep-module.conf | 15 | ||||
-rw-r--r-- | etc/private/install/brep-module.conf | 15 | ||||
-rw-r--r-- | libbrep/package.hxx | 17 | ||||
-rw-r--r-- | mod/database-module.cxx | 56 | ||||
-rw-r--r-- | mod/database-module.hxx | 16 | ||||
-rw-r--r-- | mod/mod-build-force.cxx | 26 | ||||
-rw-r--r-- | mod/mod-build-result.cxx | 24 | ||||
-rw-r--r-- | mod/mod-build-task.cxx | 193 | ||||
-rw-r--r-- | mod/mod-ci-github-gq.cxx | 7 | ||||
-rw-r--r-- | mod/mod-ci-github-gq.hxx | 3 | ||||
-rw-r--r-- | mod/mod-ci-github.cxx | 6 | ||||
-rw-r--r-- | mod/module.cli | 18 |
12 files changed, 307 insertions, 89 deletions
diff --git a/etc/brep-module.conf b/etc/brep-module.conf index cdf028a..834202a 100644 --- a/etc/brep-module.conf +++ b/etc/brep-module.conf @@ -209,12 +209,15 @@ menu About=?about # build-alt-hard-rebuild-stop -# Time to wait before assuming the 'queued' notifications are delivered for -# package CI requests submitted via third-party services (GitHub, etc). During -# this time a package is not considered for a build. Must be specified in -# seconds. Default is 30 seconds. -# -# build-queued-timeout 30 +# Time to wait (in seconds) before assuming the 'queued' notification batch is +# delivered and the maximum number of build configurations in such a batch for +# package CI requests submitted via third-party services (GitHub, etc). Until +# all such batches are delivered, a package is not considered for a build. By +# default, assume the batch delivery time is 15 seconds and there are up to 60 +# build configurations in a batch. +# +# build-queued-timeout 15 +# build-queued-batch 60 # The maximum size of the build task request manifest accepted. Note that the diff --git a/etc/private/install/brep-module.conf b/etc/private/install/brep-module.conf index 2545a87..7711b82 100644 --- a/etc/private/install/brep-module.conf +++ b/etc/private/install/brep-module.conf @@ -209,12 +209,15 @@ menu About=?about # build-alt-hard-rebuild-stop -# Time to wait before assuming the 'queued' notifications are delivered for -# package CI requests submitted via third-party services (GitHub, etc). During -# this time a package is not considered for a build. Must be specified in -# seconds. Default is 30 seconds. -# -# build-queued-timeout 30 +# Time to wait (in seconds) before assuming the 'queued' notification batch is +# delivered and the maximum number of build configurations in such a batch for +# package CI requests submitted via third-party services (GitHub, etc). Until +# all such batches are delivered, a package is not considered for a build. By +# default, assume the batch delivery time is 15 seconds and there are up to 60 +# build configurations in a batch. +# +# build-queued-timeout 15 +# build-queued-batch 60 # The maximum size of the build task request manifest accepted. Note that the diff --git a/libbrep/package.hxx b/libbrep/package.hxx index 2714d10..51af338 100644 --- a/libbrep/package.hxx +++ b/libbrep/package.hxx @@ -282,11 +282,18 @@ namespace brep // for it. It feels like there is no easy way to reliably fix that. // Instead, we just decrease the probability of such a notifications // sequence failure by delaying builds of the freshly queued packages for - // some time. Specifically, whenever the `queued` notification is ought - // to be sent (normally out of the database transaction, since it likely - // sends an HTTP request, etc) the tenant's queued_timestamp member is set - // to the current time. During the configured time interval since that - // time point the build tasks may not be issued for the tenant's packages. + // some time. Specifically, whenever the `queued` notification, + // potentially for multiple builds, is ought to be sent (normally out of + // the database transaction, since it likely sends one or more HTTP + // requests, etc) the tenant's queued_timestamp member is set to the time + // point when we assume this notification will finally be delivered to the + // service. This timestamp is calculated as the sum of the current time + // and some time interval, proportional to the number of the queued builds + // we need to notify the third party service about. Until this time point + // is reached, the build tasks may not be issued for the tenant's + // packages. The thinking here is that the bigger the number of builds we + // need to notify about, the longer it normally takes to deliver this + // notification. // // Also note that while there are similar potential races for other // notification sequences, their probability is rather low due to the diff --git a/mod/database-module.cxx b/mod/database-module.cxx index 629e393..d838d5c 100644 --- a/mod/database-module.cxx +++ b/mod/database-module.cxx @@ -92,23 +92,18 @@ namespace brep optional<string> r; - for (size_t retry (retry_);; ) - { - try + update_tenant_service_state ( + conn, type, id, + [&r, &f, this] (const shared_ptr<build_tenant>& t) { - transaction tr (conn->begin ()); - - using query = query<build_tenant>; - - shared_ptr<build_tenant> t ( - build_db_->query_one<build_tenant> (query::service.id == id && - query::service.type == type)); + // Reset, in case this is a retry after the recoverable database + // failure. + // + r = nullopt; if (t != nullptr) { - // Shouldn't be here otherwise. - // - assert (t->service); + assert (t->service); // Shouldn't be here otherwise. tenant_service& s (*t->service); @@ -120,6 +115,37 @@ namespace brep r = move (s.data); } } + }); + + return r; + } + + void database_module:: + update_tenant_service_state ( + const connection_ptr& conn, + const string& type, + const string& id, + const function<void (const shared_ptr<build_tenant>&)>& f) + { + assert (f != nullptr); // Shouldn't be called otherwise. + + // Must be initialized via the init(options::build_db) function call. + // + assert (build_db_ != nullptr); + + for (size_t retry (retry_);;) + { + try + { + transaction tr (conn->begin ()); + + using query = query<build_tenant>; + + shared_ptr<build_tenant> t ( + build_db_->query_one<build_tenant> (query::service.id == id && + query::service.type == type)); + + f (t); tr.commit (); @@ -139,11 +165,7 @@ namespace brep l1 ([&]{trace << e << "; " << retry + 1 << " tenant service " << "state update retries left";}); - - r = nullopt; // Prepare for the next iteration. } } - - return r; } } diff --git a/mod/database-module.hxx b/mod/database-module.hxx index 76f13d4..29b4cb7 100644 --- a/mod/database-module.hxx +++ b/mod/database-module.hxx @@ -14,6 +14,7 @@ namespace brep { + class build_tenant; struct tenant_service; // A handler that utilises the database. Specifically, it will retry the @@ -74,6 +75,21 @@ namespace brep const function<optional<string> (const string& tenant_id, const tenant_service&)>&); + // A low-level version of the above. + // + // Specifically, the specified function is expected to update the + // tenant-associated service state directly, if required. While at it, it + // may also update some other tenant members. Note that if no tenant with + // the specified service type/id exists in the database, then the function + // will be called with the NULL pointer. + // + void + update_tenant_service_state ( + const odb::core::connection_ptr&, + const string& type, + const string& id, + const function<void (const shared_ptr<build_tenant>&)>&); + protected: size_t retry_ = 0; // Max of all retries. diff --git a/mod/mod-build-force.cxx b/mod/mod-build-force.cxx index 146acd9..25b860d 100644 --- a/mod/mod-build-force.cxx +++ b/mod/mod-build-force.cxx @@ -3,6 +3,8 @@ #include <mod/mod-build-force.hxx> +#include <chrono> + #include <odb/database.hxx> #include <odb/transaction.hxx> @@ -260,11 +262,11 @@ handle (request& rq, response& rs) // If we ought to call the // tenant_service_build_queued::build_queued() callback, then also - // set the package tenant's queued timestamp to the current time - // to prevent the task handler from picking the build and - // potentially interfering with us by sending its `building` - // notification before we send our `queued` notification (see - // tenant::queued_timestamp for details). + // set the package tenant's queued timestamp to prevent the task + // handler from picking the build and potentially interfering with + // us by sending its `building` notification before we send our + // `queued` notification (see tenant::queued_timestamp for + // details). // if (tsq != nullptr) { @@ -280,10 +282,18 @@ handle (request& rq, response& rs) qhs = tenant_service_build_queued::build_queued_hints { tpc == 1, p->configs.size () == 1}; - // Set the package tenant's queued timestamp. + // Set the package tenant's queued timestamp, unless it is + // already set to the same or greater value. // - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + timestamp ts ( + system_clock::now () + + chrono::seconds (options_->build_queued_timeout ())); + + if (!t->queued_timestamp || *t->queued_timestamp < ts) + { + t->queued_timestamp = ts; + build_db_->update (t); + } tss = make_pair (move (*t->service), move (b)); } diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx index 666e7ef..38f13a6 100644 --- a/mod/mod-build-result.cxx +++ b/mod/mod-build-result.cxx @@ -3,6 +3,8 @@ #include <mod/mod-build-result.hxx> +#include <chrono> + #include <odb/database.hxx> #include <odb/transaction.hxx> @@ -354,10 +356,10 @@ handle (request& rq, response&) // If we ought to call the tenant_service_build_queued::build_queued() // callback, then also set the package tenant's queued timestamp to - // the current time to prevent the task handler from picking the build - // and potentially interfering with us by sending its `building` - // notification before we send our `queued` notification (see - // tenant::queued_timestamp for details). + // prevent the task handler from picking the build and potentially + // interfering with us by sending its `building` notification before + // we send our `queued` notification (see tenant::queued_timestamp for + // details). // if (tsq != nullptr) { @@ -375,10 +377,18 @@ handle (request& rq, response&) qhs = tenant_service_build_queued::build_queued_hints { tpc == 1, p->configs.size () == 1}; - // Set the package tenant's queued timestamp. + // Set the package tenant's queued timestamp, unless it is already + // set to the same or greater value. // - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + timestamp ts ( + system_clock::now () + + chrono::seconds (options_->build_queued_timeout ())); + + if (!t->queued_timestamp || *t->queued_timestamp < ts) + { + t->queued_timestamp = ts; + build_db_->update (t); + } } } else // Regular or skip build result. diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index e769f6a..5490e8a 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -129,6 +129,9 @@ init (scanner& s) options_->build_alt_hard_rebuild_stop_specified ()) bad_alt ("hard"); + if (options_->build_queued_batch () == 0) + fail << "build-queued-batch must not be 0"; + database_module::init (*options_, options_->build_db_retry ()); // Check that the database 'build' schema matches the current one. It's @@ -156,8 +159,7 @@ template <typename T> static inline query<T> package_query (bool custom_bot, brep::params::build_task& params, - interactive_mode imode, - uint64_t queued_expiration_ns) + interactive_mode imode) { using namespace brep; using query = query<T>; @@ -258,7 +260,9 @@ package_query (bool custom_bot, return q && (query::build_tenant::queued_timestamp.is_null () || - query::build_tenant::queued_timestamp < queued_expiration_ns); + query::build_tenant::queued_timestamp < + chrono::duration_cast<chrono::nanoseconds> ( + system_clock::now ().time_since_epoch ()).count ()); } bool brep::build_task:: @@ -540,7 +544,7 @@ handle (request& rq, response& rs) const config_machine& cm) -> task_response_manifest { uint64_t ts ( - chrono::duration_cast<std::chrono::nanoseconds> ( + chrono::duration_cast<chrono::nanoseconds> ( b.timestamp.time_since_epoch ()).count ()); string session (b.tenant + '/' + @@ -633,8 +637,8 @@ handle (request& rq, response& rs) move (task)); }; - // Calculate the build/rebuild (building/built state) and the `queued` - // notifications expiration time for package configurations. + // Calculate the build/rebuild (building/built state) for package + // configurations. // timestamp now (system_clock::now ()); @@ -658,9 +662,6 @@ handle (request& rq, response& rs) timestamp forced_rebuild_expiration ( expiration (options_->build_forced_rebuild_timeout ())); - uint64_t queued_expiration_ns ( - expiration_ns (options_->build_queued_timeout ())); - // Calculate the soft/hard rebuild expiration time, based on the // respective build-{soft,hard}-rebuild-timeout and // build-alt-{soft,hard}-rebuild-{start,stop,timeout} configuration @@ -777,8 +778,7 @@ handle (request& rq, response& rs) pkg_query pq (package_query<buildable_package> (custom_bot, params, - imode, - queued_expiration_ns)); + imode)); // Transform (in-place) the interactive login information into the actual // login command, if specified in the manifest and the transformation @@ -934,8 +934,7 @@ handle (request& rq, response& rs) query q (package_query<buildable_package_count> (custom_bot, params, - imode, - queued_expiration_ns)); + imode)); if (conn == nullptr) conn = build_db_->connection (); @@ -1230,6 +1229,44 @@ handle (request& rq, response& rs) tpc == 1, p.configs.size () == 1}; }; + // Calculate the tenant's queued timestamp based on the number of queued + // builds, update the tenant, and return the resulting timestamp. Must + // be called inside the build db transaction. + // + // Note that it feels unlikely that the tenant queued timestamp is + // already set to some greater value. Let's, for good measure, consider + // such a possibility and, if that's the case, leave the timestamp + // intact and return nullopt. + // + auto update_queued_timestamp = [this] (const shared_ptr<build_tenant>& t, + size_t queued_builds) + -> optional<timestamp> + { + assert (transaction::has_current ()); + + // For simplicity, let's assume that the total `queued` notification + // delivery time is proportional to the number of build configuration + // batches. + // + size_t b (options_->build_queued_batch ()); + size_t n (queued_builds / b + (queued_builds % b != 0 ? 1 : 0)); + + timestamp ts (system_clock::now () + + chrono::seconds (n * options_->build_queued_timeout ())); + + if (t->queued_timestamp && *t->queued_timestamp > ts) + return nullopt; + + t->queued_timestamp = ts; + build_db_->update (t); + + return t->queued_timestamp; + }; + + // Stash the tenant queued timestamp, if we set it. + // + optional<timestamp> queued_timestamp; + // Collect the auxiliary machines required for testing of the specified // package configuration and the external test packages, if present for // the specified target configuration (task_auxiliary_machines), @@ -2006,14 +2043,14 @@ handle (request& rq, response& rs) // If we ought to call the // tenant_service_build_queued::build_queued() callback // for the queued builds (qbs is not empty), then we - // also set the package tenant's queued timestamp to the - // current time to prevent any concurrently running task - // handlers from picking any of these queued builds now - // and so potentially interfering with us by sending - // their `building` notification before we send our - // `queued` notification (see tenant::queued_timestamp - // for details). Note that the `queued` notification for - // the being built configuration doesn't require setting + // also set the package tenant's queued timestamp to + // prevent any concurrently running task handlers from + // picking any of these queued builds now and so + // potentially interfering with us by sending their + // `building` notification before we send our `queued` + // notification (see tenant::queued_timestamp for + // details). Note that the `queued` notification for the + // being built configuration doesn't require setting // this timestamp, since after the respective build // object is changed and updated in the database it may // not be picked by any task handler in the foreseeable @@ -2029,8 +2066,8 @@ handle (request& rq, response& rs) if (!qbs.empty ()) { - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + queued_timestamp = + update_queued_timestamp (t, qbs.size ()); } } } @@ -2275,9 +2312,9 @@ handle (request& rq, response& rs) // If we ought to call the // tenant_service_build_queued::build_queued() // callback for the queued builds, then also set the - // package tenant's queued timestamp to the current - // time to prevent the notifications race (see above - // building from scratch for details). + // package tenant's queued timestamp to prevent the + // notifications race (see above building from scratch + // for details). // if (!qbs.empty () || !rebuild_interrupted_rebuild) { @@ -2285,8 +2322,8 @@ handle (request& rq, response& rs) if (!qbs.empty ()) { - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + queued_timestamp = + update_queued_timestamp (t, qbs.size ()); } } } @@ -2334,6 +2371,7 @@ handle (request& rq, response& rs) tsq = nullptr; tss = nullopt; qbs.clear (); + queued_timestamp = nullopt; } // If the task manifest is prepared, then bail out from the package @@ -2386,9 +2424,74 @@ handle (request& rq, response& rs) { conn = build_db_->connection (); - if (optional<string> data = - update_tenant_service_state (conn, ss.type, ss.id, f)) - ss.data = move (data); + // Now, as the `queued` notification is delivered, let's check if + // we have set the tenant's queued timestamp to prevent the race + // and, if that's the case, reset it. Not to start additional + // database transaction, we will do this together with the service + // data update. + // + optional<string> service_data; + bool queued_timestamp_reset (false); + + update_tenant_service_state ( + conn, ss.type, ss.id, + [&service_data, + &queued_timestamp_reset, + &queued_timestamp, + &f, + this] (const shared_ptr<build_tenant>& t) + { + // Reset, in case this is a retry after the recoverable + // database failure. + // + service_data = nullopt; + queued_timestamp_reset = false; + + if (t != nullptr) + { + // Reset the queued timestamp. + // + // Note that it may potentially be re-assigned by some other + // handler to some greater value (timeout calculated by the + // update_queued_timestamp() was too small, etc). Thus, + // let's only reset it if it was set by us. + // + if (t->queued_timestamp == queued_timestamp) + { + t->queued_timestamp = nullopt; + queued_timestamp_reset = true; + } + + // Update the service data. + // + assert (t->service); // Shouldn't be here otherwise. + + tenant_service& s (*t->service); + optional<string> data (f (t->id, s)); + + if (data) + s.data = move (*data); + + // If queued timestamp is reset and/or the service data is + // updated, then update the tenant object in the database. + // + if (queued_timestamp_reset || data) + build_db_->update (t); + + // If the service data is updated, then stash the new data. + // + if (data) + service_data = move (s.data); + } + }); + + if (service_data) + ss.data = move (service_data); + + // Make sure we will not try to reset the queued timestamp again. + // + if (queued_timestamp_reset) + queued_timestamp = nullopt; } } @@ -2461,6 +2564,34 @@ handle (request& rq, response& rs) } } + // Now, as all the potential tenant service notifications are delivered, + // let's check if we have set the tenant's queued timestamp to prevent + // the race and didn't reset it yet. If that's the case, let's reset it + // now. + // + if (queued_timestamp) + { + assert (tss); // Wouldn't be here otherwise. + + const build& b (*tss->second); + + if (conn == nullptr) + conn = build_db_->connection (); + + transaction tr (conn->begin ()); + + shared_ptr<build_tenant> t ( + build_db_->load<build_tenant> (b.tenant)); + + if (t->queued_timestamp == queued_timestamp) + { + t->queued_timestamp = nullopt; + build_db_->update (t); + } + + tr.commit (); + } + // If the task manifest is prepared, then check that the number of the // build auxiliary machines is less than 10. If that's not the case, // then turn the build into the built state with the abort status. diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx index 763842c..d49dfa2 100644 --- a/mod/mod-ci-github-gq.cxx +++ b/mod/mod-ci-github-gq.cxx @@ -820,8 +820,11 @@ namespace brep const string& iat, uint64_t ai, const string& rid, - const string& hs) + const string& hs, + size_t batch) { + assert (batch != 0); + // No support for result_status so state cannot be built. // #ifndef NDEBUG @@ -840,7 +843,7 @@ namespace brep // responding with 502). We handle this here by batching the creation. // size_t n (crs.size ()); - size_t b (n / 60 + (n % 60 != 0 ? 1 : 0)); + size_t b (n / batch + (n % batch != 0 ? 1 : 0)); size_t bn (n / b); auto i (crs.begin ()); diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx index 5fc75aa..2a8138a 100644 --- a/mod/mod-ci-github-gq.hxx +++ b/mod/mod-ci-github-gq.hxx @@ -39,7 +39,8 @@ namespace brep const string& installation_access_token, uint64_t app_id, const string& repository_id, - const string& head_sha); + const string& head_sha, + size_t batch); // Create a new check run on GitHub for a build in the queued or building // state. Note that the state cannot be built because in that case a diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx index cd45c4c..08ff2dc 100644 --- a/mod/mod-ci-github.cxx +++ b/mod/mod-ci-github.cxx @@ -1530,7 +1530,8 @@ namespace brep conclusion_building_summary}; if (gq_create_check_runs (error, check_runs, iat->token, - cr.check_run.app_id, repo_node_id, head_sha)) + cr.check_run.app_id, repo_node_id, head_sha, + options_->build_queued_batch ())) { assert (bcr.state == build_state::queued); assert (ccr.state == build_state::building); @@ -2420,7 +2421,8 @@ namespace brep iat->token, sd.app_id, sd.repository_node_id, - sd.report_sha)) + sd.report_sha, + options_->build_queued_batch ())) { for (const check_run& cr: crs) { diff --git a/mod/module.cli b/mod/module.cli index 57a5f31..f78cd06 100644 --- a/mod/module.cli +++ b/mod/module.cli @@ -305,13 +305,23 @@ namespace brep for the \cb{build-hard-rebuild-timeout} option." } - size_t build-queued-timeout = 30 + size_t build-queued-timeout = 15 { "<seconds>", - "Time to wait before assuming the \cb{queued} notifications are + "Time to wait before assuming the \cb{queued} notification batch is delivered for package CI requests submitted via third-party services - (GitHub, etc). During this time a package is not considered for a - build. Must be specified in seconds. Default is 30 seconds." + (GitHub, etc). Until all such batches are delivered, a package is + not considered for a build. Must be specified in seconds. Default + is 15 seconds. See also \cb{build-queued-batch}." + } + + size_t build-queued-batch = 60 + { + "<num>", + "The maximum number of build configurations in the \cb{queued} + notification batch for package CI requests submitted via third-party + services (see \cb{build-queued-timeout} for details). The default + is 60." } }; |