diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2025-02-14 14:08:04 +0200 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2025-02-17 21:31:24 +0200 |
commit | 62a6bd8d6bb576edf76c42db8ffb73fcb0f87fb7 (patch) | |
tree | 867897ce90d2b7fe1d62a6c93feb44f4b69d5d1d /mod | |
parent | 1795263201cc930c349add08aac8cc268796d773 (diff) |
Automatically increase build queued timeout for big number of builds in 'queued' notifications
Note that this commit changes the semantics of the tenant::queued_timestamp
member. Previously it was set to the time when the build objects are turned
into the queued state. Now it is set to the time when the queued notification
for them is assumed to be delivered to the third party service.
Diffstat (limited to 'mod')
-rw-r--r-- | mod/database-module.cxx | 56 | ||||
-rw-r--r-- | mod/database-module.hxx | 16 | ||||
-rw-r--r-- | mod/mod-build-force.cxx | 26 | ||||
-rw-r--r-- | mod/mod-build-result.cxx | 24 | ||||
-rw-r--r-- | mod/mod-build-task.cxx | 193 | ||||
-rw-r--r-- | mod/mod-ci-github-gq.cxx | 7 | ||||
-rw-r--r-- | mod/mod-ci-github-gq.hxx | 3 | ||||
-rw-r--r-- | mod/mod-ci-github.cxx | 6 | ||||
-rw-r--r-- | mod/module.cli | 18 |
9 files changed, 277 insertions, 72 deletions
diff --git a/mod/database-module.cxx b/mod/database-module.cxx index 629e393..d838d5c 100644 --- a/mod/database-module.cxx +++ b/mod/database-module.cxx @@ -92,23 +92,18 @@ namespace brep optional<string> r; - for (size_t retry (retry_);; ) - { - try + update_tenant_service_state ( + conn, type, id, + [&r, &f, this] (const shared_ptr<build_tenant>& t) { - transaction tr (conn->begin ()); - - using query = query<build_tenant>; - - shared_ptr<build_tenant> t ( - build_db_->query_one<build_tenant> (query::service.id == id && - query::service.type == type)); + // Reset, in case this is a retry after the recoverable database + // failure. + // + r = nullopt; if (t != nullptr) { - // Shouldn't be here otherwise. - // - assert (t->service); + assert (t->service); // Shouldn't be here otherwise. tenant_service& s (*t->service); @@ -120,6 +115,37 @@ namespace brep r = move (s.data); } } + }); + + return r; + } + + void database_module:: + update_tenant_service_state ( + const connection_ptr& conn, + const string& type, + const string& id, + const function<void (const shared_ptr<build_tenant>&)>& f) + { + assert (f != nullptr); // Shouldn't be called otherwise. + + // Must be initialized via the init(options::build_db) function call. + // + assert (build_db_ != nullptr); + + for (size_t retry (retry_);;) + { + try + { + transaction tr (conn->begin ()); + + using query = query<build_tenant>; + + shared_ptr<build_tenant> t ( + build_db_->query_one<build_tenant> (query::service.id == id && + query::service.type == type)); + + f (t); tr.commit (); @@ -139,11 +165,7 @@ namespace brep l1 ([&]{trace << e << "; " << retry + 1 << " tenant service " << "state update retries left";}); - - r = nullopt; // Prepare for the next iteration. } } - - return r; } } diff --git a/mod/database-module.hxx b/mod/database-module.hxx index 76f13d4..29b4cb7 100644 --- a/mod/database-module.hxx +++ b/mod/database-module.hxx @@ -14,6 +14,7 @@ namespace brep { + class build_tenant; struct tenant_service; // A handler that utilises the database. Specifically, it will retry the @@ -74,6 +75,21 @@ namespace brep const function<optional<string> (const string& tenant_id, const tenant_service&)>&); + // A low-level version of the above. + // + // Specifically, the specified function is expected to update the + // tenant-associated service state directly, if required. While at it, it + // may also update some other tenant members. Note that if no tenant with + // the specified service type/id exists in the database, then the function + // will be called with the NULL pointer. + // + void + update_tenant_service_state ( + const odb::core::connection_ptr&, + const string& type, + const string& id, + const function<void (const shared_ptr<build_tenant>&)>&); + protected: size_t retry_ = 0; // Max of all retries. diff --git a/mod/mod-build-force.cxx b/mod/mod-build-force.cxx index 146acd9..25b860d 100644 --- a/mod/mod-build-force.cxx +++ b/mod/mod-build-force.cxx @@ -3,6 +3,8 @@ #include <mod/mod-build-force.hxx> +#include <chrono> + #include <odb/database.hxx> #include <odb/transaction.hxx> @@ -260,11 +262,11 @@ handle (request& rq, response& rs) // If we ought to call the // tenant_service_build_queued::build_queued() callback, then also - // set the package tenant's queued timestamp to the current time - // to prevent the task handler from picking the build and - // potentially interfering with us by sending its `building` - // notification before we send our `queued` notification (see - // tenant::queued_timestamp for details). + // set the package tenant's queued timestamp to prevent the task + // handler from picking the build and potentially interfering with + // us by sending its `building` notification before we send our + // `queued` notification (see tenant::queued_timestamp for + // details). // if (tsq != nullptr) { @@ -280,10 +282,18 @@ handle (request& rq, response& rs) qhs = tenant_service_build_queued::build_queued_hints { tpc == 1, p->configs.size () == 1}; - // Set the package tenant's queued timestamp. + // Set the package tenant's queued timestamp, unless it is + // already set to the same or greater value. // - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + timestamp ts ( + system_clock::now () + + chrono::seconds (options_->build_queued_timeout ())); + + if (!t->queued_timestamp || *t->queued_timestamp < ts) + { + t->queued_timestamp = ts; + build_db_->update (t); + } tss = make_pair (move (*t->service), move (b)); } diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx index 666e7ef..38f13a6 100644 --- a/mod/mod-build-result.cxx +++ b/mod/mod-build-result.cxx @@ -3,6 +3,8 @@ #include <mod/mod-build-result.hxx> +#include <chrono> + #include <odb/database.hxx> #include <odb/transaction.hxx> @@ -354,10 +356,10 @@ handle (request& rq, response&) // If we ought to call the tenant_service_build_queued::build_queued() // callback, then also set the package tenant's queued timestamp to - // the current time to prevent the task handler from picking the build - // and potentially interfering with us by sending its `building` - // notification before we send our `queued` notification (see - // tenant::queued_timestamp for details). + // prevent the task handler from picking the build and potentially + // interfering with us by sending its `building` notification before + // we send our `queued` notification (see tenant::queued_timestamp for + // details). // if (tsq != nullptr) { @@ -375,10 +377,18 @@ handle (request& rq, response&) qhs = tenant_service_build_queued::build_queued_hints { tpc == 1, p->configs.size () == 1}; - // Set the package tenant's queued timestamp. + // Set the package tenant's queued timestamp, unless it is already + // set to the same or greater value. // - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + timestamp ts ( + system_clock::now () + + chrono::seconds (options_->build_queued_timeout ())); + + if (!t->queued_timestamp || *t->queued_timestamp < ts) + { + t->queued_timestamp = ts; + build_db_->update (t); + } } } else // Regular or skip build result. diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index e769f6a..5490e8a 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -129,6 +129,9 @@ init (scanner& s) options_->build_alt_hard_rebuild_stop_specified ()) bad_alt ("hard"); + if (options_->build_queued_batch () == 0) + fail << "build-queued-batch must not be 0"; + database_module::init (*options_, options_->build_db_retry ()); // Check that the database 'build' schema matches the current one. It's @@ -156,8 +159,7 @@ template <typename T> static inline query<T> package_query (bool custom_bot, brep::params::build_task& params, - interactive_mode imode, - uint64_t queued_expiration_ns) + interactive_mode imode) { using namespace brep; using query = query<T>; @@ -258,7 +260,9 @@ package_query (bool custom_bot, return q && (query::build_tenant::queued_timestamp.is_null () || - query::build_tenant::queued_timestamp < queued_expiration_ns); + query::build_tenant::queued_timestamp < + chrono::duration_cast<chrono::nanoseconds> ( + system_clock::now ().time_since_epoch ()).count ()); } bool brep::build_task:: @@ -540,7 +544,7 @@ handle (request& rq, response& rs) const config_machine& cm) -> task_response_manifest { uint64_t ts ( - chrono::duration_cast<std::chrono::nanoseconds> ( + chrono::duration_cast<chrono::nanoseconds> ( b.timestamp.time_since_epoch ()).count ()); string session (b.tenant + '/' + @@ -633,8 +637,8 @@ handle (request& rq, response& rs) move (task)); }; - // Calculate the build/rebuild (building/built state) and the `queued` - // notifications expiration time for package configurations. + // Calculate the build/rebuild (building/built state) for package + // configurations. // timestamp now (system_clock::now ()); @@ -658,9 +662,6 @@ handle (request& rq, response& rs) timestamp forced_rebuild_expiration ( expiration (options_->build_forced_rebuild_timeout ())); - uint64_t queued_expiration_ns ( - expiration_ns (options_->build_queued_timeout ())); - // Calculate the soft/hard rebuild expiration time, based on the // respective build-{soft,hard}-rebuild-timeout and // build-alt-{soft,hard}-rebuild-{start,stop,timeout} configuration @@ -777,8 +778,7 @@ handle (request& rq, response& rs) pkg_query pq (package_query<buildable_package> (custom_bot, params, - imode, - queued_expiration_ns)); + imode)); // Transform (in-place) the interactive login information into the actual // login command, if specified in the manifest and the transformation @@ -934,8 +934,7 @@ handle (request& rq, response& rs) query q (package_query<buildable_package_count> (custom_bot, params, - imode, - queued_expiration_ns)); + imode)); if (conn == nullptr) conn = build_db_->connection (); @@ -1230,6 +1229,44 @@ handle (request& rq, response& rs) tpc == 1, p.configs.size () == 1}; }; + // Calculate the tenant's queued timestamp based on the number of queued + // builds, update the tenant, and return the resulting timestamp. Must + // be called inside the build db transaction. + // + // Note that it feels unlikely that the tenant queued timestamp is + // already set to some greater value. Let's, for good measure, consider + // such a possibility and, if that's the case, leave the timestamp + // intact and return nullopt. + // + auto update_queued_timestamp = [this] (const shared_ptr<build_tenant>& t, + size_t queued_builds) + -> optional<timestamp> + { + assert (transaction::has_current ()); + + // For simplicity, let's assume that the total `queued` notification + // delivery time is proportional to the number of build configuration + // batches. + // + size_t b (options_->build_queued_batch ()); + size_t n (queued_builds / b + (queued_builds % b != 0 ? 1 : 0)); + + timestamp ts (system_clock::now () + + chrono::seconds (n * options_->build_queued_timeout ())); + + if (t->queued_timestamp && *t->queued_timestamp > ts) + return nullopt; + + t->queued_timestamp = ts; + build_db_->update (t); + + return t->queued_timestamp; + }; + + // Stash the tenant queued timestamp, if we set it. + // + optional<timestamp> queued_timestamp; + // Collect the auxiliary machines required for testing of the specified // package configuration and the external test packages, if present for // the specified target configuration (task_auxiliary_machines), @@ -2006,14 +2043,14 @@ handle (request& rq, response& rs) // If we ought to call the // tenant_service_build_queued::build_queued() callback // for the queued builds (qbs is not empty), then we - // also set the package tenant's queued timestamp to the - // current time to prevent any concurrently running task - // handlers from picking any of these queued builds now - // and so potentially interfering with us by sending - // their `building` notification before we send our - // `queued` notification (see tenant::queued_timestamp - // for details). Note that the `queued` notification for - // the being built configuration doesn't require setting + // also set the package tenant's queued timestamp to + // prevent any concurrently running task handlers from + // picking any of these queued builds now and so + // potentially interfering with us by sending their + // `building` notification before we send our `queued` + // notification (see tenant::queued_timestamp for + // details). Note that the `queued` notification for the + // being built configuration doesn't require setting // this timestamp, since after the respective build // object is changed and updated in the database it may // not be picked by any task handler in the foreseeable @@ -2029,8 +2066,8 @@ handle (request& rq, response& rs) if (!qbs.empty ()) { - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + queued_timestamp = + update_queued_timestamp (t, qbs.size ()); } } } @@ -2275,9 +2312,9 @@ handle (request& rq, response& rs) // If we ought to call the // tenant_service_build_queued::build_queued() // callback for the queued builds, then also set the - // package tenant's queued timestamp to the current - // time to prevent the notifications race (see above - // building from scratch for details). + // package tenant's queued timestamp to prevent the + // notifications race (see above building from scratch + // for details). // if (!qbs.empty () || !rebuild_interrupted_rebuild) { @@ -2285,8 +2322,8 @@ handle (request& rq, response& rs) if (!qbs.empty ()) { - t->queued_timestamp = system_clock::now (); - build_db_->update (t); + queued_timestamp = + update_queued_timestamp (t, qbs.size ()); } } } @@ -2334,6 +2371,7 @@ handle (request& rq, response& rs) tsq = nullptr; tss = nullopt; qbs.clear (); + queued_timestamp = nullopt; } // If the task manifest is prepared, then bail out from the package @@ -2386,9 +2424,74 @@ handle (request& rq, response& rs) { conn = build_db_->connection (); - if (optional<string> data = - update_tenant_service_state (conn, ss.type, ss.id, f)) - ss.data = move (data); + // Now, as the `queued` notification is delivered, let's check if + // we have set the tenant's queued timestamp to prevent the race + // and, if that's the case, reset it. Not to start additional + // database transaction, we will do this together with the service + // data update. + // + optional<string> service_data; + bool queued_timestamp_reset (false); + + update_tenant_service_state ( + conn, ss.type, ss.id, + [&service_data, + &queued_timestamp_reset, + &queued_timestamp, + &f, + this] (const shared_ptr<build_tenant>& t) + { + // Reset, in case this is a retry after the recoverable + // database failure. + // + service_data = nullopt; + queued_timestamp_reset = false; + + if (t != nullptr) + { + // Reset the queued timestamp. + // + // Note that it may potentially be re-assigned by some other + // handler to some greater value (timeout calculated by the + // update_queued_timestamp() was too small, etc). Thus, + // let's only reset it if it was set by us. + // + if (t->queued_timestamp == queued_timestamp) + { + t->queued_timestamp = nullopt; + queued_timestamp_reset = true; + } + + // Update the service data. + // + assert (t->service); // Shouldn't be here otherwise. + + tenant_service& s (*t->service); + optional<string> data (f (t->id, s)); + + if (data) + s.data = move (*data); + + // If queued timestamp is reset and/or the service data is + // updated, then update the tenant object in the database. + // + if (queued_timestamp_reset || data) + build_db_->update (t); + + // If the service data is updated, then stash the new data. + // + if (data) + service_data = move (s.data); + } + }); + + if (service_data) + ss.data = move (service_data); + + // Make sure we will not try to reset the queued timestamp again. + // + if (queued_timestamp_reset) + queued_timestamp = nullopt; } } @@ -2461,6 +2564,34 @@ handle (request& rq, response& rs) } } + // Now, as all the potential tenant service notifications are delivered, + // let's check if we have set the tenant's queued timestamp to prevent + // the race and didn't reset it yet. If that's the case, let's reset it + // now. + // + if (queued_timestamp) + { + assert (tss); // Wouldn't be here otherwise. + + const build& b (*tss->second); + + if (conn == nullptr) + conn = build_db_->connection (); + + transaction tr (conn->begin ()); + + shared_ptr<build_tenant> t ( + build_db_->load<build_tenant> (b.tenant)); + + if (t->queued_timestamp == queued_timestamp) + { + t->queued_timestamp = nullopt; + build_db_->update (t); + } + + tr.commit (); + } + // If the task manifest is prepared, then check that the number of the // build auxiliary machines is less than 10. If that's not the case, // then turn the build into the built state with the abort status. diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx index 763842c..d49dfa2 100644 --- a/mod/mod-ci-github-gq.cxx +++ b/mod/mod-ci-github-gq.cxx @@ -820,8 +820,11 @@ namespace brep const string& iat, uint64_t ai, const string& rid, - const string& hs) + const string& hs, + size_t batch) { + assert (batch != 0); + // No support for result_status so state cannot be built. // #ifndef NDEBUG @@ -840,7 +843,7 @@ namespace brep // responding with 502). We handle this here by batching the creation. // size_t n (crs.size ()); - size_t b (n / 60 + (n % 60 != 0 ? 1 : 0)); + size_t b (n / batch + (n % batch != 0 ? 1 : 0)); size_t bn (n / b); auto i (crs.begin ()); diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx index 5fc75aa..2a8138a 100644 --- a/mod/mod-ci-github-gq.hxx +++ b/mod/mod-ci-github-gq.hxx @@ -39,7 +39,8 @@ namespace brep const string& installation_access_token, uint64_t app_id, const string& repository_id, - const string& head_sha); + const string& head_sha, + size_t batch); // Create a new check run on GitHub for a build in the queued or building // state. Note that the state cannot be built because in that case a diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx index cd45c4c..08ff2dc 100644 --- a/mod/mod-ci-github.cxx +++ b/mod/mod-ci-github.cxx @@ -1530,7 +1530,8 @@ namespace brep conclusion_building_summary}; if (gq_create_check_runs (error, check_runs, iat->token, - cr.check_run.app_id, repo_node_id, head_sha)) + cr.check_run.app_id, repo_node_id, head_sha, + options_->build_queued_batch ())) { assert (bcr.state == build_state::queued); assert (ccr.state == build_state::building); @@ -2420,7 +2421,8 @@ namespace brep iat->token, sd.app_id, sd.repository_node_id, - sd.report_sha)) + sd.report_sha, + options_->build_queued_batch ())) { for (const check_run& cr: crs) { diff --git a/mod/module.cli b/mod/module.cli index 57a5f31..f78cd06 100644 --- a/mod/module.cli +++ b/mod/module.cli @@ -305,13 +305,23 @@ namespace brep for the \cb{build-hard-rebuild-timeout} option." } - size_t build-queued-timeout = 30 + size_t build-queued-timeout = 15 { "<seconds>", - "Time to wait before assuming the \cb{queued} notifications are + "Time to wait before assuming the \cb{queued} notification batch is delivered for package CI requests submitted via third-party services - (GitHub, etc). During this time a package is not considered for a - build. Must be specified in seconds. Default is 30 seconds." + (GitHub, etc). Until all such batches are delivered, a package is + not considered for a build. Must be specified in seconds. Default + is 15 seconds. See also \cb{build-queued-batch}." + } + + size_t build-queued-batch = 60 + { + "<num>", + "The maximum number of build configurations in the \cb{queued} + notification batch for package CI requests submitted via third-party + services (see \cb{build-queued-timeout} for details). The default + is 60." } }; |