aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--etc/brep-module.conf15
-rw-r--r--etc/private/install/brep-module.conf15
-rw-r--r--libbrep/package.hxx17
-rw-r--r--mod/database-module.cxx56
-rw-r--r--mod/database-module.hxx16
-rw-r--r--mod/mod-build-force.cxx26
-rw-r--r--mod/mod-build-result.cxx24
-rw-r--r--mod/mod-build-task.cxx193
-rw-r--r--mod/mod-ci-github-gq.cxx7
-rw-r--r--mod/mod-ci-github-gq.hxx3
-rw-r--r--mod/mod-ci-github.cxx6
-rw-r--r--mod/module.cli18
12 files changed, 307 insertions, 89 deletions
diff --git a/etc/brep-module.conf b/etc/brep-module.conf
index cdf028a..834202a 100644
--- a/etc/brep-module.conf
+++ b/etc/brep-module.conf
@@ -209,12 +209,15 @@ menu About=?about
# build-alt-hard-rebuild-stop
-# Time to wait before assuming the 'queued' notifications are delivered for
-# package CI requests submitted via third-party services (GitHub, etc). During
-# this time a package is not considered for a build. Must be specified in
-# seconds. Default is 30 seconds.
-#
-# build-queued-timeout 30
+# Time to wait (in seconds) before assuming the 'queued' notification batch is
+# delivered and the maximum number of build configurations in such a batch for
+# package CI requests submitted via third-party services (GitHub, etc). Until
+# all such batches are delivered, a package is not considered for a build. By
+# default, assume the batch delivery time is 15 seconds and there are up to 60
+# build configurations in a batch.
+#
+# build-queued-timeout 15
+# build-queued-batch 60
# The maximum size of the build task request manifest accepted. Note that the
diff --git a/etc/private/install/brep-module.conf b/etc/private/install/brep-module.conf
index 2545a87..7711b82 100644
--- a/etc/private/install/brep-module.conf
+++ b/etc/private/install/brep-module.conf
@@ -209,12 +209,15 @@ menu About=?about
# build-alt-hard-rebuild-stop
-# Time to wait before assuming the 'queued' notifications are delivered for
-# package CI requests submitted via third-party services (GitHub, etc). During
-# this time a package is not considered for a build. Must be specified in
-# seconds. Default is 30 seconds.
-#
-# build-queued-timeout 30
+# Time to wait (in seconds) before assuming the 'queued' notification batch is
+# delivered and the maximum number of build configurations in such a batch for
+# package CI requests submitted via third-party services (GitHub, etc). Until
+# all such batches are delivered, a package is not considered for a build. By
+# default, assume the batch delivery time is 15 seconds and there are up to 60
+# build configurations in a batch.
+#
+# build-queued-timeout 15
+# build-queued-batch 60
# The maximum size of the build task request manifest accepted. Note that the
diff --git a/libbrep/package.hxx b/libbrep/package.hxx
index 2714d10..51af338 100644
--- a/libbrep/package.hxx
+++ b/libbrep/package.hxx
@@ -282,11 +282,18 @@ namespace brep
// for it. It feels like there is no easy way to reliably fix that.
// Instead, we just decrease the probability of such a notifications
// sequence failure by delaying builds of the freshly queued packages for
- // some time. Specifically, whenever the `queued` notification is ought
- // to be sent (normally out of the database transaction, since it likely
- // sends an HTTP request, etc) the tenant's queued_timestamp member is set
- // to the current time. During the configured time interval since that
- // time point the build tasks may not be issued for the tenant's packages.
+ // some time. Specifically, whenever the `queued` notification,
+ // potentially for multiple builds, is ought to be sent (normally out of
+ // the database transaction, since it likely sends one or more HTTP
+ // requests, etc) the tenant's queued_timestamp member is set to the time
+ // point when we assume this notification will finally be delivered to the
+ // service. This timestamp is calculated as the sum of the current time
+ // and some time interval, proportional to the number of the queued builds
+ // we need to notify the third party service about. Until this time point
+ // is reached, the build tasks may not be issued for the tenant's
+ // packages. The thinking here is that the bigger the number of builds we
+ // need to notify about, the longer it normally takes to deliver this
+ // notification.
//
// Also note that while there are similar potential races for other
// notification sequences, their probability is rather low due to the
diff --git a/mod/database-module.cxx b/mod/database-module.cxx
index 629e393..d838d5c 100644
--- a/mod/database-module.cxx
+++ b/mod/database-module.cxx
@@ -92,23 +92,18 @@ namespace brep
optional<string> r;
- for (size_t retry (retry_);; )
- {
- try
+ update_tenant_service_state (
+ conn, type, id,
+ [&r, &f, this] (const shared_ptr<build_tenant>& t)
{
- transaction tr (conn->begin ());
-
- using query = query<build_tenant>;
-
- shared_ptr<build_tenant> t (
- build_db_->query_one<build_tenant> (query::service.id == id &&
- query::service.type == type));
+ // Reset, in case this is a retry after the recoverable database
+ // failure.
+ //
+ r = nullopt;
if (t != nullptr)
{
- // Shouldn't be here otherwise.
- //
- assert (t->service);
+ assert (t->service); // Shouldn't be here otherwise.
tenant_service& s (*t->service);
@@ -120,6 +115,37 @@ namespace brep
r = move (s.data);
}
}
+ });
+
+ return r;
+ }
+
+ void database_module::
+ update_tenant_service_state (
+ const connection_ptr& conn,
+ const string& type,
+ const string& id,
+ const function<void (const shared_ptr<build_tenant>&)>& f)
+ {
+ assert (f != nullptr); // Shouldn't be called otherwise.
+
+ // Must be initialized via the init(options::build_db) function call.
+ //
+ assert (build_db_ != nullptr);
+
+ for (size_t retry (retry_);;)
+ {
+ try
+ {
+ transaction tr (conn->begin ());
+
+ using query = query<build_tenant>;
+
+ shared_ptr<build_tenant> t (
+ build_db_->query_one<build_tenant> (query::service.id == id &&
+ query::service.type == type));
+
+ f (t);
tr.commit ();
@@ -139,11 +165,7 @@ namespace brep
l1 ([&]{trace << e << "; " << retry + 1 << " tenant service "
<< "state update retries left";});
-
- r = nullopt; // Prepare for the next iteration.
}
}
-
- return r;
}
}
diff --git a/mod/database-module.hxx b/mod/database-module.hxx
index 76f13d4..29b4cb7 100644
--- a/mod/database-module.hxx
+++ b/mod/database-module.hxx
@@ -14,6 +14,7 @@
namespace brep
{
+ class build_tenant;
struct tenant_service;
// A handler that utilises the database. Specifically, it will retry the
@@ -74,6 +75,21 @@ namespace brep
const function<optional<string> (const string& tenant_id,
const tenant_service&)>&);
+ // A low-level version of the above.
+ //
+ // Specifically, the specified function is expected to update the
+ // tenant-associated service state directly, if required. While at it, it
+ // may also update some other tenant members. Note that if no tenant with
+ // the specified service type/id exists in the database, then the function
+ // will be called with the NULL pointer.
+ //
+ void
+ update_tenant_service_state (
+ const odb::core::connection_ptr&,
+ const string& type,
+ const string& id,
+ const function<void (const shared_ptr<build_tenant>&)>&);
+
protected:
size_t retry_ = 0; // Max of all retries.
diff --git a/mod/mod-build-force.cxx b/mod/mod-build-force.cxx
index 146acd9..25b860d 100644
--- a/mod/mod-build-force.cxx
+++ b/mod/mod-build-force.cxx
@@ -3,6 +3,8 @@
#include <mod/mod-build-force.hxx>
+#include <chrono>
+
#include <odb/database.hxx>
#include <odb/transaction.hxx>
@@ -260,11 +262,11 @@ handle (request& rq, response& rs)
// If we ought to call the
// tenant_service_build_queued::build_queued() callback, then also
- // set the package tenant's queued timestamp to the current time
- // to prevent the task handler from picking the build and
- // potentially interfering with us by sending its `building`
- // notification before we send our `queued` notification (see
- // tenant::queued_timestamp for details).
+ // set the package tenant's queued timestamp to prevent the task
+ // handler from picking the build and potentially interfering with
+ // us by sending its `building` notification before we send our
+ // `queued` notification (see tenant::queued_timestamp for
+ // details).
//
if (tsq != nullptr)
{
@@ -280,10 +282,18 @@ handle (request& rq, response& rs)
qhs = tenant_service_build_queued::build_queued_hints {
tpc == 1, p->configs.size () == 1};
- // Set the package tenant's queued timestamp.
+ // Set the package tenant's queued timestamp, unless it is
+ // already set to the same or greater value.
//
- t->queued_timestamp = system_clock::now ();
- build_db_->update (t);
+ timestamp ts (
+ system_clock::now () +
+ chrono::seconds (options_->build_queued_timeout ()));
+
+ if (!t->queued_timestamp || *t->queued_timestamp < ts)
+ {
+ t->queued_timestamp = ts;
+ build_db_->update (t);
+ }
tss = make_pair (move (*t->service), move (b));
}
diff --git a/mod/mod-build-result.cxx b/mod/mod-build-result.cxx
index 666e7ef..38f13a6 100644
--- a/mod/mod-build-result.cxx
+++ b/mod/mod-build-result.cxx
@@ -3,6 +3,8 @@
#include <mod/mod-build-result.hxx>
+#include <chrono>
+
#include <odb/database.hxx>
#include <odb/transaction.hxx>
@@ -354,10 +356,10 @@ handle (request& rq, response&)
// If we ought to call the tenant_service_build_queued::build_queued()
// callback, then also set the package tenant's queued timestamp to
- // the current time to prevent the task handler from picking the build
- // and potentially interfering with us by sending its `building`
- // notification before we send our `queued` notification (see
- // tenant::queued_timestamp for details).
+ // prevent the task handler from picking the build and potentially
+ // interfering with us by sending its `building` notification before
+ // we send our `queued` notification (see tenant::queued_timestamp for
+ // details).
//
if (tsq != nullptr)
{
@@ -375,10 +377,18 @@ handle (request& rq, response&)
qhs = tenant_service_build_queued::build_queued_hints {
tpc == 1, p->configs.size () == 1};
- // Set the package tenant's queued timestamp.
+ // Set the package tenant's queued timestamp, unless it is already
+ // set to the same or greater value.
//
- t->queued_timestamp = system_clock::now ();
- build_db_->update (t);
+ timestamp ts (
+ system_clock::now () +
+ chrono::seconds (options_->build_queued_timeout ()));
+
+ if (!t->queued_timestamp || *t->queued_timestamp < ts)
+ {
+ t->queued_timestamp = ts;
+ build_db_->update (t);
+ }
}
}
else // Regular or skip build result.
diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx
index e769f6a..5490e8a 100644
--- a/mod/mod-build-task.cxx
+++ b/mod/mod-build-task.cxx
@@ -129,6 +129,9 @@ init (scanner& s)
options_->build_alt_hard_rebuild_stop_specified ())
bad_alt ("hard");
+ if (options_->build_queued_batch () == 0)
+ fail << "build-queued-batch must not be 0";
+
database_module::init (*options_, options_->build_db_retry ());
// Check that the database 'build' schema matches the current one. It's
@@ -156,8 +159,7 @@ template <typename T>
static inline query<T>
package_query (bool custom_bot,
brep::params::build_task& params,
- interactive_mode imode,
- uint64_t queued_expiration_ns)
+ interactive_mode imode)
{
using namespace brep;
using query = query<T>;
@@ -258,7 +260,9 @@ package_query (bool custom_bot,
return q &&
(query::build_tenant::queued_timestamp.is_null () ||
- query::build_tenant::queued_timestamp < queued_expiration_ns);
+ query::build_tenant::queued_timestamp <
+ chrono::duration_cast<chrono::nanoseconds> (
+ system_clock::now ().time_since_epoch ()).count ());
}
bool brep::build_task::
@@ -540,7 +544,7 @@ handle (request& rq, response& rs)
const config_machine& cm) -> task_response_manifest
{
uint64_t ts (
- chrono::duration_cast<std::chrono::nanoseconds> (
+ chrono::duration_cast<chrono::nanoseconds> (
b.timestamp.time_since_epoch ()).count ());
string session (b.tenant + '/' +
@@ -633,8 +637,8 @@ handle (request& rq, response& rs)
move (task));
};
- // Calculate the build/rebuild (building/built state) and the `queued`
- // notifications expiration time for package configurations.
+ // Calculate the build/rebuild (building/built state) for package
+ // configurations.
//
timestamp now (system_clock::now ());
@@ -658,9 +662,6 @@ handle (request& rq, response& rs)
timestamp forced_rebuild_expiration (
expiration (options_->build_forced_rebuild_timeout ()));
- uint64_t queued_expiration_ns (
- expiration_ns (options_->build_queued_timeout ()));
-
// Calculate the soft/hard rebuild expiration time, based on the
// respective build-{soft,hard}-rebuild-timeout and
// build-alt-{soft,hard}-rebuild-{start,stop,timeout} configuration
@@ -777,8 +778,7 @@ handle (request& rq, response& rs)
pkg_query pq (package_query<buildable_package> (custom_bot,
params,
- imode,
- queued_expiration_ns));
+ imode));
// Transform (in-place) the interactive login information into the actual
// login command, if specified in the manifest and the transformation
@@ -934,8 +934,7 @@ handle (request& rq, response& rs)
query q (package_query<buildable_package_count> (custom_bot,
params,
- imode,
- queued_expiration_ns));
+ imode));
if (conn == nullptr)
conn = build_db_->connection ();
@@ -1230,6 +1229,44 @@ handle (request& rq, response& rs)
tpc == 1, p.configs.size () == 1};
};
+ // Calculate the tenant's queued timestamp based on the number of queued
+ // builds, update the tenant, and return the resulting timestamp. Must
+ // be called inside the build db transaction.
+ //
+ // Note that it feels unlikely that the tenant queued timestamp is
+ // already set to some greater value. Let's, for good measure, consider
+ // such a possibility and, if that's the case, leave the timestamp
+ // intact and return nullopt.
+ //
+ auto update_queued_timestamp = [this] (const shared_ptr<build_tenant>& t,
+ size_t queued_builds)
+ -> optional<timestamp>
+ {
+ assert (transaction::has_current ());
+
+ // For simplicity, let's assume that the total `queued` notification
+ // delivery time is proportional to the number of build configuration
+ // batches.
+ //
+ size_t b (options_->build_queued_batch ());
+ size_t n (queued_builds / b + (queued_builds % b != 0 ? 1 : 0));
+
+ timestamp ts (system_clock::now () +
+ chrono::seconds (n * options_->build_queued_timeout ()));
+
+ if (t->queued_timestamp && *t->queued_timestamp > ts)
+ return nullopt;
+
+ t->queued_timestamp = ts;
+ build_db_->update (t);
+
+ return t->queued_timestamp;
+ };
+
+ // Stash the tenant queued timestamp, if we set it.
+ //
+ optional<timestamp> queued_timestamp;
+
// Collect the auxiliary machines required for testing of the specified
// package configuration and the external test packages, if present for
// the specified target configuration (task_auxiliary_machines),
@@ -2006,14 +2043,14 @@ handle (request& rq, response& rs)
// If we ought to call the
// tenant_service_build_queued::build_queued() callback
// for the queued builds (qbs is not empty), then we
- // also set the package tenant's queued timestamp to the
- // current time to prevent any concurrently running task
- // handlers from picking any of these queued builds now
- // and so potentially interfering with us by sending
- // their `building` notification before we send our
- // `queued` notification (see tenant::queued_timestamp
- // for details). Note that the `queued` notification for
- // the being built configuration doesn't require setting
+ // also set the package tenant's queued timestamp to
+ // prevent any concurrently running task handlers from
+ // picking any of these queued builds now and so
+ // potentially interfering with us by sending their
+ // `building` notification before we send our `queued`
+ // notification (see tenant::queued_timestamp for
+ // details). Note that the `queued` notification for the
+ // being built configuration doesn't require setting
// this timestamp, since after the respective build
// object is changed and updated in the database it may
// not be picked by any task handler in the foreseeable
@@ -2029,8 +2066,8 @@ handle (request& rq, response& rs)
if (!qbs.empty ())
{
- t->queued_timestamp = system_clock::now ();
- build_db_->update (t);
+ queued_timestamp =
+ update_queued_timestamp (t, qbs.size ());
}
}
}
@@ -2275,9 +2312,9 @@ handle (request& rq, response& rs)
// If we ought to call the
// tenant_service_build_queued::build_queued()
// callback for the queued builds, then also set the
- // package tenant's queued timestamp to the current
- // time to prevent the notifications race (see above
- // building from scratch for details).
+ // package tenant's queued timestamp to prevent the
+ // notifications race (see above building from scratch
+ // for details).
//
if (!qbs.empty () || !rebuild_interrupted_rebuild)
{
@@ -2285,8 +2322,8 @@ handle (request& rq, response& rs)
if (!qbs.empty ())
{
- t->queued_timestamp = system_clock::now ();
- build_db_->update (t);
+ queued_timestamp =
+ update_queued_timestamp (t, qbs.size ());
}
}
}
@@ -2334,6 +2371,7 @@ handle (request& rq, response& rs)
tsq = nullptr;
tss = nullopt;
qbs.clear ();
+ queued_timestamp = nullopt;
}
// If the task manifest is prepared, then bail out from the package
@@ -2386,9 +2424,74 @@ handle (request& rq, response& rs)
{
conn = build_db_->connection ();
- if (optional<string> data =
- update_tenant_service_state (conn, ss.type, ss.id, f))
- ss.data = move (data);
+ // Now, as the `queued` notification is delivered, let's check if
+ // we have set the tenant's queued timestamp to prevent the race
+ // and, if that's the case, reset it. Not to start additional
+ // database transaction, we will do this together with the service
+ // data update.
+ //
+ optional<string> service_data;
+ bool queued_timestamp_reset (false);
+
+ update_tenant_service_state (
+ conn, ss.type, ss.id,
+ [&service_data,
+ &queued_timestamp_reset,
+ &queued_timestamp,
+ &f,
+ this] (const shared_ptr<build_tenant>& t)
+ {
+ // Reset, in case this is a retry after the recoverable
+ // database failure.
+ //
+ service_data = nullopt;
+ queued_timestamp_reset = false;
+
+ if (t != nullptr)
+ {
+ // Reset the queued timestamp.
+ //
+ // Note that it may potentially be re-assigned by some other
+ // handler to some greater value (timeout calculated by the
+ // update_queued_timestamp() was too small, etc). Thus,
+ // let's only reset it if it was set by us.
+ //
+ if (t->queued_timestamp == queued_timestamp)
+ {
+ t->queued_timestamp = nullopt;
+ queued_timestamp_reset = true;
+ }
+
+ // Update the service data.
+ //
+ assert (t->service); // Shouldn't be here otherwise.
+
+ tenant_service& s (*t->service);
+ optional<string> data (f (t->id, s));
+
+ if (data)
+ s.data = move (*data);
+
+ // If queued timestamp is reset and/or the service data is
+ // updated, then update the tenant object in the database.
+ //
+ if (queued_timestamp_reset || data)
+ build_db_->update (t);
+
+ // If the service data is updated, then stash the new data.
+ //
+ if (data)
+ service_data = move (s.data);
+ }
+ });
+
+ if (service_data)
+ ss.data = move (service_data);
+
+ // Make sure we will not try to reset the queued timestamp again.
+ //
+ if (queued_timestamp_reset)
+ queued_timestamp = nullopt;
}
}
@@ -2461,6 +2564,34 @@ handle (request& rq, response& rs)
}
}
+ // Now, as all the potential tenant service notifications are delivered,
+ // let's check if we have set the tenant's queued timestamp to prevent
+ // the race and didn't reset it yet. If that's the case, let's reset it
+ // now.
+ //
+ if (queued_timestamp)
+ {
+ assert (tss); // Wouldn't be here otherwise.
+
+ const build& b (*tss->second);
+
+ if (conn == nullptr)
+ conn = build_db_->connection ();
+
+ transaction tr (conn->begin ());
+
+ shared_ptr<build_tenant> t (
+ build_db_->load<build_tenant> (b.tenant));
+
+ if (t->queued_timestamp == queued_timestamp)
+ {
+ t->queued_timestamp = nullopt;
+ build_db_->update (t);
+ }
+
+ tr.commit ();
+ }
+
// If the task manifest is prepared, then check that the number of the
// build auxiliary machines is less than 10. If that's not the case,
// then turn the build into the built state with the abort status.
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx
index 763842c..d49dfa2 100644
--- a/mod/mod-ci-github-gq.cxx
+++ b/mod/mod-ci-github-gq.cxx
@@ -820,8 +820,11 @@ namespace brep
const string& iat,
uint64_t ai,
const string& rid,
- const string& hs)
+ const string& hs,
+ size_t batch)
{
+ assert (batch != 0);
+
// No support for result_status so state cannot be built.
//
#ifndef NDEBUG
@@ -840,7 +843,7 @@ namespace brep
// responding with 502). We handle this here by batching the creation.
//
size_t n (crs.size ());
- size_t b (n / 60 + (n % 60 != 0 ? 1 : 0));
+ size_t b (n / batch + (n % batch != 0 ? 1 : 0));
size_t bn (n / b);
auto i (crs.begin ());
diff --git a/mod/mod-ci-github-gq.hxx b/mod/mod-ci-github-gq.hxx
index 5fc75aa..2a8138a 100644
--- a/mod/mod-ci-github-gq.hxx
+++ b/mod/mod-ci-github-gq.hxx
@@ -39,7 +39,8 @@ namespace brep
const string& installation_access_token,
uint64_t app_id,
const string& repository_id,
- const string& head_sha);
+ const string& head_sha,
+ size_t batch);
// Create a new check run on GitHub for a build in the queued or building
// state. Note that the state cannot be built because in that case a
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx
index cd45c4c..08ff2dc 100644
--- a/mod/mod-ci-github.cxx
+++ b/mod/mod-ci-github.cxx
@@ -1530,7 +1530,8 @@ namespace brep
conclusion_building_summary};
if (gq_create_check_runs (error, check_runs, iat->token,
- cr.check_run.app_id, repo_node_id, head_sha))
+ cr.check_run.app_id, repo_node_id, head_sha,
+ options_->build_queued_batch ()))
{
assert (bcr.state == build_state::queued);
assert (ccr.state == build_state::building);
@@ -2420,7 +2421,8 @@ namespace brep
iat->token,
sd.app_id,
sd.repository_node_id,
- sd.report_sha))
+ sd.report_sha,
+ options_->build_queued_batch ()))
{
for (const check_run& cr: crs)
{
diff --git a/mod/module.cli b/mod/module.cli
index 57a5f31..f78cd06 100644
--- a/mod/module.cli
+++ b/mod/module.cli
@@ -305,13 +305,23 @@ namespace brep
for the \cb{build-hard-rebuild-timeout} option."
}
- size_t build-queued-timeout = 30
+ size_t build-queued-timeout = 15
{
"<seconds>",
- "Time to wait before assuming the \cb{queued} notifications are
+ "Time to wait before assuming the \cb{queued} notification batch is
delivered for package CI requests submitted via third-party services
- (GitHub, etc). During this time a package is not considered for a
- build. Must be specified in seconds. Default is 30 seconds."
+ (GitHub, etc). Until all such batches are delivered, a package is
+ not considered for a build. Must be specified in seconds. Default
+ is 15 seconds. See also \cb{build-queued-batch}."
+ }
+
+ size_t build-queued-batch = 60
+ {
+ "<num>",
+ "The maximum number of build configurations in the \cb{queued}
+ notification batch for package CI requests submitted via third-party
+ services (see \cb{build-queued-timeout} for details). The default
+ is 60."
}
};