aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2024-11-05 12:11:22 +0200
committerKaren Arutyunov <karen@codesynthesis.com>2024-11-05 15:27:37 +0200
commitf8353c612fb655348fdd69ff942a04560d8d55dc (patch)
tree4d4a18bfe46770c86fe7101c6aed06da15e0e345
parent153f38c301f48dc4c1c363ab81ca5a938fc83a7a (diff)
Retry database operations on recoverable failures in ci_start class functions
-rw-r--r--libbrep/build-package.hxx8
-rw-r--r--mod/ci-common.cxx470
-rw-r--r--mod/ci-common.hxx20
-rw-r--r--mod/database-module.cxx8
-rw-r--r--mod/mod-ci-github.cxx12
-rw-r--r--mod/mod-ci.cxx5
6 files changed, 331 insertions, 192 deletions
diff --git a/libbrep/build-package.hxx b/libbrep/build-package.hxx
index 14217b2..f022a0d 100644
--- a/libbrep/build-package.hxx
+++ b/libbrep/build-package.hxx
@@ -42,6 +42,10 @@ namespace brep
unloaded_timestamp (t),
unloaded_notify_interval (n) {}
+ // Create tenant for subsequent loading or incremental building.
+ //
+ build_tenant () = default;
+
string id;
bool private_ = false;
@@ -57,10 +61,6 @@ namespace brep
// Database mapping.
//
#pragma db member(id) id
-
- private:
- friend class odb::access;
- build_tenant () = default;
};
// Foreign object that is mapped to a subset of the repository object.
diff --git a/mod/ci-common.cxx b/mod/ci-common.cxx
index c7dfdb9..cc361d2 100644
--- a/mod/ci-common.cxx
+++ b/mod/ci-common.cxx
@@ -541,6 +541,7 @@ namespace brep
const basic_mark&,
const basic_mark* trace,
odb::core::database& db,
+ size_t retry,
tenant_service&& service,
duration notify_interval,
duration notify_delay,
@@ -551,107 +552,135 @@ namespace brep
assert (mode == duplicate_tenant_mode::fail || !service.id.empty ());
assert (!transaction::has_current ());
+ build_tenant t;
duplicate_tenant_result r (duplicate_tenant_result::created);
- transaction tr (db.begin ());
-
- // Unless we are in the 'fail on duplicate' mode, check if this service
- // type/id pair is already in use and, if that's the case, either ignore
- // it or reassign this service to a new tenant, canceling the old one.
- //
- if (mode != duplicate_tenant_mode::fail)
+ for (string request_id;;)
{
- using query = query<build_tenant>;
-
- shared_ptr<build_tenant> t (
- db.query_one<build_tenant> (query::service.id == service.id &&
- query::service.type == service.type));
- if (t != nullptr)
+ try
{
- // Reduce the replace_archived mode to the replace or ignore mode.
+ transaction tr (db.begin ());
+
+ // Unless we are in the 'fail on duplicate' mode, check if this
+ // service type/id pair is already in use and, if that's the case,
+ // either ignore it or reassign this service to a new tenant,
+ // canceling the old one.
//
- if (mode == duplicate_tenant_mode::replace_archived)
+ if (mode != duplicate_tenant_mode::fail)
{
- mode = (t->archived
- ? duplicate_tenant_mode::replace
- : duplicate_tenant_mode::ignore);
+ using query = query<build_tenant>;
+
+ shared_ptr<build_tenant> t (
+ db.query_one<build_tenant> (query::service.id == service.id &&
+ query::service.type == service.type));
+ if (t != nullptr)
+ {
+ // Reduce the replace_archived mode to the replace or ignore mode.
+ //
+ if (mode == duplicate_tenant_mode::replace_archived)
+ {
+ mode = (t->archived
+ ? duplicate_tenant_mode::replace
+ : duplicate_tenant_mode::ignore);
+ }
+
+ // Bail out in the ignore mode and cancel the tenant in the
+ // replace mode.
+ //
+ if (mode == duplicate_tenant_mode::ignore)
+ return make_pair (move (t->id), duplicate_tenant_result::ignored);
+
+ assert (mode == duplicate_tenant_mode::replace);
+
+ if (t->unloaded_timestamp)
+ {
+ db.erase (t);
+ }
+ else
+ {
+ t->service = nullopt;
+ t->archived = true;
+ db.update (t);
+ }
+
+ r = duplicate_tenant_result::replaced;
+ }
}
- // Bail out in the ignore mode and cancel the tenant in the replace
- // mode.
+ // Generate the request id.
//
- if (mode == duplicate_tenant_mode::ignore)
- return make_pair (move (t->id), duplicate_tenant_result::ignored);
-
- assert (mode == duplicate_tenant_mode::replace);
-
- if (t->unloaded_timestamp)
+ if (request_id.empty ())
+ try
{
- db.erase (t);
+ request_id = uuid::generate ().string ();
}
- else
+ catch (const system_error& e)
{
- t->service = nullopt;
- t->archived = true;
- db.update (t);
+ error << "unable to generate request id: " << e;
+ return nullopt;
}
- r = duplicate_tenant_result::replaced;
- }
- }
-
- // Generate the request id.
- //
- string request_id;
-
- try
- {
- request_id = uuid::generate ().string ();
- }
- catch (const system_error& e)
- {
- error << "unable to generate request id: " << e;
- return nullopt;
- }
-
- // Use the generated request id if the tenant service id is not specified.
- //
- if (service.id.empty ())
- service.id = request_id;
-
- build_tenant t (move (request_id),
- move (service),
- system_clock::now () - notify_interval + notify_delay,
- notify_interval);
- // Note that in contrast to brep-load, we know that the tenant id is
- // unique and thus we don't try to remove a tenant with such an id.
- // There is also not much reason to assume that we may have switched
- // from the single-tenant mode here and remove the respective tenant,
- // unless we are in the tenant-service functionality development mode.
- //
+ // Use the generated request id if the tenant service id is not
+ // specified.
+ //
+ if (service.id.empty ())
+ service.id = request_id;
+
+ t = build_tenant (move (request_id),
+ move (service),
+ system_clock::now () - notify_interval + notify_delay,
+ notify_interval);
+
+ // Note that in contrast to brep-load, we know that the tenant id is
+ // unique and thus we don't try to remove a tenant with such an id.
+ // There is also not much reason to assume that we may have switched
+ // from the single-tenant mode here and remove the respective tenant,
+ // unless we are in the tenant-service functionality development mode.
+ //
#ifdef BREP_CI_TENANT_SERVICE_UNLOADED
- cstrings ts ({""});
+ cstrings ts ({""});
- db.erase_query<build_package> (
- query<build_package>::id.tenant.in_range (ts.begin (), ts.end ()));
+ db.erase_query<build_package> (
+ query<build_package>::id.tenant.in_range (ts.begin (), ts.end ()));
- db.erase_query<build_repository> (
- query<build_repository>::id.tenant.in_range (ts.begin (), ts.end ()));
+ db.erase_query<build_repository> (
+ query<build_repository>::id.tenant.in_range (ts.begin (), ts.end ()));
- db.erase_query<build_public_key> (
- query<build_public_key>::id.tenant.in_range (ts.begin (), ts.end ()));
+ db.erase_query<build_public_key> (
+ query<build_public_key>::id.tenant.in_range (ts.begin (), ts.end ()));
- db.erase_query<build_tenant> (
- query<build_tenant>::id.in_range (ts.begin (), ts.end ()));
+ db.erase_query<build_tenant> (
+ query<build_tenant>::id.in_range (ts.begin (), ts.end ()));
#endif
- db.persist (t);
+ db.persist (t);
- tr.commit ();
+ tr.commit ();
- if (trace != nullptr)
- *trace << "unloaded CI request " << t.id << " for service "
- << t.service->id << ' ' << t.service->type << " is created";
+ if (trace != nullptr)
+ *trace << "unloaded CI request " << t.id << " for service "
+ << t.service->id << ' ' << t.service->type << " is created";
+
+ // Bail out if we have successfully erased, updated, or persisted the
+ // tenant object.
+ //
+ break;
+ }
+ catch (const odb::recoverable& e)
+ {
+ // If no more retries left, don't re-throw odb::recoverable not to
+ // retry at the upper level.
+ //
+ if (retry-- == 0)
+ throw runtime_error (e.what ());
+
+ // Prepare for the next iteration.
+ //
+ request_id = move (t.id);
+ service = move (*t.service);
+ r = duplicate_tenant_result::created;
+ }
+ }
return make_pair (move (t.id), r);
}
@@ -661,51 +690,69 @@ namespace brep
const basic_mark& warn,
const basic_mark* trace,
odb::core::database& db,
+ size_t retry,
tenant_service&& service,
const repository_location& repository) const
{
using namespace odb::core;
string request_id;
+
+ for (;;)
{
- assert (!transaction::has_current ());
+ try
+ {
+ assert (!transaction::has_current ());
- transaction tr (db.begin ());
+ transaction tr (db.begin ());
- using query = query<build_tenant>;
+ using query = query<build_tenant>;
- shared_ptr<build_tenant> t (
- db.query_one<build_tenant> (query::service.id == service.id &&
- query::service.type == service.type));
+ shared_ptr<build_tenant> t (
+ db.query_one<build_tenant> (query::service.id == service.id &&
+ query::service.type == service.type));
- if (t == nullptr)
- {
- error << "unable to find tenant for service " << service.id << ' '
- << service.type;
+ if (t == nullptr)
+ {
+ error << "unable to find tenant for service " << service.id << ' '
+ << service.type;
- return nullopt;
- }
- else if (t->archived)
- {
- error << "tenant " << t->id << " for service " << service.id << ' '
- << service.type << " is already archived";
+ return nullopt;
+ }
+ else if (t->archived)
+ {
+ error << "tenant " << t->id << " for service " << service.id << ' '
+ << service.type << " is already archived";
- return nullopt;
- }
- else if (!t->unloaded_timestamp)
- {
- error << "tenant " << t->id << " for service " << service.id << ' '
- << service.type << " is already loaded";
+ return nullopt;
+ }
+ else if (!t->unloaded_timestamp)
+ {
+ error << "tenant " << t->id << " for service " << service.id << ' '
+ << service.type << " is already loaded";
- return nullopt;
- }
+ return nullopt;
+ }
- t->unloaded_timestamp = nullopt;
- db.update (t);
+ t->unloaded_timestamp = nullopt;
+ db.update (t);
- tr.commit ();
+ tr.commit ();
- request_id = move (t->id);
+ request_id = move (t->id);
+
+ // Bail out if we have successfully updated the tenant object.
+ //
+ break;
+ }
+ catch (const odb::recoverable& e)
+ {
+ // If no more retries left, don't re-throw odb::recoverable not to
+ // retry at the upper level.
+ //
+ if (retry-- == 0)
+ throw runtime_error (e.what ());
+ }
}
assert (options_ != nullptr); // Shouldn't be called otherwise.
@@ -739,6 +786,7 @@ namespace brep
const basic_mark&,
const basic_mark* trace,
odb::core::database& db,
+ size_t retry,
const string& type,
const string& id) const
{
@@ -746,34 +794,57 @@ namespace brep
assert (!transaction::has_current ());
- transaction tr (db.begin ());
+ optional<tenant_service> r;
- using query = query<build_tenant>;
+ for (;;)
+ {
+ try
+ {
+ transaction tr (db.begin ());
- shared_ptr<build_tenant> t (
- db.query_one<build_tenant> (query::service.id == id &&
- query::service.type == type));
- if (t == nullptr)
- return nullopt;
+ using query = query<build_tenant>;
- optional<tenant_service> r (move (t->service));
+ shared_ptr<build_tenant> t (
+ db.query_one<build_tenant> (query::service.id == id &&
+ query::service.type == type));
+ if (t == nullptr)
+ return nullopt;
- if (t->unloaded_timestamp)
- {
- db.erase (t);
- }
- else
- {
- t->service = nullopt;
- t->archived = true;
- db.update (t);
- }
+ r = move (t->service);
- tr.commit ();
+ if (t->unloaded_timestamp)
+ {
+ db.erase (t);
+ }
+ else
+ {
+ t->service = nullopt;
+ t->archived = true;
+ db.update (t);
+ }
- if (trace != nullptr)
- *trace << "CI request " << t->id << " for service " << id << ' ' << type
- << " is canceled";
+ tr.commit ();
+
+ if (trace != nullptr)
+ *trace << "CI request " << t->id << " for service " << id << ' '
+ << type << " is canceled";
+
+ // Bail out if we have successfully updated or erased the tenant
+ // object.
+ //
+ break;
+ }
+ catch (const odb::recoverable& e)
+ {
+ // If no more retries left, don't re-throw odb::recoverable not to
+ // retry at the upper level.
+ //
+ if (retry-- == 0)
+ throw runtime_error (e.what ());
+
+ r = nullopt; // Prepare for the next iteration.
+ }
+ }
return r;
}
@@ -784,30 +855,50 @@ namespace brep
const basic_mark* trace,
const string& reason,
odb::core::database& db,
+ size_t retry,
const string& tid) const
{
using namespace odb::core;
assert (!transaction::has_current ());
- transaction tr (db.begin ());
+ for (;;)
+ {
+ try
+ {
+ transaction tr (db.begin ());
- shared_ptr<build_tenant> t (db.find<build_tenant> (tid));
+ shared_ptr<build_tenant> t (db.find<build_tenant> (tid));
- if (t == nullptr)
- return false;
+ if (t == nullptr)
+ return false;
- if (t->unloaded_timestamp)
- {
- db.erase (t);
- }
- else if (!t->archived)
- {
- t->archived = true;
- db.update (t);
- }
+ if (t->unloaded_timestamp)
+ {
+ db.erase (t);
+ }
+ else if (!t->archived)
+ {
+ t->archived = true;
+ db.update (t);
+ }
- tr.commit ();
+ tr.commit ();
+
+ // Bail out if we have successfully updated or erased the tenant
+ // object.
+ //
+ break;
+ }
+ catch (const odb::recoverable& e)
+ {
+ // If no more retries left, don't re-throw odb::recoverable not to
+ // retry at the upper level.
+ //
+ if (retry-- == 0)
+ throw runtime_error (e.what ());
+ }
+ }
if (trace != nullptr)
*trace << "CI request " << tid << " is canceled: "
@@ -820,61 +911,80 @@ namespace brep
optional<build_state> ci_start::
rebuild (odb::core::database& db,
+ size_t retry,
const build_id& id,
function<optional<string> (const tenant_service&,
build_state)> uf) const
{
using namespace odb::core;
- //@@@ Should we not retry transactions (here and in other functions that
- // update)?
+ build_state s;
- // NOTE: don't forget to update build_force::handle() if changing anything
- // here.
- //
- transaction t (db.begin ());
-
- package_build pb;
- if (!db.query_one<package_build> (query<package_build>::build::id == id,
- pb) ||
- pb.archived)
+ for (;;)
{
- return nullopt;
- }
-
- const shared_ptr<build>& b (pb.build);
- build_state s (b->state);
-
- if (s != build_state::queued)
- {
- force_state force (s == build_state::built
- ? force_state::forced
- : force_state::forcing);
-
- if (b->force != force)
- {
- b->force = force;
- db.update (b);
- }
-
- if (uf != nullptr)
+ try
{
- shared_ptr<build_tenant> t (db.load<build_tenant> (b->tenant));
+ // NOTE: don't forget to update build_force::handle() if changing
+ // anything here.
+ //
+ transaction t (db.begin ());
- assert (t->service);
+ package_build pb;
+ if (!db.query_one<package_build> (query<package_build>::build::id == id,
+ pb) ||
+ pb.archived)
+ {
+ return nullopt;
+ }
- tenant_service& ts (*t->service);
+ const shared_ptr<build>& b (pb.build);
+ s = b->state;
- if (optional<string> data = uf (ts, s))
+ if (s != build_state::queued)
{
- ts.data = move (*data);
- db.update (t);
+ force_state force (s == build_state::built
+ ? force_state::forced
+ : force_state::forcing);
+
+ if (b->force != force)
+ {
+ b->force = force;
+ db.update (b);
+ }
+
+ if (uf != nullptr)
+ {
+ shared_ptr<build_tenant> t (db.load<build_tenant> (b->tenant));
+
+ assert (t->service);
+
+ tenant_service& ts (*t->service);
+
+ if (optional<string> data = uf (ts, s))
+ {
+ ts.data = move (*data);
+ db.update (t);
+ }
+ }
}
+
+ t.commit ();
+
+ // Bail out if we have successfully updated the build and tenant
+ // objects.
+ //
+ break;
+ }
+ catch (const odb::recoverable& e)
+ {
+ // If no more retries left, don't re-throw odb::recoverable not to
+ // retry at the upper level.
+ //
+ if (retry-- == 0)
+ throw runtime_error (e.what ());
}
}
- t.commit ();
-
return s;
}
diff --git a/mod/ci-common.hxx b/mod/ci-common.hxx
index b9cf14a..6d21ba5 100644
--- a/mod/ci-common.hxx
+++ b/mod/ci-common.hxx
@@ -103,6 +103,9 @@ namespace brep
// Finally note that only duplicate_tenant_mode::fail can be used if the
// service id is empty.
//
+ // Repeat the attempts on the recoverable database failures (deadlocks,
+ // etc) and throw runtime_error if no more retries left.
+ //
// Note: should be called out of the database transaction.
//
enum class duplicate_tenant_mode {fail, ignore, replace, replace_archived};
@@ -113,6 +116,7 @@ namespace brep
const basic_mark& warn,
const basic_mark* trace,
odb::core::database&,
+ size_t retry,
tenant_service&&,
duration notify_interval,
duration notify_delay,
@@ -121,6 +125,9 @@ namespace brep
// Load (and start) previously created (as unloaded) CI request. Similarly
// to the start() function, return nullopt on an internal error.
//
+ // Repeat the attempts on the recoverable database failures (deadlocks,
+ // etc) and throw runtime_error if no more retries left.
+ //
// Note that tenant_service::id is used to identify the CI request tenant.
//
// Note: should be called out of the database transaction.
@@ -130,6 +137,7 @@ namespace brep
const basic_mark& warn,
const basic_mark* trace,
odb::core::database&,
+ size_t retry,
tenant_service&&,
const repository_location& repository) const;
@@ -142,6 +150,9 @@ namespace brep
// dropped. Note that the latter allow using unloaded tenants as a
// relatively cheap asynchronous execution mechanism.
//
+ // Repeat the attempts on the recoverable database failures (deadlocks,
+ // etc) and throw runtime_error if no more retries left.
+ //
// Note: should be called out of the database transaction.
//
optional<tenant_service>
@@ -149,6 +160,7 @@ namespace brep
const basic_mark& warn,
const basic_mark* trace,
odb::core::database&,
+ size_t retry,
const string& type,
const string& id) const;
@@ -161,6 +173,9 @@ namespace brep
// this version does not touch the service state (use the above version if
// you want to clear it).
//
+ // Repeat the attempts on the recoverable database failures (deadlocks,
+ // etc) and throw runtime_error if no more retries left.
+ //
// Note: should be called out of the database transaction.
//
bool
@@ -169,6 +184,7 @@ namespace brep
const basic_mark* trace,
const string& reason,
odb::core::database&,
+ size_t retry,
const string& tenant_id) const;
// Schedule the re-build of the package build and return the build object
@@ -207,10 +223,14 @@ namespace brep
// called if the rebuild was actually scheduled, that is, the current
// state is building or built.
//
+ // Repeat the attempts on the recoverable database failures (deadlocks,
+ // etc) and throw runtime_error if no more retries left.
+ //
// Note: should be called out of the database transaction.
//
optional<build_state>
rebuild (odb::core::database&,
+ size_t retry,
const build_id&,
function<optional<string> (const tenant_service&,
build_state)> = nullptr) const;
diff --git a/mod/database-module.cxx b/mod/database-module.cxx
index bbb3e59..bce8c93 100644
--- a/mod/database-module.cxx
+++ b/mod/database-module.cxx
@@ -119,10 +119,14 @@ namespace brep
}
catch (const odb::recoverable& e)
{
+ HANDLER_DIAG;
+
+ // If no more retries left, don't re-throw odb::recoverable not to
+ // retry at the upper level.
+ //
if (retry-- == 0)
- throw;
+ fail << e << "; no tenant service state update retries left";
- HANDLER_DIAG;
l1 ([&]{trace << e << "; " << retry + 1 << " tenant service "
<< "state update retries left";});
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx
index a3e2586..0b04791 100644
--- a/mod/mod-ci-github.cxx
+++ b/mod/mod-ci-github.cxx
@@ -561,7 +561,7 @@ namespace brep
auto pr (create (error,
warn,
verb_ ? &trace : nullptr,
- *build_db_,
+ *build_db_, retry_,
tenant_service (sid, "ci-github", sd.json ()),
chrono::seconds (30) /* interval */,
chrono::seconds (0) /* delay */,
@@ -714,7 +714,7 @@ namespace brep
if (!create (error,
warn,
verb_ ? &trace : nullptr,
- *build_db_,
+ *build_db_, retry_,
move (ts),
chrono::seconds (30) /* interval */,
chrono::seconds (0) /* delay */))
@@ -838,7 +838,7 @@ namespace brep
// it gets archived after some timeout.
//
if (auto pr = create (error, warn, verb_ ? &trace : nullptr,
- *build_db_,
+ *build_db_, retry_,
tenant_service (sid, "ci-github", sd.json ()),
chrono::seconds (30) /* interval */,
chrono::seconds (0) /* delay */,
@@ -877,7 +877,9 @@ namespace brep
// Cancel the pre-check tenant.
//
if (!cancel (error, warn, verb_ ? &trace : nullptr,
- *build_db_, ts.type, ts.id))
+ *build_db_, retry_,
+ ts.type,
+ ts.id))
{
// Should never happen (no such tenant).
//
@@ -1028,7 +1030,7 @@ namespace brep
repository_location rl (move (ru), repository_type::git);
optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr,
- *build_db_,
+ *build_db_, retry_,
move (ts),
move (rl)));
diff --git a/mod/mod-ci.cxx b/mod/mod-ci.cxx
index 8c47bc4..52f4644 100644
--- a/mod/mod-ci.cxx
+++ b/mod/mod-ci.cxx
@@ -590,7 +590,10 @@ handle (request& rq, response& rs)
if (tid.empty ())
throw invalid_request (400, "invalid CI request id");
- if (!cancel (error, warn, verb_ ? &trace : nullptr, reason, *build_db_, tid))
+ if (!cancel (error, warn, verb_ ? &trace : nullptr,
+ reason,
+ *build_db_, retry_,
+ tid))
throw invalid_request (400, "unknown CI request id");
// We have all the data, so don't buffer the response content.