From 4ec9bd7bf1280b11afbfae50258380aecd3fa1b9 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 18 Apr 2023 01:06:53 +0300 Subject: Make random package ordering distribution more even in build task module --- mod/mod-build-task.cxx | 227 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 211 insertions(+), 16 deletions(-) diff --git a/mod/mod-build-task.cxx b/mod/mod-build-task.cxx index 9ce2520..33a7f58 100644 --- a/mod/mod-build-task.cxx +++ b/mod/mod-build-task.cxx @@ -607,10 +607,122 @@ handle (request& rq, response& rs) // starting from the random offset and wrapping around when reaching the // end. // + // Note, however, that since there can be some packages which are already + // built for all configurations and are not archived yet, picking an + // unbuilt package this way may not work as desired. Think of the + // following case with 5 packages in 3 non-archived tenants: + // + // 0: A - unbuilt, tenant 1 + // 1: B - built, tenant 2 + // 2: C - built, tenant 2 + // 3: D - built, tenant 2 + // 4: E - unbuilt, tenant 3 + // + // If we just pick a random starting offset in the [0, 4] range, then we + // will build A package with probability 0.2 and E with probability 0.8. + // + // To fix that we will only try to build a package from a tenant that the + // random starting offset refers to. Failed that, we will randomly pick + // new starting offset and retry. To make sure we don't retry indefinitely + // when there are no more packages to build (and also for the sake of + // optimization; see below), we will track positions of packages which we + // (unsuccessfully) have already tried to build and skip them while + // generating the random starting offsets and while iterating over + // packages. + // + // Also note that since we iterate over packages in chunks, each queried + // in a separate transaction, the number of packages may potentially + // increase or decrease while iterating over them. Thus, to keep things + // consistent, we may need to update our tried positions tracking state + // accordingly (not to cycle, not to refer to an entry out of the list + // boundaries, etc). Generally, regardless whether the number of packages + // has changed or not, the offsets and position statuses may now refer to + // some different packages. The only sensible thing we can do in such + // cases (without trying to detect this situation and restart from + // scratch) is to serve the request and issue some build task, if + // possible. + // + bool random (options_->build_package_order () == build_order::random); size_t start_offset (0); - optional package_count; - if (options_->build_package_order () == build_order::random) + // List of "tried to build" package statuses. True entries denote + // positions of packages which we have tried to build. Initially all + // entries are false. + // + vector tried_positions; + + // Number of false entries in the above vector. Used merely as an + // optimization to bail out. + // + size_t untried_positions_count (0); + + // Return a random position of a package that we have not yet tried to + // build, if present, and nullopt otherwise. + // + auto rand_position = [&tried_positions, + &untried_positions_count] () -> optional + { + assert (untried_positions_count <= tried_positions.size ()); + + if (untried_positions_count == 0) + return nullopt; + + size_t r; + while (tried_positions[r = rand (0, tried_positions.size () - 1)]) ; + return r; + }; + + // Mark the package at specified position as tried to build. Assume that + // it is not yet been tried to build. + // + auto position_tried = [&tried_positions, + &untried_positions_count] (size_t i) + { + assert (i < tried_positions.size () && + !tried_positions[i] && + untried_positions_count != 0); + + tried_positions[i] = true; + --untried_positions_count; + }; + + // Resize the tried positions list and update the untried positions + // counter accordingly if the package number has changed. + // + // For simplicity, assume that packages are added/removed to/from the end + // of the list. Note that misguessing in such a rare cases are possible + // but not harmful (see above for the reasoning). + // + auto resize_tried_positions = [&tried_positions, &untried_positions_count] + (size_t n) + { + if (n > tried_positions.size ()) // Packages added? + { + untried_positions_count += n - tried_positions.size (); + tried_positions.resize (n, false); + } + else if (n < tried_positions.size ()) // Packages removed? + { + for (size_t i (n); i != tried_positions.size (); ++i) + { + if (!tried_positions[i]) + { + assert (untried_positions_count != 0); + --untried_positions_count; + } + } + + tried_positions.resize (n); + } + else + { + // Not supposed to be called if the number of packages didn't change. + // + assert (false); + } + }; + + if (random) { using query = query; @@ -618,25 +730,45 @@ handle (request& rq, response& rs) transaction t (build_db_->begin ()); - // If there are any non-archived interactive build tennants, then we - // need to start from one of packages they contain. Note that packages - // from the interactive build tenants appear first (see below for the - // package ordering details). + // If there are any non-archived interactive build tenants, then the + // chosen randomization approach doesn't really work since interactive + // tenants must be preferred over non-interactive ones, which is + // achieved by proper ordering of the package query result (see + // below). Thus, we just disable randomization if there are any + // interactive tenants. // - package_count = + // But shouldn't we randomize the order between packages in multiple + // interactive tenants? Given that such a tenant may only contain a + // single package and can only be built in a single configuration that + // is probably not important. However, we may assume that the + // randomization still happens naturally due to the random nature of the + // tenant id, which is used as a primary sorting criteria (see below). + // + size_t interactive_package_count ( build_db_->query_value ( - q && query::build_tenant::interactive.is_not_null ()); + q && query::build_tenant::interactive.is_not_null ())); - if (*package_count == 0) - package_count = build_db_->query_value (q); + if (interactive_package_count == 0) + { + untried_positions_count = + build_db_->query_value (q); + } + else + random = false; t.commit (); - if (*package_count != 0) - start_offset = rand (0, *package_count - 1); + if (untried_positions_count != 0) + { + tried_positions.resize (untried_positions_count, false); + + optional so (rand_position ()); + assert (so); // Wouldn't be here otherwise. + start_offset = *so; + } } - if (!package_count || *package_count != 0) + if (!random || !tried_positions.empty ()) { // Specify the portion. // @@ -762,6 +894,10 @@ handle (request& rq, response& rs) return m.id; }; + // Tenant that the start offset refers to. + // + optional start_tenant; + for (bool done (false); tsm.session.empty () && !done; ) { transaction t (conn->begin ()); @@ -780,10 +916,24 @@ handle (request& rq, response& rs) // auto packages (pkg_prep_query.execute ()); + size_t chunk_size (packages.size ()); + size_t next_offset (offset + chunk_size); + + // If we are in the random package ordering mode, then also check if + // the package number has changed and, if that's the case, resize the + // tried positions list accordingly. + // + if (random && + (next_offset > tried_positions.size () || + (next_offset < tried_positions.size () && chunk_size < limit))) + { + resize_tried_positions (next_offset); + } + // Bail out if there is nothing left, unless we need to wrap around in // the random package ordering mode. // - if (packages.empty ()) + if (chunk_size == 0) { t.commit (); @@ -795,14 +945,59 @@ handle (request& rq, response& rs) continue; } - offset += packages.size (); + size_t position (offset); // Current package position. + offset = next_offset; - // Iterate over packages until we find one that needs building. + // Iterate over packages until we find one that needs building or have + // to bail out in the random package ordering mode for some reason (no + // more untried positions, need to restart, etc). // for (auto& bp: packages) { id = move (bp.id); + // If we are in the random package ordering mode, then cache the + // tenant the start offset refers to, if not cached yet, and check + // if we are still iterating over packages from this tenant + // otherwise. If the latter is not the case, then restart from a new + // random untried offset, if present, and bail out otherwise. + // + if (random) + { + if (!start_tenant) + { + start_tenant = id.tenant; + } + else if (*start_tenant != id.tenant) + { + if (optional so = rand_position ()) + { + start_offset = *so; + offset = start_offset; + start_tenant = nullopt; + limit = 50; + done = false; + } + else + done = true; + + break; + } + + size_t pos (position++); + + // Should have been resized, if required. + // + assert (pos < tried_positions.size ()); + + // Skip the position if it has already been tried. + // + if (tried_positions[pos]) + continue; + + position_tried (pos); + } + shared_ptr p (build_db_->load (id)); // Note that a request to interactively build a package in multiple -- cgit v1.1