From 48f1c1feda91d9809406c83569443eedc9ea799e Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 14 Jul 2023 22:27:02 +0300 Subject: Optimize builds page by discouraging PostgreSQL from using the nested loop join strategy --- mod/database.cxx | 6 +- mod/mod-builds.cxx | 389 +++++++++++++++++------------------- mod/mod-package-version-details.cxx | 6 +- 3 files changed, 194 insertions(+), 207 deletions(-) (limited to 'mod') diff --git a/mod/database.cxx b/mod/database.cxx index c88555a..02d521d 100644 --- a/mod/database.cxx +++ b/mod/database.cxx @@ -24,10 +24,10 @@ namespace brep operator< (const db_key& x, const db_key& y) { int r; - if ((r = x.user.compare (y.user)) != 0 || - (r = x.role.compare (y.role)) != 0 || + if ((r = x.user.compare (y.user)) != 0 || + (r = x.role.compare (y.role)) != 0 || (r = x.password.compare (y.password)) != 0 || - (r = x.name.compare (y.name)) != 0 || + (r = x.name.compare (y.name)) != 0 || (r = x.host.compare (y.host))) return r < 0; diff --git a/mod/mod-builds.cxx b/mod/mod-builds.cxx index bd87d84..58aa3aa 100644 --- a/mod/mod-builds.cxx +++ b/mod/mod-builds.cxx @@ -533,33 +533,19 @@ handle (request& rq, response& rs) vector builds; builds.reserve (page_configs); - // Prepare the package build prepared query. + // Prepare the package build query. // using query = query; - using prep_query = prepared_query; query q (build_query (&conf_ids, params, tn)); - // Specify the portion. Note that we will be querying builds in chunks, - // not to hold locks for too long. - // - // Also note that for each build we also load the corresponding - // package. Nevertheless, we use a fairly large portion to speed-up the - // builds traversal but also cache the package objects (see below). - // - size_t offset (0); - // Print package build configurations ordered by the timestamp (later goes // first). // - q += "ORDER BY" + query::build::timestamp + "DESC" + - "OFFSET" + query::_ref (offset) + "LIMIT 500"; + q += "ORDER BY" + query::build::timestamp + "DESC"; connection_ptr conn (build_db_->connection ()); - prep_query pq ( - conn->prepare_query ("mod-builds-query", q)); - // Note that we can't skip the proper number of builds in the database // query for a page numbers greater than one. So we will query builds from // the very beginning and skip the appropriate number of them while @@ -575,103 +561,102 @@ handle (request& rq, response& rs) // session sn; - for (bool ne (true); ne; ) + transaction t (conn->begin ()); + + // For some reason PostgreSQL (as of 9.4) picks the nested loop join + // strategy for the below package_build query, which executes quite slow + // even for reasonably small number of builds. Thus, we just discourage + // PostgreSQL from using this strategy in the current transaction. + // + // @@ TMP Re-check for the later PostgreSQL versions if we can drop this + // hint. If drop, then also grep for other places where this hint + // is used. + // + conn->execute ("SET LOCAL enable_nestloop=off"); + + // Iterate over builds and cache build objects that should be printed. + // Skip the appropriate number of them (for page number greater than + // one). + // + for (auto& pb: build_db_->query (q)) { - transaction t (conn->begin ()); + shared_ptr& b (pb.build); - // Query package builds (and cache the result). - // - auto bs (pq.execute ()); + auto i ( + target_conf_map_->find ( + build_target_config_id {b->target, b->target_config_name})); - if ((ne = !bs.empty ())) - { - offset += bs.size (); + assert (i != target_conf_map_->end ()); - // Iterate over builds and cache build objects that should be printed. - // Skip the appropriate number of them (for page number greater than - // one). - // - for (auto& pb: bs) - { - shared_ptr& b (pb.build); + // Match the target configuration against the package build + // configuration expressions/constraints. + // + shared_ptr p ( + build_db_->load (b->id.package)); - auto i (target_conf_map_->find ( - build_target_config_id {b->target, - b->target_config_name})); + const build_package_config* pc (find (b->package_config_name, + p->configs)); - assert (i != target_conf_map_->end ()); + // The package configuration should be present since the configurations + // set cannot change if the package version doesn't change. If that's + // not the case, then the database has probably been manually amended. + // In this case let's just skip such a build as if it excluded and log + // the warning. + // + if (pc == nullptr) + { + warn << "cannot find configuration '" << b->package_config_name + << "' for package " << p->id.name << '/' << p->version; - // Match the target configuration against the package build - // configuration expressions/constraints. - // - shared_ptr p ( - build_db_->load (b->id.package)); + continue; + } - const build_package_config* pc (find (b->package_config_name, - p->configs)); + if (!p->constraints_section.loaded ()) + build_db_->load (*p, p->constraints_section); - // The package configuration should be present since the - // configurations set cannot change if the package version doesn't - // change. If that's not the case, then the database has probably - // been manually amended. In this case let's just skip such a build - // as if it excluded and log the warning. + if (!exclude (*pc, p->builds, p->constraints, *i->second)) + { + if (skip != 0) + --skip; + else if (print != 0) + { + // As we query builds in multiple transactions we may see the same + // build multiple times. Let's skip the duplicates. Note: we don't + // increment the counter in this case. // - if (pc == nullptr) - { - warn << "cannot find configuration '" << b->package_config_name - << "' for package " << p->id.name << '/' << p->version; - + if (find_if (builds.begin (), builds.end (), + [&b] (const package_build& pb) + { + return b->id == pb.build->id; + }) != builds.end ()) continue; - } - build_db_->load (*p, p->constraints_section); - - if (!exclude (*pc, p->builds, p->constraints, *i->second)) + if (b->state == build_state::built) { - if (skip != 0) - --skip; - else if (print != 0) - { - // As we query builds in multiple transactions we may see the - // same build multiple times. Let's skip the duplicates. Note: - // we don't increment the counter in this case. - // - if (find_if (builds.begin (), builds.end (), - [&b] (const package_build& pb) - { - return b->id == pb.build->id; - }) != builds.end ()) - continue; - - if (b->state == build_state::built) - { - build_db_->load (*b, b->results_section); + build_db_->load (*b, b->results_section); - // Let's clear unneeded result logs for builds being cached. - // - for (operation_result& r: b->results) - r.log.clear (); - } + // Let's clear unneeded result logs for builds being cached. + // + for (operation_result& r: b->results) + r.log.clear (); + } - builds.push_back (move (pb)); + builds.push_back (move (pb)); - --print; - } - - ++count; - } + --print; } - } - // - // Print the filter form after the build count is calculated. Note: - // query_toolchains() must be called inside the build db transaction. - // - else - print_form (query_toolchains (), count); - t.commit (); + ++count; + } } + // Print the filter form after the build count is calculated. Note: + // query_toolchains() must be called inside the build db transaction. + // + print_form (query_toolchains (), count); + + t.commit (); + // Finally, print the cached package build configurations. // timestamp now (system_clock::now ()); @@ -750,9 +735,27 @@ handle (request& rq, response& rs) const bpkg::version& toolchain_version; }; + // Cache the build package objects that would otherwise be loaded twice: + // first time during calculating the builds count and then during printing + // the builds. Note that the build package is a subset of the package + // object and normally has a small memory footprint. + // + // @@ TMP It feels that we can try to combine the mentioned steps and + // improve the performance a bit. We won't need the session in this + // case. + // + session sn; + + connection_ptr conn (build_db_->connection ()); + transaction t (conn->begin ()); + + // Discourage PostgreSQL from using the nested loop join strategy in the + // current transaction (see above for details). + // + conn->execute ("SET LOCAL enable_nestloop=off"); + vector config_toolchains; { - transaction t (build_db_->begin ()); toolchains = query_toolchains (); string th_name; @@ -925,8 +928,6 @@ handle (request& rq, response& rs) } else count = 0; - - t.commit (); } // Print the filter form. @@ -944,7 +945,7 @@ handle (request& rq, response& rs) // 7: target configuration name // 8: package configuration name // - // Prepare the build package prepared query. + // Prepare the build package query. // // Note that we can't skip the proper number of packages in the database // query for a page numbers greater than one. So we will query packages @@ -959,27 +960,14 @@ handle (request& rq, response& rs) // URL query parameter. Alternatively, we can invent the page number cap. // using pkg_query = query; - using prep_pkg_query = prepared_query; pkg_query pq (package_query (params, tn)); - // Specify the portion. Note that we will still be querying packages in - // chunks, not to hold locks for too long. For each package we will query - // its builds, so let's keep the portion small. - // - size_t offset (0); - pq += "ORDER BY" + pkg_query::build_package::id.name + order_by_version_desc (pkg_query::build_package::id.version, false /* first */) + "," + - pkg_query::build_package::id.tenant + - "OFFSET" + pkg_query::_ref (offset) + "LIMIT 50"; - - connection_ptr conn (build_db_->connection ()); - - prep_pkg_query pkg_prep_query ( - conn->prepare_query ("mod-builds-package-query", pq)); + pkg_query::build_package::id.tenant; // Prepare the build prepared query. // @@ -1010,120 +998,115 @@ handle (request& rq, response& rs) // Enclose the subsequent tables to be able to use nth-child CSS selector. // s << DIV; - while (print != 0) - { - transaction t (conn->begin ()); - // Query (and cache) buildable packages. - // - auto packages (pkg_prep_query.execute ()); + // Query (and cache) buildable packages. + // + auto packages (build_db_->query (pq)); - if (packages.empty ()) - print = 0; - else + if (packages.empty ()) + print = 0; + else + { + // Iterate over packages and print unbuilt configurations. Skip the + // appropriate number of them first (for page number greater than one). + // + for (auto& bp: packages) { - offset += packages.size (); + shared_ptr& p (bp.package); - // Iterate over packages and print unbuilt configurations. Skip the - // appropriate number of them first (for page number greater than one). - // - for (auto& bp: packages) - { - shared_ptr& p (bp.package); + id = p->id; - id = p->id; - - // Copy configuration/toolchain combinations for this package, - // skipping excluded configurations. - // - set unbuilt_configs; + // Copy configuration/toolchain combinations for this package, + // skipping excluded configurations. + // + set unbuilt_configs; - // Load the constrains section lazily. + // Load the constrains section lazily. + // + for (const build_package_config& pc: p->configs) + { + // Filter by package config name. // - for (const build_package_config& pc: p->configs) + if (pkg_cfg.empty () || path_match (pc.name, pkg_cfg)) { - // Filter by package config name. - // - if (pkg_cfg.empty () || path_match (pc.name, pkg_cfg)) + for (const target_config_toolchain& ct: config_toolchains) { - for (const target_config_toolchain& ct: config_toolchains) - { - auto i ( - target_conf_map_->find ( - build_target_config_id {ct.target, ct.target_config})); - - assert (i != target_conf_map_->end ()); - - if (!p->constraints_section.loaded ()) - build_db_->load (*p, p->constraints_section); - - if (!exclude (pc, p->builds, p->constraints, *i->second)) - unbuilt_configs.insert ( - config_toolchain {ct.target, - ct.target_config, - pc.name, - ct.toolchain_name, - ct.toolchain_version}); - } + auto i ( + target_conf_map_->find ( + build_target_config_id {ct.target, ct.target_config})); + + assert (i != target_conf_map_->end ()); + + if (!p->constraints_section.loaded ()) + build_db_->load (*p, p->constraints_section); + + if (!exclude (pc, p->builds, p->constraints, *i->second)) + unbuilt_configs.insert ( + config_toolchain {ct.target, + ct.target_config, + pc.name, + ct.toolchain_name, + ct.toolchain_version}); } } + } - // Iterate through the package configuration builds and erase them - // from the unbuilt configurations set. - // - for (const auto& pb: bld_prep_query.execute ()) - { - const build& b (*pb.build); + // Iterate through the package configuration builds and erase them + // from the unbuilt configurations set. + // + for (const auto& pb: bld_prep_query.execute ()) + { + const build& b (*pb.build); - unbuilt_configs.erase (config_toolchain {b.target, - b.target_config_name, - b.package_config_name, - b.toolchain_name, - b.toolchain_version}); - } + unbuilt_configs.erase (config_toolchain {b.target, + b.target_config_name, + b.package_config_name, + b.toolchain_name, + b.toolchain_version}); + } - // Print unbuilt package configurations. - // - for (const auto& ct: unbuilt_configs) + // Print unbuilt package configurations. + // + for (const auto& ct: unbuilt_configs) + { + if (skip != 0) { - if (skip != 0) - { - --skip; - continue; - } - - s << TABLE(CLASS="proplist build") - << TBODY - << TR_NAME (id.name, string (), root, id.tenant) - << TR_VERSION (id.name, p->version, root, id.tenant) - << TR_VALUE ("toolchain", - string (ct.toolchain_name) + '-' + - ct.toolchain_version.string ()) - << TR_VALUE ("target", ct.target.string ()) - << TR_VALUE ("tgt config", ct.target_config) - << TR_VALUE ("pkg config", ct.package_config); - - // In the global view mode add the tenant builds link. Note that - // the global view (and the link) makes sense only in the - // multi-tenant mode. - // - if (!tn && !id.tenant.empty ()) - s << TR_TENANT (tenant_name, "builds", root, id.tenant); + --skip; + continue; + } - s << ~TBODY - << ~TABLE; + s << TABLE(CLASS="proplist build") + << TBODY + << TR_NAME (id.name, string (), root, id.tenant) + << TR_VERSION (id.name, p->version, root, id.tenant) + << TR_VALUE ("toolchain", + string (ct.toolchain_name) + '-' + + ct.toolchain_version.string ()) + << TR_VALUE ("target", ct.target.string ()) + << TR_VALUE ("tgt config", ct.target_config) + << TR_VALUE ("pkg config", ct.package_config); + + // In the global view mode add the tenant builds link. Note that + // the global view (and the link) makes sense only in the + // multi-tenant mode. + // + if (!tn && !id.tenant.empty ()) + s << TR_TENANT (tenant_name, "builds", root, id.tenant); - if (--print == 0) // Bail out the configuration loop. - break; - } + s << ~TBODY + << ~TABLE; - if (print == 0) // Bail out the package loop. + if (--print == 0) // Bail out the configuration loop. break; } - } - t.commit (); + if (print == 0) // Bail out the package loop. + break; + } } + + t.commit (); + s << ~DIV; } diff --git a/mod/mod-package-version-details.cxx b/mod/mod-package-version-details.cxx index bf592e9..632e2ca 100644 --- a/mod/mod-package-version-details.cxx +++ b/mod/mod-package-version-details.cxx @@ -624,7 +624,7 @@ handle (request& rq, response& rs) dir_path vd (fdd / rd / dir_path (pkg->project.string ()) / - dir_path (pn.string ()) / + dir_path (pn.string ()) / dir_path (sver)); try @@ -799,6 +799,10 @@ handle (request& rq, response& rs) ts += ')'; + // @@ Note that here we also load result logs which we don't need. + // Probably we should invent some table view to only load operation + // names and statuses. + // if (b.state == build_state::built) build_db_->load (b, b.results_section); -- cgit v1.1