From 918ac5755c782f39b32e833efd20c853c312c616 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 9 Jul 2012 10:51:46 -0700 Subject: [PATCH] Limit maximum size of Owners package queries Summary: Currently, a change may affect a very large number of paths. When we run the OwnersWorker on it, we'll execute a query which looks up packages for the paths. This may exceed "max_allowed_packet". Instead, break the list of paths into smaller chunks. This is mostly to unblock r4nt / llvm, I'm going to add a more finessed approach to array_chunk() shortly. Test Plan: Ran `reparse.php --owners` on a revision which affected packages, verified results are the same before and after the change. Set chunk size to 1, verified query results aggregated properly. Reviewers: btrahan, jungejason, nh Reviewed By: jungejason CC: aran Differential Revision: https://secure.phabricator.com/D2943 --- .../storage/PhabricatorOwnersPackage.php | 63 +++++++++++-------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/src/applications/owners/storage/PhabricatorOwnersPackage.php b/src/applications/owners/storage/PhabricatorOwnersPackage.php index 82e2deae3a..aad671425c 100644 --- a/src/applications/owners/storage/PhabricatorOwnersPackage.php +++ b/src/applications/owners/storage/PhabricatorOwnersPackage.php @@ -142,40 +142,51 @@ final class PhabricatorOwnersPackage extends PhabricatorOwnersDAO { $path = new PhabricatorOwnersPath(); $conn = $package->establishConnection('r'); - $repository_clause = qsprintf($conn, 'AND p.repositoryPHID = %s', + $repository_clause = qsprintf( + $conn, + 'AND p.repositoryPHID = %s', $repository->getPHID()); - $limit_clause = ''; - if (!empty($limit)) { - $limit_clause = qsprintf($conn, 'LIMIT %d', $limit); - } + // NOTE: The list of $paths may be very large if we're coming from + // the OwnersWorker and processing, e.g., an SVN commit which created a new + // branch. Break it apart so that it will fit within 'max_allowed_packet', + // and then merge results in PHP. - $data = queryfx_all( - $conn, - 'SELECT pkg.id FROM %T pkg JOIN %T p ON p.packageID = pkg.id - WHERE p.path IN (%Ls) %Q ORDER BY LENGTH(p.path) DESC %Q', - $package->getTableName(), - $path->getTableName(), - $paths, - $repository_clause, - $limit_clause); + $ids = array(); + foreach (array_chunk($paths, 128) as $chunk) { + $rows = queryfx_all( + $conn, + 'SELECT pkg.id id, LENGTH(p.path) len + FROM %T pkg JOIN %T p ON p.packageID = pkg.id + WHERE p.path IN (%Ls) %Q', + $package->getTableName(), + $path->getTableName(), + $chunk, + $repository_clause); - $ids = ipull($data, 'id'); - - if (empty($ids)) { - return array(); - } - - $order = array(); - foreach ($ids as $id) { - if (empty($order[$id])) { - $order[$id] = true; + foreach ($rows as $row) { + $id = (int)$row['id']; + $len = (int)$row['len']; + if (isset($ids[$id])) { + $ids[$id] = max($len, $ids[$id]); + } else { + $ids[$id] = $len; + } } } - $packages = $package->loadAllWhere('id in (%Ld)', array_keys($order)); + if (!$ids) { + return array(); + } - $packages = array_select_keys($packages, array_keys($order)); + arsort($ids); + if ($limit) { + $ids = array_slice($ids, 0, $limit, $preserve_keys = true); + } + $ids = array_keys($ids); + + $packages = $package->loadAllWhere('id in (%Ld)', array_keys($ids)); + $packages = array_select_keys($packages, array_keys($ids)); return $packages; }