From 16648c28bcd70b01326e231379789afd7366c14d Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 10 Jul 2014 10:16:26 -0700 Subject: [PATCH] Add GROUP BY to commit query Summary: Ref T4715. Some minor stuff I caught locally while poking around: - Since we don't `GROUP BY`, we can still get duplicate commits. These get silently de-duplicated by `loadAllFromArray()` because that returns an array keyed by `id`, but we fetch too much data and this can cause us to execute too many queries to fill pages. Instead, `GROUP BY` if we joined the audit table. - After adding `GROUP BY`, getting the audit IDs out of the query is no longer reliable. Instead, query audits by the commit PHIDs. This is approximately equiavlent. - Since we always `JOIN`, we currently never return commits that don't have any audits. If we don't know that all results will have an audit, just `LEFT JOIN`. - Add some `!== null` to catch the `withIDs(array())` issue that we hit with Khan Academy a little while ago. Test Plan: - Verified that "All Commits" shows commits with no audits of any kind. - Verified that the raw data comes out of the query without duplicates. Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T5433, T4715 Differential Revision: https://secure.phabricator.com/D8879 --- .../conduit/ConduitAPI_audit_query_Method.php | 12 ++++ ...abricatorAuditManagementDeleteWorkflow.php | 26 ++++++-- .../diffusion/query/DiffusionCommitQuery.php | 59 +++++++++---------- 3 files changed, 61 insertions(+), 36 deletions(-) diff --git a/src/applications/audit/conduit/ConduitAPI_audit_query_Method.php b/src/applications/audit/conduit/ConduitAPI_audit_query_Method.php index af66e7b828..8db30d54b4 100644 --- a/src/applications/audit/conduit/ConduitAPI_audit_query_Method.php +++ b/src/applications/audit/conduit/ConduitAPI_audit_query_Method.php @@ -51,15 +51,27 @@ final class ConduitAPI_audit_query_Method extends ConduitAPI_audit_Method { DiffusionCommitQuery::AUDIT_STATUS_ANY); $query->withAuditStatus($status); + // NOTE: These affect the number of commits identified, which is sort of + // reasonable but means the method may return an arbitrary number of + // actual audit requests. $query->setOffset($request->getValue('offset', 0)); $query->setLimit($request->getValue('limit', 100)); $commits = $query->execute(); + $auditor_map = array_fuse($auditor_phids); + $results = array(); foreach ($commits as $commit) { $requests = $commit->getAudits(); foreach ($requests as $request) { + + // If this audit isn't triggered for one of the requested PHIDs, + // skip it. + if ($auditor_map && empty($auditor_map[$request->getAuditorPHID()])) { + continue; + } + $results[] = array( 'id' => $request->getID(), 'commitPHID' => $request->getCommitPHID(), diff --git a/src/applications/audit/management/PhabricatorAuditManagementDeleteWorkflow.php b/src/applications/audit/management/PhabricatorAuditManagementDeleteWorkflow.php index af951c754f..9700c38b26 100644 --- a/src/applications/audit/management/PhabricatorAuditManagementDeleteWorkflow.php +++ b/src/applications/audit/management/PhabricatorAuditManagementDeleteWorkflow.php @@ -85,7 +85,9 @@ final class PhabricatorAuditManagementDeleteWorkflow $query->withAuditStatus($status); } + $id_map = array(); if ($ids) { + $id_map = array_fuse($ids); $query->withAuditIDs($ids); } @@ -93,8 +95,10 @@ final class PhabricatorAuditManagementDeleteWorkflow $query->withRepositoryIDs(mpull($repos, 'getID')); } + $auditor_map = array(); if ($users) { - $query->withAuditorPHIDs(mpull($users, 'getPHID')); + $auditor_map = array_fuse(mpull($users, 'getPHID')); + $query->withAuditorPHIDs($auditor_map); } if ($commits) { @@ -105,19 +109,29 @@ final class PhabricatorAuditManagementDeleteWorkflow $commits = mpull($commits, null, 'getPHID'); $audits = array(); foreach ($commits as $commit) { - $curr_audits = $commit->getAudits(); - foreach ($audits as $key => $audit) { + $commit_audits = $commit->getAudits(); + foreach ($commit_audits as $key => $audit) { + if ($id_map && empty($id_map[$audit->getID()])) { + unset($commit_audits[$key]); + continue; + } + + if ($auditor_map && empty($auditor_map[$audit->getAuditorPHID()])) { + unset($commit_audits[$key]); + continue; + } + if ($min_date && $commit->getEpoch() < $min_date) { - unset($audits[$key]); + unset($commit_audits[$key]); continue; } if ($max_date && $commit->getEpoch() > $max_date) { - unset($audits[$key]); + unset($commit_audits[$key]); continue; } } - $audits[] = $curr_audits; + $audits[] = $commit_audits; } $audits = array_mergev($audits); diff --git a/src/applications/diffusion/query/DiffusionCommitQuery.php b/src/applications/diffusion/query/DiffusionCommitQuery.php index 73a9221726..0e28479040 100644 --- a/src/applications/diffusion/query/DiffusionCommitQuery.php +++ b/src/applications/diffusion/query/DiffusionCommitQuery.php @@ -19,7 +19,6 @@ final class DiffusionCommitQuery const AUDIT_STATUS_ANY = 'audit-status-any'; const AUDIT_STATUS_OPEN = 'audit-status-open'; const AUDIT_STATUS_CONCERN = 'audit-status-concern'; - private $loadAuditIds; private $needCommitData; @@ -94,10 +93,8 @@ final class DiffusionCommitQuery * rows must always have it. */ private function shouldJoinAudits() { - return - $this->needAuditRequests || - $this->auditStatus || - $this->rowsMustHaveAudits(); + return $this->auditStatus || + $this->rowsMustHaveAudits(); } @@ -156,31 +153,17 @@ final class DiffusionCommitQuery $data = queryfx_all( $conn_r, - 'SELECT commit.* %Q FROM %T commit %Q %Q %Q %Q', - $this->buildAuditSelect($conn_r), + 'SELECT commit.* FROM %T commit %Q %Q %Q %Q %Q', $table->getTableName(), $this->buildJoinClause($conn_r), $this->buildWhereClause($conn_r), + $this->buildGroupClause($conn_r), $this->buildOrderClause($conn_r), $this->buildLimitClause($conn_r)); - if ($this->shouldJoinAudits()) { - $this->loadAuditIds = ipull($data, 'audit_id'); - } - return $table->loadAllFromArray($data); } - private function buildAuditSelect($conn_r) { - if ($this->shouldJoinAudits()) { - return qsprintf( - $conn_r, - ', audit.id as audit_id'); - } - - return ''; - } - protected function willFilterPage(array $commits) { $repository_ids = mpull($commits, 'getRepositoryID', 'getRepositoryID'); $repos = id(new PhabricatorRepositoryQuery()) @@ -230,7 +213,7 @@ final class DiffusionCommitQuery $result[$identifier] = head($matching_commits); } else { // This reference is ambiguous (it matches more than one commit) so - // don't link it + // don't link it. unset($result[$identifier]); } } @@ -257,14 +240,13 @@ final class DiffusionCommitQuery } } - if ($this->shouldJoinAudits()) { - $load_ids = array_filter($this->loadAuditIds); - if ($load_ids) { - $requests = id(new PhabricatorRepositoryAuditRequest()) - ->loadAllWhere('id IN (%Ld)', $this->loadAuditIds); - } else { - $requests = array(); - } + // TODO: This should just be `needAuditRequests`, not `shouldJoinAudits()`, + // but leave that for a future diff. + + if ($this->needAuditRequests || $this->shouldJoinAudits()) { + $requests = id(new PhabricatorRepositoryAuditRequest())->loadAllWhere( + 'commitPHID IN (%Ls)', + mpull($commits, 'getPHID')); $requests = mgroup($requests, 'getCommitPHID'); foreach ($commits as $commit) { @@ -508,6 +490,23 @@ final class DiffusionCommitQuery } } + private function buildGroupClause(AphrontDatabaseConnection $conn_r) { + $should_group = $this->shouldJoinAudits(); + + // TODO: Currently, the audit table is missing a unique key, so we may + // require a GROUP BY if we perform this join. See T1768. This can be + // removed once the table has the key. + if ($this->auditAwaitingUser) { + $should_group = true; + } + + if ($should_group) { + return 'GROUP BY commit.id'; + } else { + return ''; + } + } + public function getQueryApplicationClass() { return 'PhabricatorApplicationDiffusion'; }