1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-12-03 20:22:46 +01:00
phorge-phorge/src/applications/diffusion/query/DiffusionCommitQuery.php
epriestley 157f47cd14 Rewrite CommitQuery to use UNION for performance
Summary:
Ref T12680. See PHI167. See that task for discussion.

Rewrite `DiffusionCommitQuery` to work more like `DifferentialRevisionQuery`, and use a UNION to find "all revisions you need to audit OR respond to".

I tried to get this working a little more cleanly than RevisionQuery does, and can probably simplify that now.

Test Plan: Poked at the UI locally without hitting any apparent issues, but my local data is pretty garbage at this point. I'll take a look at how the query plans work on `secure`.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T12680

Differential Revision: https://secure.phabricator.com/D18722
2017-10-23 10:32:24 -07:00

679 lines
18 KiB
PHP

<?php
final class DiffusionCommitQuery
extends PhabricatorCursorPagedPolicyAwareQuery {
private $ids;
private $phids;
private $authorPHIDs;
private $defaultRepository;
private $identifiers;
private $repositoryIDs;
private $repositoryPHIDs;
private $identifierMap;
private $responsiblePHIDs;
private $statuses;
private $packagePHIDs;
private $unreachable;
private $needAuditRequests;
private $auditIDs;
private $auditorPHIDs;
private $epochMin;
private $epochMax;
private $importing;
private $needCommitData;
private $needDrafts;
public function withIDs(array $ids) {
$this->ids = $ids;
return $this;
}
public function withPHIDs(array $phids) {
$this->phids = $phids;
return $this;
}
public function withAuthorPHIDs(array $phids) {
$this->authorPHIDs = $phids;
return $this;
}
/**
* Load commits by partial or full identifiers, e.g. "rXab82393", "rX1234",
* or "a9caf12". When an identifier matches multiple commits, they will all
* be returned; callers should be prepared to deal with more results than
* they queried for.
*/
public function withIdentifiers(array $identifiers) {
// Some workflows (like blame lookups) can pass in large numbers of
// duplicate identifiers. We only care about unique identifiers, so
// get rid of duplicates immediately.
$identifiers = array_fuse($identifiers);
$this->identifiers = $identifiers;
return $this;
}
/**
* Look up commits in a specific repository. This is a shorthand for calling
* @{method:withDefaultRepository} and @{method:withRepositoryIDs}.
*/
public function withRepository(PhabricatorRepository $repository) {
$this->withDefaultRepository($repository);
$this->withRepositoryIDs(array($repository->getID()));
return $this;
}
/**
* Look up commits in a specific repository. Prefer
* @{method:withRepositoryIDs}; the underlying table is keyed by ID such
* that this method requires a separate initial query to map PHID to ID.
*/
public function withRepositoryPHIDs(array $phids) {
$this->repositoryPHIDs = $phids;
return $this;
}
/**
* If a default repository is provided, ambiguous commit identifiers will
* be assumed to belong to the default repository.
*
* For example, "r123" appearing in a commit message in repository X is
* likely to be unambiguously "rX123". Normally the reference would be
* considered ambiguous, but if you provide a default repository it will
* be correctly resolved.
*/
public function withDefaultRepository(PhabricatorRepository $repository) {
$this->defaultRepository = $repository;
return $this;
}
public function withRepositoryIDs(array $repository_ids) {
$this->repositoryIDs = $repository_ids;
return $this;
}
public function needCommitData($need) {
$this->needCommitData = $need;
return $this;
}
public function needDrafts($need) {
$this->needDrafts = $need;
return $this;
}
public function needAuditRequests($need) {
$this->needAuditRequests = $need;
return $this;
}
public function withAuditIDs(array $ids) {
$this->auditIDs = $ids;
return $this;
}
public function withAuditorPHIDs(array $auditor_phids) {
$this->auditorPHIDs = $auditor_phids;
return $this;
}
public function withResponsiblePHIDs(array $responsible_phids) {
$this->responsiblePHIDs = $responsible_phids;
return $this;
}
public function withPackagePHIDs(array $package_phids) {
$this->packagePHIDs = $package_phids;
return $this;
}
public function withUnreachable($unreachable) {
$this->unreachable = $unreachable;
return $this;
}
public function withStatuses(array $statuses) {
$this->statuses = $statuses;
return $this;
}
public function withEpochRange($min, $max) {
$this->epochMin = $min;
$this->epochMax = $max;
return $this;
}
public function withImporting($importing) {
$this->importing = $importing;
return $this;
}
public function getIdentifierMap() {
if ($this->identifierMap === null) {
throw new Exception(
pht(
'You must %s the query before accessing the identifier map.',
'execute()'));
}
return $this->identifierMap;
}
protected function getPrimaryTableAlias() {
return 'commit';
}
protected function willExecute() {
if ($this->identifierMap === null) {
$this->identifierMap = array();
}
}
public function newResultObject() {
return new PhabricatorRepositoryCommit();
}
protected function loadPage() {
$table = $this->newResultObject();
$conn = $table->establishConnection('r');
$subqueries = array();
if ($this->responsiblePHIDs) {
$base_authors = $this->authorPHIDs;
$base_auditors = $this->auditorPHIDs;
$responsible_phids = $this->responsiblePHIDs;
if ($base_authors) {
$all_authors = array_merge($base_authors, $responsible_phids);
} else {
$all_authors = $responsible_phids;
}
if ($base_auditors) {
$all_auditors = array_merge($base_auditors, $responsible_phids);
} else {
$all_auditors = $responsible_phids;
}
$this->authorPHIDs = $all_authors;
$this->auditorPHIDs = $base_auditors;
$subqueries[] = $this->buildStandardPageQuery(
$conn,
$table->getTableName());
$this->authorPHIDs = $base_authors;
$this->auditorPHIDs = $all_auditors;
$subqueries[] = $this->buildStandardPageQuery(
$conn,
$table->getTableName());
} else {
$subqueries[] = $this->buildStandardPageQuery(
$conn,
$table->getTableName());
}
if (count($subqueries) > 1) {
foreach ($subqueries as $key => $subquery) {
$subqueries[$key] = '('.$subquery.')';
}
$query = qsprintf(
$conn,
'%Q %Q %Q',
implode(' UNION DISTINCT ', $subqueries),
$this->buildOrderClause($conn, true),
$this->buildLimitClause($conn));
} else {
$query = head($subqueries);
}
$rows = queryfx_all($conn, '%Q', $query);
$rows = $this->didLoadRawRows($rows);
return $table->loadAllFromArray($rows);
}
protected function willFilterPage(array $commits) {
$repository_ids = mpull($commits, 'getRepositoryID', 'getRepositoryID');
$repos = id(new PhabricatorRepositoryQuery())
->setViewer($this->getViewer())
->withIDs($repository_ids)
->execute();
$min_qualified = PhabricatorRepository::MINIMUM_QUALIFIED_HASH;
$result = array();
foreach ($commits as $key => $commit) {
$repo = idx($repos, $commit->getRepositoryID());
if ($repo) {
$commit->attachRepository($repo);
} else {
$this->didRejectResult($commit);
unset($commits[$key]);
continue;
}
// Build the identifierMap
if ($this->identifiers !== null) {
$ids = $this->identifiers;
$prefixes = array(
'r'.$commit->getRepository()->getCallsign(),
'r'.$commit->getRepository()->getCallsign().':',
'R'.$commit->getRepository()->getID().':',
'', // No prefix is valid too and will only match the commitIdentifier
);
$suffix = $commit->getCommitIdentifier();
if ($commit->getRepository()->isSVN()) {
foreach ($prefixes as $prefix) {
if (isset($ids[$prefix.$suffix])) {
$result[$prefix.$suffix][] = $commit;
}
}
} else {
// This awkward construction is so we can link the commits up in O(N)
// time instead of O(N^2).
for ($ii = $min_qualified; $ii <= strlen($suffix); $ii++) {
$part = substr($suffix, 0, $ii);
foreach ($prefixes as $prefix) {
if (isset($ids[$prefix.$part])) {
$result[$prefix.$part][] = $commit;
}
}
}
}
}
}
if ($result) {
foreach ($result as $identifier => $matching_commits) {
if (count($matching_commits) == 1) {
$result[$identifier] = head($matching_commits);
} else {
// This reference is ambiguous (it matches more than one commit) so
// don't link it.
unset($result[$identifier]);
}
}
$this->identifierMap += $result;
}
return $commits;
}
protected function didFilterPage(array $commits) {
$viewer = $this->getViewer();
if ($this->needCommitData) {
$data = id(new PhabricatorRepositoryCommitData())->loadAllWhere(
'commitID in (%Ld)',
mpull($commits, 'getID'));
$data = mpull($data, null, 'getCommitID');
foreach ($commits as $commit) {
$commit_data = idx($data, $commit->getID());
if (!$commit_data) {
$commit_data = new PhabricatorRepositoryCommitData();
}
$commit->attachCommitData($commit_data);
}
}
if ($this->needAuditRequests) {
$requests = id(new PhabricatorRepositoryAuditRequest())->loadAllWhere(
'commitPHID IN (%Ls)',
mpull($commits, 'getPHID'));
$requests = mgroup($requests, 'getCommitPHID');
foreach ($commits as $commit) {
$audit_requests = idx($requests, $commit->getPHID(), array());
$commit->attachAudits($audit_requests);
foreach ($audit_requests as $audit_request) {
$audit_request->attachCommit($commit);
}
}
}
if ($this->needDrafts) {
PhabricatorDraftEngine::attachDrafts(
$viewer,
$commits);
}
return $commits;
}
protected function buildWhereClauseParts(AphrontDatabaseConnection $conn) {
$where = parent::buildWhereClauseParts($conn);
if ($this->repositoryPHIDs !== null) {
$map_repositories = id(new PhabricatorRepositoryQuery())
->setViewer($this->getViewer())
->withPHIDs($this->repositoryPHIDs)
->execute();
if (!$map_repositories) {
throw new PhabricatorEmptyQueryException();
}
$repository_ids = mpull($map_repositories, 'getID');
if ($this->repositoryIDs !== null) {
$repository_ids = array_merge($repository_ids, $this->repositoryIDs);
}
$this->withRepositoryIDs($repository_ids);
}
if ($this->ids !== null) {
$where[] = qsprintf(
$conn,
'commit.id IN (%Ld)',
$this->ids);
}
if ($this->phids !== null) {
$where[] = qsprintf(
$conn,
'commit.phid IN (%Ls)',
$this->phids);
}
if ($this->repositoryIDs !== null) {
$where[] = qsprintf(
$conn,
'commit.repositoryID IN (%Ld)',
$this->repositoryIDs);
}
if ($this->authorPHIDs !== null) {
$where[] = qsprintf(
$conn,
'commit.authorPHID IN (%Ls)',
$this->authorPHIDs);
}
if ($this->epochMin !== null) {
$where[] = qsprintf(
$conn,
'commit.epoch >= %d',
$this->epochMin);
}
if ($this->epochMax !== null) {
$where[] = qsprintf(
$conn,
'commit.epoch <= %d',
$this->epochMax);
}
if ($this->importing !== null) {
if ($this->importing) {
$where[] = qsprintf(
$conn,
'(commit.importStatus & %d) != %d',
PhabricatorRepositoryCommit::IMPORTED_ALL,
PhabricatorRepositoryCommit::IMPORTED_ALL);
} else {
$where[] = qsprintf(
$conn,
'(commit.importStatus & %d) = %d',
PhabricatorRepositoryCommit::IMPORTED_ALL,
PhabricatorRepositoryCommit::IMPORTED_ALL);
}
}
if ($this->identifiers !== null) {
$min_unqualified = PhabricatorRepository::MINIMUM_UNQUALIFIED_HASH;
$min_qualified = PhabricatorRepository::MINIMUM_QUALIFIED_HASH;
$refs = array();
$bare = array();
foreach ($this->identifiers as $identifier) {
$matches = null;
preg_match('/^(?:[rR]([A-Z]+:?|[0-9]+:))?(.*)$/',
$identifier, $matches);
$repo = nonempty(rtrim($matches[1], ':'), null);
$commit_identifier = nonempty($matches[2], null);
if ($repo === null) {
if ($this->defaultRepository) {
$repo = $this->defaultRepository->getPHID();
}
}
if ($repo === null) {
if (strlen($commit_identifier) < $min_unqualified) {
continue;
}
$bare[] = $commit_identifier;
} else {
$refs[] = array(
'repository' => $repo,
'identifier' => $commit_identifier,
);
}
}
$sql = array();
foreach ($bare as $identifier) {
$sql[] = qsprintf(
$conn,
'(commit.commitIdentifier LIKE %> AND '.
'LENGTH(commit.commitIdentifier) = 40)',
$identifier);
}
if ($refs) {
$repositories = ipull($refs, 'repository');
$repos = id(new PhabricatorRepositoryQuery())
->setViewer($this->getViewer())
->withIdentifiers($repositories);
$repos->execute();
$repos = $repos->getIdentifierMap();
foreach ($refs as $key => $ref) {
$repo = idx($repos, $ref['repository']);
if (!$repo) {
continue;
}
if ($repo->isSVN()) {
if (!ctype_digit((string)$ref['identifier'])) {
continue;
}
$sql[] = qsprintf(
$conn,
'(commit.repositoryID = %d AND commit.commitIdentifier = %s)',
$repo->getID(),
// NOTE: Because the 'commitIdentifier' column is a string, MySQL
// ignores the index if we hand it an integer. Hand it a string.
// See T3377.
(int)$ref['identifier']);
} else {
if (strlen($ref['identifier']) < $min_qualified) {
continue;
}
$identifier = $ref['identifier'];
if (strlen($identifier) == 40) {
// MySQL seems to do slightly better with this version if the
// clause, so issue it if we have a full commit hash.
$sql[] = qsprintf(
$conn,
'(commit.repositoryID = %d
AND commit.commitIdentifier = %s)',
$repo->getID(),
$identifier);
} else {
$sql[] = qsprintf(
$conn,
'(commit.repositoryID = %d
AND commit.commitIdentifier LIKE %>)',
$repo->getID(),
$identifier);
}
}
}
}
if (!$sql) {
// If we discarded all possible identifiers (e.g., they all referenced
// bogus repositories or were all too short), make sure the query finds
// nothing.
throw new PhabricatorEmptyQueryException(
pht('No commit identifiers.'));
}
$where[] = '('.implode(' OR ', $sql).')';
}
if ($this->auditIDs !== null) {
$where[] = qsprintf(
$conn,
'auditor.id IN (%Ld)',
$this->auditIDs);
}
if ($this->auditorPHIDs !== null) {
$where[] = qsprintf(
$conn,
'auditor.auditorPHID IN (%Ls)',
$this->auditorPHIDs);
}
if ($this->statuses !== null) {
$where[] = qsprintf(
$conn,
'commit.auditStatus IN (%Ld)',
$this->statuses);
}
if ($this->packagePHIDs !== null) {
$where[] = qsprintf(
$conn,
'package.dst IN (%Ls)',
$this->packagePHIDs);
}
if ($this->unreachable !== null) {
if ($this->unreachable) {
$where[] = qsprintf(
$conn,
'(commit.importStatus & %d) = %d',
PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE,
PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE);
} else {
$where[] = qsprintf(
$conn,
'(commit.importStatus & %d) = 0',
PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE);
}
}
return $where;
}
protected function didFilterResults(array $filtered) {
if ($this->identifierMap) {
foreach ($this->identifierMap as $name => $commit) {
if (isset($filtered[$commit->getPHID()])) {
unset($this->identifierMap[$name]);
}
}
}
}
private function shouldJoinAuditor() {
return ($this->auditIDs || $this->auditorPHIDs);
}
private function shouldJoinOwners() {
return (bool)$this->packagePHIDs;
}
protected function buildJoinClauseParts(AphrontDatabaseConnection $conn) {
$join = parent::buildJoinClauseParts($conn);
$audit_request = new PhabricatorRepositoryAuditRequest();
if ($this->shouldJoinAuditor()) {
$join[] = qsprintf(
$conn,
'JOIN %T auditor ON commit.phid = auditor.commitPHID',
$audit_request->getTableName());
}
if ($this->shouldJoinOwners()) {
$join[] = qsprintf(
$conn,
'JOIN %T package ON commit.phid = package.src
AND package.type = %s',
PhabricatorEdgeConfig::TABLE_NAME_EDGE,
DiffusionCommitHasPackageEdgeType::EDGECONST);
}
return $join;
}
protected function shouldGroupQueryResultRows() {
if ($this->shouldJoinAuditor()) {
return true;
}
if ($this->shouldJoinOwners()) {
return true;
}
return parent::shouldGroupQueryResultRows();
}
public function getQueryApplicationClass() {
return 'PhabricatorDiffusionApplication';
}
public function getOrderableColumns() {
return parent::getOrderableColumns() + array(
'epoch' => array(
'table' => $this->getPrimaryTableAlias(),
'column' => 'epoch',
'type' => 'int',
'reverse' => false,
),
);
}
protected function getPagingValueMap($cursor, array $keys) {
$commit = $this->loadCursorObject($cursor);
return array(
'id' => $commit->getID(),
'epoch' => $commit->getEpoch(),
);
}
public function getBuiltinOrders() {
$parent = parent::getBuiltinOrders();
// Rename the default ID-based orders.
$parent['importnew'] = array(
'name' => pht('Import Date (Newest First)'),
) + $parent['newest'];
$parent['importold'] = array(
'name' => pht('Import Date (Oldest First)'),
) + $parent['oldest'];
return array(
'newest' => array(
'vector' => array('epoch', 'id'),
'name' => pht('Commit Date (Newest First)'),
),
'oldest' => array(
'vector' => array('-epoch', '-id'),
'name' => pht('Commit Date (Oldest First)'),
),
) + $parent;
}
}