1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-19 05:12:41 +01:00

Improve construction of commit queries from blame lookups

Summary:
Ref T2450. File blame tends to have the same commit a lot of times, and we don't do lookups like this efficiently right now.

In particular, for a file like `__phutil_library_map__.php`, we would issue a query with ~9,000 clauses like this:

```
(repositoryID = 1 AND commitIdentifier LIKE "XYZ%")
```

...but only a few hundred of those identifiers were unique. Instead, issue only one clause per unique identifier.

MySQL also seems to do a little better on "commitIdentifier = X" if we have the full hash, so special case that slightly.

Test Plan:
  - Issuing a query for only unique identifiers dropped the cost from 400ms to 100ms locally.
  - Swapping to `=` if we have the full hash dropped the cost from 100ms to 75ms locally.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T2450

Differential Revision: https://secure.phabricator.com/D14962
This commit is contained in:
epriestley 2016-01-06 15:34:05 -08:00
parent 741118a08f
commit 0759b84d77

View file

@ -51,6 +51,11 @@ final class DiffusionCommitQuery
* they queried for.
*/
public function withIdentifiers(array $identifiers) {
// Some workflows (like blame lookups) can pass in large numbers of
// duplicate identifiers. We only care about unique identifiers, so
// get rid of duplicates immediately.
$identifiers = array_fuse($identifiers);
$this->identifiers = $identifiers;
return $this;
}
@ -185,7 +190,7 @@ final class DiffusionCommitQuery
// Build the identifierMap
if ($this->identifiers !== null) {
$ids = array_fuse($this->identifiers);
$ids = $this->identifiers;
$prefixes = array(
'r'.$commit->getRepository()->getCallsign(),
'r'.$commit->getRepository()->getCallsign().':',
@ -395,7 +400,6 @@ final class DiffusionCommitQuery
$repos->execute();
$repos = $repos->getIdentifierMap();
foreach ($refs as $key => $ref) {
$repo = idx($repos, $ref['callsign']);
@ -404,7 +408,7 @@ final class DiffusionCommitQuery
}
if ($repo->isSVN()) {
if (!ctype_digit($ref['identifier'])) {
if (!ctype_digit((string)$ref['identifier'])) {
continue;
}
$sql[] = qsprintf(
@ -419,11 +423,25 @@ final class DiffusionCommitQuery
if (strlen($ref['identifier']) < $min_qualified) {
continue;
}
$sql[] = qsprintf(
$conn,
'(commit.repositoryID = %d AND commit.commitIdentifier LIKE %>)',
$repo->getID(),
$ref['identifier']);
$identifier = $ref['identifier'];
if (strlen($identifier) == 40) {
// MySQL seems to do slightly better with this version if the
// clause, so issue it if we have a full commit hash.
$sql[] = qsprintf(
$conn,
'(commit.repositoryID = %d
AND commit.commitIdentifier = %s)',
$repo->getID(),
$identifier);
} else {
$sql[] = qsprintf(
$conn,
'(commit.repositoryID = %d
AND commit.commitIdentifier LIKE %>)',
$repo->getID(),
$identifier);
}
}
}
}