mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-19 20:10:55 +01:00
Reduce initial discovery from O(branches * commits) to O(commits)
Summary: Fixes T4414. Currently, when we discover a new repository, we do something like this: foreach (branch) { foreach (commit on this branch) { do_something(); } } In cases where there are a lot of branches which mostly just branch `master`, this leads to us doing roughly `O(branches * commits)` work. We have a `commitCache` to prevent this, but it has two problems: - It only fills out of the DB, and we do this whole thing before writing to the DB, which is the entire point. - It has a fixed size (2048) and on initial discovery we're usually dealing with way more commits than that. Instead, also stop doing work if we hit a commit which is known already. Test Plan: - Added `print` on the number of discovered refs and number of unique refs. - Ran `bin/repository discover --repair X` on a repo with several branches. - Before the patch, got 397 refs and 135 unique refs. - After the patch, got 135 refs and 135 unique refs. Reviewers: btrahan Reviewed By: btrahan CC: aran Maniphest Tasks: T4414 Differential Revision: https://secure.phabricator.com/D8374
This commit is contained in:
parent
e28b78f5eb
commit
d86bb086ca
1 changed files with 27 additions and 2 deletions
|
@ -12,6 +12,8 @@ final class PhabricatorRepositoryDiscoveryEngine
|
|||
|
||||
private $repairMode;
|
||||
private $commitCache = array();
|
||||
private $workingSet = array();
|
||||
|
||||
const MAX_COMMIT_CACHE_SIZE = 2048;
|
||||
|
||||
|
||||
|
@ -50,6 +52,9 @@ final class PhabricatorRepositoryDiscoveryEngine
|
|||
throw new Exception("Unknown VCS '{$vcs}'!");
|
||||
}
|
||||
|
||||
// Clear the working set cache.
|
||||
$this->workingSet = array();
|
||||
|
||||
// Record discovered commits and mark them in the cache.
|
||||
foreach ($refs as $ref) {
|
||||
$this->recordCommit(
|
||||
|
@ -113,10 +118,14 @@ final class PhabricatorRepositoryDiscoveryEngine
|
|||
|
||||
$this->log(pht("Looking for new commits."));
|
||||
|
||||
$refs[] = $this->discoverStreamAncestry(
|
||||
$branch_refs = $this->discoverStreamAncestry(
|
||||
new PhabricatorGitGraphStream($repository, $commit),
|
||||
$commit,
|
||||
$repository->shouldAutocloseBranch($name));
|
||||
|
||||
$this->didDiscoverRefs($branch_refs);
|
||||
|
||||
$refs[] = $branch_refs;
|
||||
}
|
||||
|
||||
return array_mergev($refs);
|
||||
|
@ -289,6 +298,8 @@ final class PhabricatorRepositoryDiscoveryEngine
|
|||
}
|
||||
$refs = array_reverse($refs);
|
||||
|
||||
$this->didDiscoverRefs($refs);
|
||||
|
||||
return $refs;
|
||||
}
|
||||
|
||||
|
@ -363,10 +374,14 @@ final class PhabricatorRepositoryDiscoveryEngine
|
|||
|
||||
$this->log(pht("Looking for new commits."));
|
||||
|
||||
$refs[] = $this->discoverStreamAncestry(
|
||||
$branch_refs = $this->discoverStreamAncestry(
|
||||
new PhabricatorMercurialGraphStream($repository, $commit),
|
||||
$commit,
|
||||
$close_immediately = true);
|
||||
|
||||
$this->didDiscoverRefs($branch_refs);
|
||||
|
||||
$refs[] = $branch_refs;
|
||||
}
|
||||
|
||||
return array_mergev($refs);
|
||||
|
@ -447,6 +462,10 @@ final class PhabricatorRepositoryDiscoveryEngine
|
|||
return true;
|
||||
}
|
||||
|
||||
if (isset($this->workingSet[$identifier])) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ($this->repairMode) {
|
||||
// In repair mode, rediscover the entire repository, ignoring the
|
||||
// database state. We can hit the local cache above, but if we miss it
|
||||
|
@ -563,6 +582,12 @@ final class PhabricatorRepositoryDiscoveryEngine
|
|||
}
|
||||
}
|
||||
|
||||
private function didDiscoverRefs(array $refs) {
|
||||
foreach ($refs as $ref) {
|
||||
$this->workingSet[$ref->getIdentifier()] = true;
|
||||
}
|
||||
}
|
||||
|
||||
private function insertTask(
|
||||
PhabricatorRepository $repository,
|
||||
PhabricatorRepositoryCommit $commit,
|
||||
|
|
Loading…
Reference in a new issue