1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-12-19 20:10:55 +01:00

Reduce initial discovery from O(branches * commits) to O(commits)

Summary:
Fixes T4414. Currently, when we discover a new repository, we do something like this:

  foreach (branch) {
    foreach (commit on this branch) {
      do_something();
    }
  }

In cases where there are a lot of branches which mostly just branch `master`, this leads to us doing roughly `O(branches * commits)` work.

We have a `commitCache` to prevent this, but it has two problems:

  - It only fills out of the DB, and we do this whole thing before writing to the DB, which is the entire point.
  - It has a fixed size (2048) and on initial discovery we're usually dealing with way more commits than that.

Instead, also stop doing work if we hit a commit which is known already.

Test Plan:
  - Added `print` on the number of discovered refs and number of unique refs.
  - Ran `bin/repository discover --repair X` on a repo with several branches.
  - Before the patch, got 397 refs and 135 unique refs.
  - After the patch, got 135 refs and 135 unique refs.

Reviewers: btrahan

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T4414

Differential Revision: https://secure.phabricator.com/D8374
This commit is contained in:
epriestley 2014-02-28 13:02:14 -08:00
parent e28b78f5eb
commit d86bb086ca

View file

@ -12,6 +12,8 @@ final class PhabricatorRepositoryDiscoveryEngine
private $repairMode;
private $commitCache = array();
private $workingSet = array();
const MAX_COMMIT_CACHE_SIZE = 2048;
@ -50,6 +52,9 @@ final class PhabricatorRepositoryDiscoveryEngine
throw new Exception("Unknown VCS '{$vcs}'!");
}
// Clear the working set cache.
$this->workingSet = array();
// Record discovered commits and mark them in the cache.
foreach ($refs as $ref) {
$this->recordCommit(
@ -113,10 +118,14 @@ final class PhabricatorRepositoryDiscoveryEngine
$this->log(pht("Looking for new commits."));
$refs[] = $this->discoverStreamAncestry(
$branch_refs = $this->discoverStreamAncestry(
new PhabricatorGitGraphStream($repository, $commit),
$commit,
$repository->shouldAutocloseBranch($name));
$this->didDiscoverRefs($branch_refs);
$refs[] = $branch_refs;
}
return array_mergev($refs);
@ -289,6 +298,8 @@ final class PhabricatorRepositoryDiscoveryEngine
}
$refs = array_reverse($refs);
$this->didDiscoverRefs($refs);
return $refs;
}
@ -363,10 +374,14 @@ final class PhabricatorRepositoryDiscoveryEngine
$this->log(pht("Looking for new commits."));
$refs[] = $this->discoverStreamAncestry(
$branch_refs = $this->discoverStreamAncestry(
new PhabricatorMercurialGraphStream($repository, $commit),
$commit,
$close_immediately = true);
$this->didDiscoverRefs($branch_refs);
$refs[] = $branch_refs;
}
return array_mergev($refs);
@ -447,6 +462,10 @@ final class PhabricatorRepositoryDiscoveryEngine
return true;
}
if (isset($this->workingSet[$identifier])) {
return true;
}
if ($this->repairMode) {
// In repair mode, rediscover the entire repository, ignoring the
// database state. We can hit the local cache above, but if we miss it
@ -563,6 +582,12 @@ final class PhabricatorRepositoryDiscoveryEngine
}
}
private function didDiscoverRefs(array $refs) {
foreach ($refs as $ref) {
$this->workingSet[$ref->getIdentifier()] = true;
}
}
private function insertTask(
PhabricatorRepository $repository,
PhabricatorRepositoryCommit $commit,