From d86bb086cac317d8eb976540bf91f03fd5a684fa Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 28 Feb 2014 13:02:14 -0800 Subject: [PATCH] Reduce initial discovery from O(branches * commits) to O(commits) Summary: Fixes T4414. Currently, when we discover a new repository, we do something like this: foreach (branch) { foreach (commit on this branch) { do_something(); } } In cases where there are a lot of branches which mostly just branch `master`, this leads to us doing roughly `O(branches * commits)` work. We have a `commitCache` to prevent this, but it has two problems: - It only fills out of the DB, and we do this whole thing before writing to the DB, which is the entire point. - It has a fixed size (2048) and on initial discovery we're usually dealing with way more commits than that. Instead, also stop doing work if we hit a commit which is known already. Test Plan: - Added `print` on the number of discovered refs and number of unique refs. - Ran `bin/repository discover --repair X` on a repo with several branches. - Before the patch, got 397 refs and 135 unique refs. - After the patch, got 135 refs and 135 unique refs. Reviewers: btrahan Reviewed By: btrahan CC: aran Maniphest Tasks: T4414 Differential Revision: https://secure.phabricator.com/D8374 --- .../PhabricatorRepositoryDiscoveryEngine.php | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php index 3744f09d11..9fc268281c 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php @@ -12,6 +12,8 @@ final class PhabricatorRepositoryDiscoveryEngine private $repairMode; private $commitCache = array(); + private $workingSet = array(); + const MAX_COMMIT_CACHE_SIZE = 2048; @@ -50,6 +52,9 @@ final class PhabricatorRepositoryDiscoveryEngine throw new Exception("Unknown VCS '{$vcs}'!"); } + // Clear the working set cache. + $this->workingSet = array(); + // Record discovered commits and mark them in the cache. foreach ($refs as $ref) { $this->recordCommit( @@ -113,10 +118,14 @@ final class PhabricatorRepositoryDiscoveryEngine $this->log(pht("Looking for new commits.")); - $refs[] = $this->discoverStreamAncestry( + $branch_refs = $this->discoverStreamAncestry( new PhabricatorGitGraphStream($repository, $commit), $commit, $repository->shouldAutocloseBranch($name)); + + $this->didDiscoverRefs($branch_refs); + + $refs[] = $branch_refs; } return array_mergev($refs); @@ -289,6 +298,8 @@ final class PhabricatorRepositoryDiscoveryEngine } $refs = array_reverse($refs); + $this->didDiscoverRefs($refs); + return $refs; } @@ -363,10 +374,14 @@ final class PhabricatorRepositoryDiscoveryEngine $this->log(pht("Looking for new commits.")); - $refs[] = $this->discoverStreamAncestry( + $branch_refs = $this->discoverStreamAncestry( new PhabricatorMercurialGraphStream($repository, $commit), $commit, $close_immediately = true); + + $this->didDiscoverRefs($branch_refs); + + $refs[] = $branch_refs; } return array_mergev($refs); @@ -447,6 +462,10 @@ final class PhabricatorRepositoryDiscoveryEngine return true; } + if (isset($this->workingSet[$identifier])) { + return true; + } + if ($this->repairMode) { // In repair mode, rediscover the entire repository, ignoring the // database state. We can hit the local cache above, but if we miss it @@ -563,6 +582,12 @@ final class PhabricatorRepositoryDiscoveryEngine } } + private function didDiscoverRefs(array $refs) { + foreach ($refs as $ref) { + $this->workingSet[$ref->getIdentifier()] = true; + } + } + private function insertTask( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit,