mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-26 16:52:41 +01:00
Use subprocess output streaming to improve performance of Git commit discovery
Summary: Improve performance of large discovery tasks in Git by using subprocess streaming, like we do for Mercurial. Basically, we save the cost of running many `git log` commands by running one big `git log` command but only parsing as much of it as we need to. This is pretty complicated, but we more or less need it for Mercurial (which has ~100ms of 'hg' overhead instead of ~5ms of 'git' overhead) so we're already committed to most of the complexity costs. The git implementation is much simpler than the hg implementation because we don't need to handle all the weird parent rules (git gives them to us easily). Test Plan: Before, `discover --repair` on Phabricator took 35s: real 0m35.324s user 0m13.364s sys 0m21.088s Now 7s: real 0m7.236s user 0m2.436s sys 0m3.444s Note that most of the time is spent inserting rows after discovery; the actual speedup of the git discovery part is much larger (subjectively, it runs in less than a second now, from ~28 seconds before). Also ran discover/pull on single new commits in normal cases to verify that nothing broke in the common case. Reviewers: jungejason, nh, vrana Reviewed By: vrana CC: aran Maniphest Tasks: T1401 Differential Revision: https://secure.phabricator.com/D2851
This commit is contained in:
parent
ca31e3e84b
commit
7b50b2fbdc
3 changed files with 101 additions and 7 deletions
|
@ -684,6 +684,7 @@ phutil_register_library_map(array(
|
|||
'PhabricatorFlagQuery' => 'applications/flag/query/PhabricatorFlagQuery.php',
|
||||
'PhabricatorFormExample' => 'applications/uiexample/examples/PhabricatorFormExample.php',
|
||||
'PhabricatorGarbageCollectorDaemon' => 'infrastructure/daemon/PhabricatorGarbageCollectorDaemon.php',
|
||||
'PhabricatorGitGraphStream' => 'applications/repository/daemon/PhabricatorGitGraphStream.php',
|
||||
'PhabricatorGoodForNothingWorker' => 'infrastructure/daemon/workers/worker/PhabricatorGoodForNothingWorker.php',
|
||||
'PhabricatorHandleObjectSelectorDataView' => 'applications/phid/handle/view/PhabricatorHandleObjectSelectorDataView.php',
|
||||
'PhabricatorHash' => 'infrastructure/util/PhabricatorHash.php',
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* Copyright 2012 Facebook, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Incrementally reads commit parents and commit dates from the output of a
 * single `git log` command.
 *
 * Instead of running one `git log` per commit, one command is started for the
 * starting commit and its output is consumed line by line, only as far as is
 * needed to answer the most recent query. Everything parsed along the way is
 * cached in memory, so earlier commits can be queried again without reading
 * more output.
 */
final class PhabricatorGitGraphStream {

  private $repository;
  private $iterator;

  // Map of commit hash => list of parent commit hashes, for parsed commits.
  private $parents = array();

  // Map of commit hash => committer timestamp (as the string git printed).
  private $dates = array();

  /**
   * @param PhabricatorRepository $repository Repository to run `git log` in.
   * @param string $start_commit Commit (or ref) the log starts from.
   */
  public function __construct(
    PhabricatorRepository $repository,
    $start_commit) {

    $this->repository = $repository;

    // One record per line: "<hash> \x01 <parent hashes> \x01 <committer
    // epoch>" (without the spaces). The trailing "--" marks the end of
    // revisions so the commit name is not taken for a path.
    $future = $repository->getLocalCommandFuture(
      "log --format=%s %s --",
      '%H%x01%P%x01%ct',
      $start_commit);

    $this->iterator = new LinesOfALargeExecFuture($future);
    $this->iterator->setDelimiter("\n");

    // parseUntil() starts with valid()/current(), so position the iterator
    // on the first line up front.
    $this->iterator->rewind();
  }

  /**
   * Get the parent hashes of a commit.
   *
   * @param string $commit Full commit hash, as printed by `%H`.
   * @return array List of parent hashes; empty for a commit with no parents.
   * @throws Exception If the commit does not appear in the log output, or the
   *   output contains a malformed line.
   */
  public function getParents($commit) {
    // Returns immediately when the commit has already been parsed.
    $this->parseUntil($commit);
    return $this->parents[$commit];
  }

  /**
   * Get the committer timestamp of a commit.
   *
   * @param string $commit Full commit hash, as printed by `%H`.
   * @return string Epoch timestamp, as the string printed by `%ct`.
   * @throws Exception If the commit does not appear in the log output, or the
   *   output contains a malformed line.
   */
  public function getCommitDate($commit) {
    // Returns immediately when the commit has already been parsed.
    $this->parseUntil($commit);
    return $this->dates[$commit];
  }

  /**
   * Consume log output until the given commit has been parsed.
   *
   * Every record read on the way is cached, so the stream is never re-read.
   *
   * @param string $commit Full commit hash to look for.
   * @return void
   * @throws Exception If the output ends (or a blank line is reached) before
   *   the commit is found, or a line does not have the expected three fields.
   */
  private function parseUntil($commit) {
    if ($this->isParsed($commit)) {
      return;
    }

    $gitlog = $this->iterator;

    while ($gitlog->valid()) {
      $line = $gitlog->current();
      $gitlog->next();

      $line = trim($line);
      if (!strlen($line)) {
        // A blank line is treated as the end of the output.
        break;
      }

      $fields = explode("\1", $line);
      if (count($fields) < 3) {
        // Fail loudly: destructuring a short record would raise notices and
        // cache a null date, which later looks like a missing commit.
        throw new Exception(
          "Malformed line in git log output: '{$line}'");
      }
      list($hash, $parents, $epoch) = $fields;

      if (strlen($parents)) {
        $parents = explode(' ', $parents);
      } else {
        // First commit.
        $parents = array();
      }

      $this->dates[$hash] = $epoch;
      $this->parents[$hash] = $parents;

      if ($this->isParsed($commit)) {
        return;
      }
    }

    throw new Exception("No such commit '{$commit}' in repository!");
  }

  /**
   * Test whether a commit has already been read from the log output.
   *
   * Dates and parents are always recorded together, so checking one map is
   * sufficient.
   *
   * @param string $commit Full commit hash.
   * @return bool True if the commit's record has been parsed and cached.
   */
  private function isParsed($commit) {
    return isset($this->dates[$commit]);
  }

}
|
|
@ -614,12 +614,11 @@ final class PhabricatorRepositoryPullLocalDaemon
|
|||
|
||||
$seen_parent = array();
|
||||
|
||||
$stream = new PhabricatorGitGraphStream($repository, $commit);
|
||||
|
||||
while (true) {
|
||||
$target = array_pop($discover);
|
||||
list($parents) = $repository->execxLocalCommand(
|
||||
'log -n1 --pretty="%%P" %s',
|
||||
$target);
|
||||
$parents = array_filter(explode(' ', trim($parents)));
|
||||
$parents = $stream->getParents($target);
|
||||
foreach ($parents as $parent) {
|
||||
if (isset($seen_parent[$parent])) {
|
||||
// We end up in a loop here somehow when we parse Arcanist if we
|
||||
|
@ -656,9 +655,7 @@ final class PhabricatorRepositoryPullLocalDaemon
|
|||
|
||||
while (true) {
|
||||
$target = array_pop($insert);
|
||||
list($epoch) = $repository->execxLocalCommand(
|
||||
'log -n1 --pretty="%%ct" %s',
|
||||
$target);
|
||||
$epoch = $stream->getCommitDate($target);
|
||||
$epoch = trim($epoch);
|
||||
|
||||
if ($branch !== null) {
|
||||
|
|
Loading…
Reference in a new issue