mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-19 12:00:55 +01:00
Improve performance of repository discovery under Mercurial
Summary: Prelminary, I want to test this a bit more when I'm better rested. Provide a "bulk" import mode for Mercurial so we can do initial discovery more quickly. Test Plan: Imported the Mercurial repository in 2m45s, blocked on MySQL I/O rather than Mercurial I/O. Reviewers: csilvers, btrahan Reviewed By: csilvers CC: aran Differential Revision: https://secure.phabricator.com/D2457
This commit is contained in:
parent
b20ae2a07f
commit
820a6d407a
5 changed files with 186 additions and 23 deletions
|
@ -698,6 +698,7 @@ phutil_register_library_map(array(
|
|||
'PhabricatorMailImplementationTestAdapter' => 'applications/metamta/adapter/test',
|
||||
'PhabricatorMailReplyHandler' => 'applications/metamta/replyhandler/base',
|
||||
'PhabricatorMarkupEngine' => 'applications/markup/engine',
|
||||
'PhabricatorMercurialGraphStream' => 'applications/repository/daemon/pulllocal',
|
||||
'PhabricatorMetaMTAAttachment' => 'applications/metamta/storage/mail',
|
||||
'PhabricatorMetaMTAController' => 'applications/metamta/controller/base',
|
||||
'PhabricatorMetaMTADAO' => 'applications/metamta/storage/base',
|
||||
|
@ -1360,6 +1361,7 @@ phutil_register_library_map(array(
|
|||
'DiffusionRawDiffQuery' => 'DiffusionQuery',
|
||||
'DiffusionRenameHistoryQuery' => 'DiffusionQuery',
|
||||
'DiffusionRepositoryController' => 'DiffusionController',
|
||||
'DiffusionSetupException' => 'AphrontUsageException',
|
||||
'DiffusionSvnBrowseQuery' => 'DiffusionBrowseQuery',
|
||||
'DiffusionSvnCommitParentsQuery' => 'DiffusionCommitParentsQuery',
|
||||
'DiffusionSvnCommitTagsQuery' => 'DiffusionCommitTagsQuery',
|
||||
|
|
|
@ -78,6 +78,7 @@ final class DiffusionMercurialBrowseQuery extends DiffusionBrowseQuery {
|
|||
$result = new DiffusionRepositoryPath();
|
||||
$result->setPath($key);
|
||||
$result->setFileType($type);
|
||||
$result->setFullPath(ltrim($match_against.'/', '/').$key);
|
||||
|
||||
$results[$key] = $result;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,171 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* Copyright 2012 Facebook, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Streaming interface on top of "hg log" that gives us performant access to
|
||||
* the Mercurial commit graph with one nonblocking invocation of "hg". See
|
||||
* @{class:PhabricatorRepositoryPullLocalDaemon}.
|
||||
*/
|
||||
final class PhabricatorMercurialGraphStream {
|
||||
|
||||
private $repository;
|
||||
private $iterator;
|
||||
|
||||
private $parents = array();
|
||||
private $dates = array();
|
||||
private $local = array();
|
||||
private $localParents = array();
|
||||
|
||||
public function __construct(PhabricatorRepository $repository) {
|
||||
$this->repository = $repository;
|
||||
|
||||
$future = $repository->getLocalCommandFuture(
|
||||
"log --template '{rev}\1{node}\1{date}\1{parents}\2'");
|
||||
|
||||
$this->iterator = new LinesOfALargeExecFuture($future);
|
||||
$this->iterator->setDelimiter("\2");
|
||||
$this->iterator->rewind();
|
||||
}
|
||||
|
||||
public function getParents($commit) {
|
||||
if (!isset($this->parents[$commit])) {
|
||||
$this->parseUntil('node', $commit);
|
||||
|
||||
$local = $this->localParents[$commit];
|
||||
|
||||
// The normal parsing pass gives us the local revision numbers of the
|
||||
// parents, but since we've decided we care about this data, we need to
|
||||
// convert them into full hashes. To do this, we parse to the deepest
|
||||
// one and then just look them up.
|
||||
|
||||
$parents = array();
|
||||
if ($local) {
|
||||
$this->parseUntil('rev', min($local));
|
||||
foreach ($local as $rev) {
|
||||
$parents[] = $this->local[$rev];
|
||||
}
|
||||
}
|
||||
|
||||
$this->parents[$commit] = $parents;
|
||||
|
||||
// Throw away the local info for this commit, we no longer need it.
|
||||
unset($this->localParents[$commit]);
|
||||
}
|
||||
|
||||
return $this->parents[$commit];
|
||||
}
|
||||
|
||||
public function getCommitDate($commit) {
|
||||
if (!isset($this->dates[$commit])) {
|
||||
$this->parseUntil('node', $commit);
|
||||
}
|
||||
return $this->dates[$commit];
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse until we have consumed some object. There are two types of parses:
|
||||
* parse until we find a commit hash ($until_type = "node"), or parse until we
|
||||
* find a local commit number ($until_type = "rev"). We use the former when
|
||||
* looking up commits, and the latter when resolving parents.
|
||||
*/
|
||||
private function parseUntil($until_type, $until_name) {
|
||||
if ($this->isParsed($until_type, $until_name)) {
|
||||
return;
|
||||
}
|
||||
|
||||
$hglog = $this->iterator;
|
||||
|
||||
while ($hglog->valid()) {
|
||||
$line = $hglog->current();
|
||||
$hglog->next();
|
||||
|
||||
$line = trim($line);
|
||||
if (!strlen($line)) {
|
||||
break;
|
||||
}
|
||||
list($rev, $node, $date, $parents) = explode("\1", $line);
|
||||
|
||||
$rev = (int)$rev;
|
||||
$date = (int)head(explode('.', $date));
|
||||
|
||||
$this->dates[$node] = $date;
|
||||
$this->local[$rev] = $node;
|
||||
$this->localParents[$node] = $this->parseParents($parents, $rev);
|
||||
|
||||
if ($this->isParsed($until_type, $until_name)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
throw new Exception(
|
||||
"No such {$until_type} '{$until_name}' in repository!");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse a {parents} template, returning the local commit numbers.
|
||||
*/
|
||||
private function parseParents($parents, $target_rev) {
|
||||
|
||||
// The hg '{parents}' token is empty if there is one "natural" parent
|
||||
// (predecessor local commit ID). Othwerwise, it may have one or two
|
||||
// parents. The string looks like this:
|
||||
//
|
||||
// 151:1f6c61a60586 154:1d5f799ebe1e
|
||||
|
||||
$parents = trim($parents);
|
||||
if (strlen($parents)) {
|
||||
$local = array();
|
||||
|
||||
$parents = explode(' ', $parents);
|
||||
foreach ($parents as $key => $parent) {
|
||||
$parent = (int)head(explode(':', $parent));
|
||||
if ($parent == -1) {
|
||||
// Initial commits will sometimes have "-1" as a parent.
|
||||
continue;
|
||||
}
|
||||
$local[] = $parent;
|
||||
}
|
||||
} else if ($target_rev) {
|
||||
// We have empty parents. If there's a predecessor, that's the local
|
||||
// parent number.
|
||||
$local = array($target_rev - 1);
|
||||
} else {
|
||||
// Initial commits will sometimes have no parents.
|
||||
$local = array();
|
||||
}
|
||||
|
||||
return $local;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if the object specified by $type ('rev' or 'node') and
|
||||
* $name (rev or node name) has been consumed from the hg process.
|
||||
*/
|
||||
private function isParsed($type, $name) {
|
||||
switch ($type) {
|
||||
case 'rev':
|
||||
return isset($this->local[$name]);
|
||||
case 'node':
|
||||
return isset($this->dates[$name]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -665,15 +665,15 @@ final class PhabricatorRepositoryPullLocalDaemon
|
|||
|
||||
$seen_parent = array();
|
||||
|
||||
// For all the new commits at the branch heads, walk backward until we find
|
||||
// only commits we've aleady seen.
|
||||
while (true) {
|
||||
$stream = new PhabricatorMercurialGraphStream($repository);
|
||||
|
||||
// For all the new commits at the branch heads, walk backward until we
|
||||
// find only commits we've aleady seen.
|
||||
while ($discover) {
|
||||
$target = array_pop($discover);
|
||||
list($stdout) = $repository->execxLocalCommand(
|
||||
'parents --rev %s --template %s',
|
||||
$target,
|
||||
'{node}\n');
|
||||
$parents = array_filter(explode("\n", trim($stdout)));
|
||||
|
||||
$parents = $stream->getParents($target);
|
||||
|
||||
foreach ($parents as $parent) {
|
||||
if (isset($seen_parent[$parent])) {
|
||||
continue;
|
||||
|
@ -684,24 +684,11 @@ final class PhabricatorRepositoryPullLocalDaemon
|
|||
$insert[] = $parent;
|
||||
}
|
||||
}
|
||||
if (empty($discover)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while (true) {
|
||||
$target = array_pop($insert);
|
||||
list($stdout) = $repository->execxLocalCommand(
|
||||
'log --rev %s --template %s',
|
||||
$target,
|
||||
'{date|rfc822date}');
|
||||
$epoch = strtotime($stdout);
|
||||
|
||||
foreach ($insert as $target) {
|
||||
$epoch = $stream->getCommitDate($target);
|
||||
self::recordCommit($repository, $target, $epoch);
|
||||
|
||||
if (empty($insert)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,8 +20,10 @@ phutil_require_module('phabricator', 'storage/queryfx');
|
|||
|
||||
phutil_require_module('phutil', 'error');
|
||||
phutil_require_module('phutil', 'filesystem');
|
||||
phutil_require_module('phutil', 'filesystem/linesofalarge/execfuture');
|
||||
phutil_require_module('phutil', 'parser/argument/parser');
|
||||
phutil_require_module('phutil', 'utils');
|
||||
|
||||
|
||||
phutil_require_source('PhabricatorMercurialGraphStream.php');
|
||||
phutil_require_source('PhabricatorRepositoryPullLocalDaemon.php');
|
||||
|
|
Loading…
Reference in a new issue