1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-18 18:51:12 +01:00

Improve performance of repository discovery under Mercurial

Summary: Preliminary, I want to test this a bit more when I'm better rested. Provide a "bulk" import mode for Mercurial so we can do initial discovery more quickly.

Test Plan: Imported the Mercurial repository in 2m45s, blocked on MySQL I/O rather than Mercurial I/O.

Reviewers: csilvers, btrahan

Reviewed By: csilvers

CC: aran

Differential Revision: https://secure.phabricator.com/D2457
This commit is contained in:
epriestley 2012-05-11 18:29:14 -07:00
parent b20ae2a07f
commit 820a6d407a
5 changed files with 186 additions and 23 deletions

View file

@ -698,6 +698,7 @@ phutil_register_library_map(array(
'PhabricatorMailImplementationTestAdapter' => 'applications/metamta/adapter/test',
'PhabricatorMailReplyHandler' => 'applications/metamta/replyhandler/base',
'PhabricatorMarkupEngine' => 'applications/markup/engine',
'PhabricatorMercurialGraphStream' => 'applications/repository/daemon/pulllocal',
'PhabricatorMetaMTAAttachment' => 'applications/metamta/storage/mail',
'PhabricatorMetaMTAController' => 'applications/metamta/controller/base',
'PhabricatorMetaMTADAO' => 'applications/metamta/storage/base',
@ -1360,6 +1361,7 @@ phutil_register_library_map(array(
'DiffusionRawDiffQuery' => 'DiffusionQuery',
'DiffusionRenameHistoryQuery' => 'DiffusionQuery',
'DiffusionRepositoryController' => 'DiffusionController',
'DiffusionSetupException' => 'AphrontUsageException',
'DiffusionSvnBrowseQuery' => 'DiffusionBrowseQuery',
'DiffusionSvnCommitParentsQuery' => 'DiffusionCommitParentsQuery',
'DiffusionSvnCommitTagsQuery' => 'DiffusionCommitTagsQuery',

View file

@ -78,6 +78,7 @@ final class DiffusionMercurialBrowseQuery extends DiffusionBrowseQuery {
$result = new DiffusionRepositoryPath();
$result->setPath($key);
$result->setFileType($type);
$result->setFullPath(ltrim($match_against.'/', '/').$key);
$results[$key] = $result;
}

View file

@ -0,0 +1,171 @@
<?php
/*
* Copyright 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Incremental reader over the output of a single "hg log" invocation. Records
 * are pulled from the underlying process only as far as needed to answer a
 * query, and everything read along the way is cached, so walking the commit
 * graph does not require one "hg" call per commit. See
 * @{class:PhabricatorRepositoryPullLocalDaemon}.
 */
final class PhabricatorMercurialGraphStream {

  private $repository;
  private $iterator;

  // Commit hash => list of parent commit hashes (fully resolved).
  private $parents = array();

  // Commit hash => integer commit date taken from the "{date}" field.
  private $dates = array();

  // Local revision number => commit hash.
  private $local = array();

  // Commit hash => list of parent local revision numbers (not yet resolved).
  private $localParents = array();

  /**
   * Start "hg log" for the repository and prepare to read its records.
   *
   * Fields within a record are separated by "\1" and records are terminated
   * by "\2".
   *
   * @param PhabricatorRepository Repository to read the commit graph from.
   */
  public function __construct(PhabricatorRepository $repository) {
    $this->repository = $repository;

    $log_future = $repository->getLocalCommandFuture(
      "log --template '{rev}\1{node}\1{date}\1{parents}\2'");

    $records = new LinesOfALargeExecFuture($log_future);
    $records->setDelimiter("\2");
    $records->rewind();

    $this->iterator = $records;
  }

  /**
   * Get the hashes of the parents of a commit.
   *
   * @param string Commit hash.
   * @return list<string> Parent commit hashes; empty if there are none.
   */
  public function getParents($commit) {
    if (isset($this->parents[$commit])) {
      return $this->parents[$commit];
    }

    $this->parseUntil('node', $commit);
    $parent_revs = $this->localParents[$commit];

    // Parsing only records the parents' local revision numbers. Now that
    // someone actually wants them, read forward to the smallest of those
    // numbers (presumably the last one to appear in the log output) so every
    // parent has a known hash, then translate.
    $hashes = array();
    if ($parent_revs) {
      $this->parseUntil('rev', min($parent_revs));
      foreach ($parent_revs as $parent_rev) {
        $hashes[] = $this->local[$parent_rev];
      }
    }

    // The unresolved form is no longer needed once the hashes are cached.
    unset($this->localParents[$commit]);
    $this->parents[$commit] = $hashes;

    return $hashes;
  }

  /**
   * Get the date of a commit as an integer.
   *
   * @param string Commit hash.
   * @return int Integer part of the "{date}" field for the commit.
   */
  public function getCommitDate($commit) {
    // This returns immediately if the commit has already been read.
    $this->parseUntil('node', $commit);
    return $this->dates[$commit];
  }

  /**
   * Read records from "hg log" until a particular object has been seen.
   *
   * Two kinds of target are supported: a commit hash ($until_type = "node"),
   * used when looking up a commit, and a local revision number
   * ($until_type = "rev"), used when resolving parents. Throws if the output
   * ends, or an empty record is read, before the target shows up.
   */
  private function parseUntil($until_type, $until_name) {
    $records = $this->iterator;

    while (!$this->isParsed($until_type, $until_name)) {
      $record = '';
      if ($records->valid()) {
        $record = trim($records->current());
        $records->next();
      }

      if (!strlen($record)) {
        // Either the stream is exhausted or we hit an empty record.
        throw new Exception(
          "No such {$until_type} '{$until_name}' in repository!");
      }

      list($rev_field, $hash, $date_field, $parents_field) =
        explode("\1", $record);

      $rev_number = (int)$rev_field;

      // Keep only the part of the date field before the first ".".
      $this->dates[$hash] = (int)head(explode('.', $date_field));
      $this->local[$rev_number] = $hash;
      $this->localParents[$hash] = $this->parseParents(
        $parents_field,
        $rev_number);
    }
  }

  /**
   * Parse a {parents} template, returning the local commit numbers.
   */
  private function parseParents($parents, $target_rev) {
    // The hg '{parents}' token is empty if there is one "natural" parent
    // (predecessor local commit ID). Otherwise, it may have one or two
    // parents. The string looks like this:
    //
    //   151:1f6c61a60586 154:1d5f799ebe1e

    $parents = trim($parents);

    if (!strlen($parents)) {
      if ($target_rev) {
        // Empty parents with a predecessor: the predecessor is the parent.
        return array($target_rev - 1);
      }
      // Initial commits will sometimes have no parents.
      return array();
    }

    $revs = array();
    foreach (explode(' ', $parents) as $entry) {
      $rev_number = (int)head(explode(':', $entry));
      // Initial commits will sometimes have "-1" as a parent; drop it.
      if ($rev_number != -1) {
        $revs[] = $rev_number;
      }
    }

    return $revs;
  }

  /**
   * Returns true if the object specified by $type ('rev' or 'node') and
   * $name (rev or node name) has been consumed from the hg process.
   */
  private function isParsed($type, $name) {
    if ($type == 'rev') {
      return isset($this->local[$name]);
    }

    if ($type == 'node') {
      return isset($this->dates[$name]);
    }

    return null;
  }

}

View file

@ -665,15 +665,15 @@ final class PhabricatorRepositoryPullLocalDaemon
$seen_parent = array();
// For all the new commits at the branch heads, walk backward until we find
only commits we've already seen.
while (true) {
$stream = new PhabricatorMercurialGraphStream($repository);
// For all the new commits at the branch heads, walk backward until we
find only commits we've already seen.
while ($discover) {
$target = array_pop($discover);
list($stdout) = $repository->execxLocalCommand(
'parents --rev %s --template %s',
$target,
'{node}\n');
$parents = array_filter(explode("\n", trim($stdout)));
$parents = $stream->getParents($target);
foreach ($parents as $parent) {
if (isset($seen_parent[$parent])) {
continue;
@ -684,24 +684,11 @@ final class PhabricatorRepositoryPullLocalDaemon
$insert[] = $parent;
}
}
if (empty($discover)) {
break;
}
}
while (true) {
$target = array_pop($insert);
list($stdout) = $repository->execxLocalCommand(
'log --rev %s --template %s',
$target,
'{date|rfc822date}');
$epoch = strtotime($stdout);
foreach ($insert as $target) {
$epoch = $stream->getCommitDate($target);
self::recordCommit($repository, $target, $epoch);
if (empty($insert)) {
break;
}
}
}

View file

@ -20,8 +20,10 @@ phutil_require_module('phabricator', 'storage/queryfx');
phutil_require_module('phutil', 'error');
phutil_require_module('phutil', 'filesystem');
phutil_require_module('phutil', 'filesystem/linesofalarge/execfuture');
phutil_require_module('phutil', 'parser/argument/parser');
phutil_require_module('phutil', 'utils');
phutil_require_source('PhabricatorMercurialGraphStream.php');
phutil_require_source('PhabricatorRepositoryPullLocalDaemon.php');