From 820a6d407a9ec4bbc037c030a431124d757153cb Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 11 May 2012 18:29:14 -0700 Subject: [PATCH] Improve performance of repository discovery under Mercurial Summary: Prelminary, I want to test this a bit more when I'm better rested. Provide a "bulk" import mode for Mercurial so we can do initial discovery more quickly. Test Plan: Imported the Mercurial repository in 2m45s, blocked on MySQL I/O rather than Mercurial I/O. Reviewers: csilvers, btrahan Reviewed By: csilvers CC: aran Differential Revision: https://secure.phabricator.com/D2457 --- src/__phutil_library_map__.php | 2 + .../DiffusionMercurialBrowseQuery.php | 1 + .../PhabricatorMercurialGraphStream.php | 171 ++++++++++++++++++ .../PhabricatorRepositoryPullLocalDaemon.php | 33 +--- .../repository/daemon/pulllocal/__init__.php | 2 + 5 files changed, 186 insertions(+), 23 deletions(-) create mode 100644 src/applications/repository/daemon/pulllocal/PhabricatorMercurialGraphStream.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index 8475bf720d..42eac5d012 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -698,6 +698,7 @@ phutil_register_library_map(array( 'PhabricatorMailImplementationTestAdapter' => 'applications/metamta/adapter/test', 'PhabricatorMailReplyHandler' => 'applications/metamta/replyhandler/base', 'PhabricatorMarkupEngine' => 'applications/markup/engine', + 'PhabricatorMercurialGraphStream' => 'applications/repository/daemon/pulllocal', 'PhabricatorMetaMTAAttachment' => 'applications/metamta/storage/mail', 'PhabricatorMetaMTAController' => 'applications/metamta/controller/base', 'PhabricatorMetaMTADAO' => 'applications/metamta/storage/base', @@ -1360,6 +1361,7 @@ phutil_register_library_map(array( 'DiffusionRawDiffQuery' => 'DiffusionQuery', 'DiffusionRenameHistoryQuery' => 'DiffusionQuery', 'DiffusionRepositoryController' => 'DiffusionController', + 'DiffusionSetupException' => 'AphrontUsageException', 'DiffusionSvnBrowseQuery' => 'DiffusionBrowseQuery', 'DiffusionSvnCommitParentsQuery' => 'DiffusionCommitParentsQuery', 'DiffusionSvnCommitTagsQuery' => 'DiffusionCommitTagsQuery', diff --git a/src/applications/diffusion/query/browse/mercurial/DiffusionMercurialBrowseQuery.php b/src/applications/diffusion/query/browse/mercurial/DiffusionMercurialBrowseQuery.php index ebb18e1353..92eb0c65ef 100644 --- a/src/applications/diffusion/query/browse/mercurial/DiffusionMercurialBrowseQuery.php +++ b/src/applications/diffusion/query/browse/mercurial/DiffusionMercurialBrowseQuery.php @@ -78,6 +78,7 @@ final class DiffusionMercurialBrowseQuery extends DiffusionBrowseQuery { $result = new DiffusionRepositoryPath(); $result->setPath($key); $result->setFileType($type); + $result->setFullPath(ltrim($match_against.'/', '/').$key); $results[$key] = $result; } diff --git a/src/applications/repository/daemon/pulllocal/PhabricatorMercurialGraphStream.php b/src/applications/repository/daemon/pulllocal/PhabricatorMercurialGraphStream.php new file mode 100644 index 0000000000..98f5846b70 --- /dev/null +++ b/src/applications/repository/daemon/pulllocal/PhabricatorMercurialGraphStream.php @@ -0,0 +1,171 @@ +repository = $repository; + + $future = $repository->getLocalCommandFuture( + "log --template '{rev}\1{node}\1{date}\1{parents}\2'"); + + $this->iterator = new LinesOfALargeExecFuture($future); + $this->iterator->setDelimiter("\2"); + $this->iterator->rewind(); + } + + public function getParents($commit) { + if (!isset($this->parents[$commit])) { + $this->parseUntil('node', $commit); + + $local = $this->localParents[$commit]; + + // The normal parsing pass gives us the local revision numbers of the + // parents, but since we've decided we care about this data, we need to + // convert them into full hashes. To do this, we parse to the deepest + // one and then just look them up. + + $parents = array(); + if ($local) { + $this->parseUntil('rev', min($local)); + foreach ($local as $rev) { + $parents[] = $this->local[$rev]; + } + } + + $this->parents[$commit] = $parents; + + // Throw away the local info for this commit, we no longer need it. + unset($this->localParents[$commit]); + } + + return $this->parents[$commit]; + } + + public function getCommitDate($commit) { + if (!isset($this->dates[$commit])) { + $this->parseUntil('node', $commit); + } + return $this->dates[$commit]; + } + + /** + * Parse until we have consumed some object. There are two types of parses: + * parse until we find a commit hash ($until_type = "node"), or parse until we + * find a local commit number ($until_type = "rev"). We use the former when + * looking up commits, and the latter when resolving parents. + */ + private function parseUntil($until_type, $until_name) { + if ($this->isParsed($until_type, $until_name)) { + return; + } + + $hglog = $this->iterator; + + while ($hglog->valid()) { + $line = $hglog->current(); + $hglog->next(); + + $line = trim($line); + if (!strlen($line)) { + break; + } + list($rev, $node, $date, $parents) = explode("\1", $line); + + $rev = (int)$rev; + $date = (int)head(explode('.', $date)); + + $this->dates[$node] = $date; + $this->local[$rev] = $node; + $this->localParents[$node] = $this->parseParents($parents, $rev); + + if ($this->isParsed($until_type, $until_name)) { + return; + } + } + + throw new Exception( + "No such {$until_type} '{$until_name}' in repository!"); + } + + + /** + * Parse a {parents} template, returning the local commit numbers. + */ + private function parseParents($parents, $target_rev) { + + // The hg '{parents}' token is empty if there is one "natural" parent + // (predecessor local commit ID). Othwerwise, it may have one or two + // parents. The string looks like this: + // + // 151:1f6c61a60586 154:1d5f799ebe1e + + $parents = trim($parents); + if (strlen($parents)) { + $local = array(); + + $parents = explode(' ', $parents); + foreach ($parents as $key => $parent) { + $parent = (int)head(explode(':', $parent)); + if ($parent == -1) { + // Initial commits will sometimes have "-1" as a parent. + continue; + } + $local[] = $parent; + } + } else if ($target_rev) { + // We have empty parents. If there's a predecessor, that's the local + // parent number. + $local = array($target_rev - 1); + } else { + // Initial commits will sometimes have no parents. + $local = array(); + } + + return $local; + } + + + /** + * Returns true if the object specified by $type ('rev' or 'node') and + * $name (rev or node name) has been consumed from the hg process. + */ + private function isParsed($type, $name) { + switch ($type) { + case 'rev': + return isset($this->local[$name]); + case 'node': + return isset($this->dates[$name]); + } + } + + +} diff --git a/src/applications/repository/daemon/pulllocal/PhabricatorRepositoryPullLocalDaemon.php b/src/applications/repository/daemon/pulllocal/PhabricatorRepositoryPullLocalDaemon.php index 6c48bf4612..a7663517dd 100644 --- a/src/applications/repository/daemon/pulllocal/PhabricatorRepositoryPullLocalDaemon.php +++ b/src/applications/repository/daemon/pulllocal/PhabricatorRepositoryPullLocalDaemon.php @@ -665,15 +665,15 @@ final class PhabricatorRepositoryPullLocalDaemon $seen_parent = array(); - // For all the new commits at the branch heads, walk backward until we find - // only commits we've aleady seen. - while (true) { + $stream = new PhabricatorMercurialGraphStream($repository); + + // For all the new commits at the branch heads, walk backward until we + // find only commits we've aleady seen. + while ($discover) { $target = array_pop($discover); - list($stdout) = $repository->execxLocalCommand( - 'parents --rev %s --template %s', - $target, - '{node}\n'); - $parents = array_filter(explode("\n", trim($stdout))); + + $parents = $stream->getParents($target); + foreach ($parents as $parent) { if (isset($seen_parent[$parent])) { continue; @@ -684,24 +684,11 @@ final class PhabricatorRepositoryPullLocalDaemon $insert[] = $parent; } } - if (empty($discover)) { - break; - } } - while (true) { - $target = array_pop($insert); - list($stdout) = $repository->execxLocalCommand( - 'log --rev %s --template %s', - $target, - '{date|rfc822date}'); - $epoch = strtotime($stdout); - + foreach ($insert as $target) { + $epoch = $stream->getCommitDate($target); self::recordCommit($repository, $target, $epoch); - - if (empty($insert)) { - break; - } } } diff --git a/src/applications/repository/daemon/pulllocal/__init__.php b/src/applications/repository/daemon/pulllocal/__init__.php index 63a971cc33..2ef58c59ba 100644 --- a/src/applications/repository/daemon/pulllocal/__init__.php +++ b/src/applications/repository/daemon/pulllocal/__init__.php @@ -20,8 +20,10 @@ phutil_require_module('phabricator', 'storage/queryfx'); phutil_require_module('phutil', 'error'); phutil_require_module('phutil', 'filesystem'); +phutil_require_module('phutil', 'filesystem/linesofalarge/execfuture'); phutil_require_module('phutil', 'parser/argument/parser'); phutil_require_module('phutil', 'utils'); +phutil_require_source('PhabricatorMercurialGraphStream.php'); phutil_require_source('PhabricatorRepositoryPullLocalDaemon.php');