mirror of
https://we.phorge.it/source/arcanist.git
synced 2025-01-10 23:01:04 +01:00
Improve hg arc diff perf when image files are uploaded
Summary: arc diff called 'hg cat' twice for every image binary in the diff. This turns out to take 1 second per call on a large repo because mercurial has to parse the manifest every time. Now arc diff batches up all the files and does only two 'hg cat' commands. This makes the cost constant relative to the number of images being uploaded. Test Plan: Ran arc diff on a diff with 30 images on both git and hg. Verified that it was fast and that the images showed up in the web ui. Reviewers: epriestley Reviewed By: epriestley CC: sid0, dschleimer, bos, aran, Korvin Differential Revision: https://secure.phabricator.com/D5144
This commit is contained in:
parent
0cb1b7efad
commit
8412f7cfd7
3 changed files with 89 additions and 2 deletions
|
@ -1142,6 +1142,8 @@ final class ArcanistDiffParser {
|
|||
return;
|
||||
}
|
||||
|
||||
$imagechanges = array();
|
||||
|
||||
$changes = $this->changes;
|
||||
foreach ($changes as $change) {
|
||||
$path = $change->getCurrentPath();
|
||||
|
@ -1183,8 +1185,22 @@ final class ArcanistDiffParser {
|
|||
continue;
|
||||
}
|
||||
|
||||
$change->setOriginalFileData($repository_api->getOriginalFileData($path));
|
||||
$change->setCurrentFileData($repository_api->getCurrentFileData($path));
|
||||
$imagechanges[$path] = $change;
|
||||
}
|
||||
|
||||
// Fetch the actual file contents in batches so repositories
|
||||
// that have slow random file accesses (i.e. mercurial) can
|
||||
// optimize the retrieval.
|
||||
$paths = array_keys($imagechanges);
|
||||
|
||||
$filedata = $repository_api->getBulkOriginalFileData($paths);
|
||||
foreach ($filedata as $path => $data) {
|
||||
$imagechanges[$path]->setOriginalFileData($data);
|
||||
}
|
||||
|
||||
$filedata = $repository_api->getBulkCurrentFileData($paths);
|
||||
foreach ($filedata as $path => $data) {
|
||||
$imagechanges[$path]->setCurrentFileData($data);
|
||||
}
|
||||
|
||||
$this->changes = $changes;
|
||||
|
|
|
@ -408,6 +408,50 @@ final class ArcanistMercurialAPI extends ArcanistRepositoryAPI {
|
|||
$this->getWorkingCopyRevision());
|
||||
}
|
||||
|
||||
public function getBulkOriginalFileData($paths) {
|
||||
return $this->getBulkFileDataAtRevision($paths, $this->getBaseCommit());
|
||||
}
|
||||
|
||||
public function getBulkCurrentFileData($paths) {
|
||||
return $this->getBulkFileDataAtRevision(
|
||||
$paths,
|
||||
$this->getWorkingCopyRevision());
|
||||
}
|
||||
|
||||
private function getBulkFileDataAtRevision($paths, $revision) {
|
||||
// Calling 'hg cat' on each file individually is slow (1 second per file
|
||||
// on a large repo) because mercurial has to decompress and parse the
|
||||
// entire manifest every time. Do it in one large batch instead.
|
||||
|
||||
// hg cat will write the file data to files in a temp directory
|
||||
$tmpdir = Filesystem::createTemporaryDirectory();
|
||||
|
||||
// Mercurial doesn't create the directories for us :(
|
||||
foreach ($paths as $path) {
|
||||
$tmppath = $tmpdir.'/'.$path;
|
||||
Filesystem::createDirectory(dirname($tmppath), 0755, true);
|
||||
}
|
||||
|
||||
list($err, $stdout) = $this->execManualLocal(
|
||||
'cat --rev %s --output %s -- %C',
|
||||
$revision,
|
||||
// %p is the formatter for the repo-relative filepath
|
||||
$tmpdir.'/%p',
|
||||
implode(' ', $paths));
|
||||
|
||||
$filedata = array();
|
||||
foreach ($paths as $path) {
|
||||
$tmppath = $tmpdir.'/'.$path;
|
||||
if (Filesystem::pathExists($tmppath)) {
|
||||
$filedata[$path] = Filesystem::readFile($tmppath);
|
||||
}
|
||||
}
|
||||
|
||||
Filesystem::remove($tmpdir);
|
||||
|
||||
return $filedata;
|
||||
}
|
||||
|
||||
private function getFileDataAtRevision($path, $revision) {
|
||||
list($err, $stdout) = $this->execManualLocal(
|
||||
'cat --rev %s -- %s',
|
||||
|
|
|
@ -296,6 +296,33 @@ abstract class ArcanistRepositoryAPI {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches the original file data for each path provided.
|
||||
*
|
||||
* @return map<string, string> Map from path to file data.
|
||||
*/
|
||||
public function getBulkOriginalFileData($paths) {
|
||||
$filedata = array();
|
||||
foreach ($paths as $path) {
|
||||
$filedata[$path] = $this->getOriginalFileData($path);
|
||||
}
|
||||
|
||||
return $filedata;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches the current file data for each path provided.
|
||||
*
|
||||
* @return map<string, string> Map from path to file data.
|
||||
*/
|
||||
public function getBulkCurrentFileData($paths) {
|
||||
$filedata = array();
|
||||
foreach ($paths as $path) {
|
||||
$filedata[$path] = $this->getCurrentFileData($path);
|
||||
}
|
||||
|
||||
return $filedata;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Traversable
|
||||
|
|
Loading…
Reference in a new issue