mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-12-23 05:50:54 +01:00
Accelerate working tree operations in git
Summary: Currently, `arc` on `git` uses the following commands to examine the state of the working tree and history; example times for a no-op diff in a 165k-file working tree are also shown: ``` 1) git diff --no-ext-diff --no-textconv --submodule=short --raw 'fb062d4ecce5d9c1786b7bfc8a0dedf6b11fdd96' -- = 1,722,514 us 2a) git diff --no-ext-diff --no-textconv --submodule=short --raw 'HEAD' -- = 1,715,507 us 2b) git ls-files --others --exclude-standard = 2,359,202 us 3) git diff-files --name-only = 1,333,274 us ``` Steps (2a) and (2b) are run concurrently; this results in a total elapsed wallclock time of approximately 5.4 seconds. This is inefficient -- all four of the above steps must both load the index and examine the working copy, which may be slow operations when large repositories are used. Additionally, none of the effort of those stat calls on the working tree, or load time of the index, is shared across the processes. Step (1) is called from `getCommitRangeStatus`, which was split out in D4095; it is currently never called on its own, only ever from `getWorkingCopyStatus`, where it it combined with `getUncommittedStatus`. The current behavior of the method is to return the set of changes //either// in local commits //or// uncommitted in the working tree, which duplicates work that `getUncommittedStatus` is intended to do. Changing the behavior of this method (in Git, and other VCSes) to only examine _committed_ status seems both inline with the name of the method and the original description of it in D4095 -- and also serves to make it much faster, as it is an operation that need not inspect the working tree at all. Steps (2a), (2b), and (3) attempt to gather the state of the working copy, and as such are all I/O bound but must examine nearly identical data. For git 2.11.0 and higher, we can instead rely on the machine-parseable `git status --porcelain=2` format, which provides the information from all of these commands at once. It also allows additional performance improvements, as `git status` has been the focus of several optimizations in the latest versions of git (the untracked cache and fsmonitor services, for instance), which are not available in the lower-level `diff`, `ls-files`, and `diff-files` commands. This has the added benefit of fixing a bug noticed in T9455, in that uncommitted or unstaged changes in modules can now be detected, regardless of if they also have changed their base commit. It further resolves a bug where `.gitmodules` appeared to have unstaged changes, when in reality the unstaged changes were in submodules elsewhere in the tree. For backwards compatibility with versions of git < 2.11.0, the old code is left in place. It is possible that the simpler output from v1 of `git status --porcelain` would also suffice for some of the above benefits, but the payoff of parsing yet another format is deemed insufficient; users wishing improved performance should simply upgrade `git`. Alltogether, these result in the following, for a no-op diff in a 165k-working-file tree: ``` 1) git diff --no-ext-diff --no-textconv --submodule=short --raw 'fb062d4ecce5d9c1786b7bfc8a0dedf6b11fdd96' HEAD -- = 9,227 us 2) git status --porcelain=2 -z = 739,964 us ``` ...for a total of 749ms, an improvement of 4.7s. Depends on D18841. Test Plan: Existing tests. Reviewers: #blessed_reviewers, epriestley Reviewed By: #blessed_reviewers, epriestley Subscribers: Korvin, epriestley Differential Revision: https://secure.phabricator.com/D18842
This commit is contained in:
parent
f34467914c
commit
9144658e69
4 changed files with 132 additions and 47 deletions
|
@ -52,8 +52,12 @@ final class ArcanistGitAPI extends ArcanistRepositoryAPI {
|
|||
}
|
||||
|
||||
public function getGitVersion() {
|
||||
list($stdout) = $this->execxLocal('--version');
|
||||
return rtrim(str_replace('git version ', '', $stdout));
|
||||
static $version = null;
|
||||
if ($version === null) {
|
||||
list($stdout) = $this->execxLocal('--version');
|
||||
$version = rtrim(str_replace('git version ', '', $stdout));
|
||||
}
|
||||
return $version;
|
||||
}
|
||||
|
||||
public function getMetadataPath() {
|
||||
|
@ -645,8 +649,70 @@ final class ArcanistGitAPI extends ArcanistRepositoryAPI {
|
|||
return $this->executeSVNFindRev($hash, 'SVN');
|
||||
}
|
||||
|
||||
private function buildUncommittedStatusViaStatus() {
|
||||
$status = $this->buildLocalFuture(
|
||||
array(
|
||||
'status --porcelain=2 -z',
|
||||
));
|
||||
list($stdout) = $status->resolvex();
|
||||
|
||||
$result = new PhutilArrayWithDefaultValue();
|
||||
$parts = explode("\0", $stdout);
|
||||
while (count($parts) > 1) {
|
||||
$entry = array_shift($parts);
|
||||
$entry_parts = explode(' ', $entry);
|
||||
if ($entry_parts[0] == '1') {
|
||||
$path = $entry_parts[8];
|
||||
} else if ($entry_parts[0] == '2') {
|
||||
$path = $entry_parts[9];
|
||||
} else if ($entry_parts[0] == 'u') {
|
||||
$path = $entry_parts[10];
|
||||
} else if ($entry_parts[0] == '?') {
|
||||
$result[$entry_parts[1]] = self::FLAG_UNTRACKED;
|
||||
continue;
|
||||
}
|
||||
|
||||
$result[$path] |= self::FLAG_UNCOMMITTED;
|
||||
$index_state = substr($entry_parts[1], 0, 1);
|
||||
$working_state = substr($entry_parts[1], 1, 1);
|
||||
if ($index_state == 'A') {
|
||||
$result[$path] |= self::FLAG_ADDED;
|
||||
} else if ($index_state == 'M') {
|
||||
$result[$path] |= self::FLAG_MODIFIED;
|
||||
} else if ($index_state == 'D') {
|
||||
$result[$path] |= self::FLAG_DELETED;
|
||||
}
|
||||
if ($working_state != '.') {
|
||||
$result[$path] |= self::FLAG_UNSTAGED;
|
||||
if ($index_state == '.') {
|
||||
if ($working_state == 'A') {
|
||||
$result[$path] |= self::FLAG_ADDED;
|
||||
} else if ($working_state == 'M') {
|
||||
$result[$path] |= self::FLAG_MODIFIED;
|
||||
} else if ($working_state == 'D') {
|
||||
$result[$path] |= self::FLAG_DELETED;
|
||||
}
|
||||
}
|
||||
}
|
||||
$submodule_tracked = substr($entry_parts[2], 2, 1);
|
||||
$submodule_untracked = substr($entry_parts[2], 3, 1);
|
||||
if ($submodule_tracked == 'M' || $submodule_untracked == 'U') {
|
||||
$result[$path] |= self::FLAG_EXTERNALS;
|
||||
}
|
||||
|
||||
if ($entry_parts[0] == '2') {
|
||||
$result[array_shift($parts)] = $result[$path] | self::FLAG_DELETED;
|
||||
$result[$path] |= self::FLAG_ADDED;
|
||||
}
|
||||
}
|
||||
return $result->toArray();
|
||||
}
|
||||
|
||||
protected function buildUncommittedStatus() {
|
||||
if (version_compare($this->getGitVersion(), '2.11.0', '>=')) {
|
||||
return $this->buildUncommittedStatusViaStatus();
|
||||
}
|
||||
|
||||
$diff_options = $this->getDiffBaseOptions();
|
||||
|
||||
if ($this->repositoryHasNoCommits) {
|
||||
|
@ -719,7 +785,7 @@ final class ArcanistGitAPI extends ArcanistRepositoryAPI {
|
|||
|
||||
protected function buildCommitRangeStatus() {
|
||||
list($stdout, $stderr) = $this->execxLocal(
|
||||
'diff %C --raw %s --',
|
||||
'diff %C --raw %s HEAD --',
|
||||
$this->getDiffBaseOptions(),
|
||||
$this->getBaseCommit());
|
||||
|
||||
|
|
|
@ -372,41 +372,18 @@ final class ArcanistMercurialAPI extends ArcanistRepositoryAPI {
|
|||
}
|
||||
|
||||
protected function buildCommitRangeStatus() {
|
||||
// TODO: Possibly we should use "hg status --rev X --rev ." for this
|
||||
// instead, but we must run "hg diff" later anyway in most cases, so
|
||||
// building and caching it shouldn't hurt us.
|
||||
list($stdout) = $this->execxLocal(
|
||||
'status --rev %s --rev tip',
|
||||
$this->getBaseCommit());
|
||||
|
||||
$diff = $this->getFullMercurialDiff();
|
||||
if (!$diff) {
|
||||
return array();
|
||||
$results = new PhutilArrayWithDefaultValue();
|
||||
|
||||
$working_status = ArcanistMercurialParser::parseMercurialStatus($stdout);
|
||||
foreach ($working_status as $path => $mask) {
|
||||
$results[$path] |= $mask;
|
||||
}
|
||||
|
||||
$parser = new ArcanistDiffParser();
|
||||
$changes = $parser->parseDiff($diff);
|
||||
|
||||
$status_map = array();
|
||||
foreach ($changes as $change) {
|
||||
$flags = 0;
|
||||
switch ($change->getType()) {
|
||||
case ArcanistDiffChangeType::TYPE_ADD:
|
||||
case ArcanistDiffChangeType::TYPE_MOVE_HERE:
|
||||
case ArcanistDiffChangeType::TYPE_COPY_HERE:
|
||||
$flags |= self::FLAG_ADDED;
|
||||
break;
|
||||
case ArcanistDiffChangeType::TYPE_CHANGE:
|
||||
case ArcanistDiffChangeType::TYPE_COPY_AWAY: // Check for changes?
|
||||
$flags |= self::FLAG_MODIFIED;
|
||||
break;
|
||||
case ArcanistDiffChangeType::TYPE_DELETE:
|
||||
case ArcanistDiffChangeType::TYPE_MOVE_AWAY:
|
||||
case ArcanistDiffChangeType::TYPE_MULTICOPY:
|
||||
$flags |= self::FLAG_DELETED;
|
||||
break;
|
||||
}
|
||||
$status_map[$change->getCurrentPath()] = $flags;
|
||||
}
|
||||
|
||||
return $status_map;
|
||||
return $results->toArray();
|
||||
}
|
||||
|
||||
protected function didReloadWorkingCopy() {
|
||||
|
|
|
@ -45,9 +45,9 @@ final class ArcanistSubversionAPI extends ArcanistRepositoryAPI {
|
|||
}
|
||||
|
||||
protected function buildCommitRangeStatus() {
|
||||
// In SVN, the commit range is always "uncommitted changes", so these
|
||||
// statuses are equivalent.
|
||||
return $this->getUncommittedStatus();
|
||||
// In SVN, there are never any previous commits in the range -- it is all in
|
||||
// the uncommitted status.
|
||||
return array();
|
||||
}
|
||||
|
||||
protected function buildUncommittedStatus() {
|
||||
|
|
|
@ -56,6 +56,16 @@ final class ArcanistRepositoryAPIStateTestCase extends PhutilTestCase {
|
|||
}
|
||||
|
||||
private function assertCorrectState($test, ArcanistRepositoryAPI $api) {
|
||||
if ($api instanceof ArcanistGitAPI) {
|
||||
$version = $api->getGitVersion();
|
||||
if (version_compare($version, '2.11.0', '<')) {
|
||||
// Behavior differs slightly on older versions of git; rather than code
|
||||
// both variants, skip the tests in the presence of such a git.
|
||||
$this->assertSkipped(pht('Behavior differs slightly on git < 2.11.0'));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
$f_mod = ArcanistRepositoryAPI::FLAG_MODIFIED;
|
||||
$f_add = ArcanistRepositoryAPI::FLAG_ADDED;
|
||||
$f_del = ArcanistRepositoryAPI::FLAG_DELETED;
|
||||
|
@ -70,7 +80,7 @@ final class ArcanistRepositoryAPIStateTestCase extends PhutilTestCase {
|
|||
|
||||
switch ($test) {
|
||||
case 'svn_basic.svn.tgz':
|
||||
$expect = array(
|
||||
$expect_uncommitted = array(
|
||||
'ADDED' => $f_add,
|
||||
'COPIED_TO' => $f_add,
|
||||
'DELETED' => $f_del,
|
||||
|
@ -80,8 +90,22 @@ final class ArcanistRepositoryAPIStateTestCase extends PhutilTestCase {
|
|||
'PROPCHANGE' => $f_mod,
|
||||
'UNTRACKED' => $f_unt,
|
||||
);
|
||||
$this->assertEqual($expect, $api->getUncommittedStatus());
|
||||
$this->assertEqual($expect, $api->getCommitRangeStatus());
|
||||
$this->assertEqual($expect_uncommitted, $api->getUncommittedStatus());
|
||||
|
||||
$expect_range = array();
|
||||
$this->assertEqual($expect_range, $api->getCommitRangeStatus());
|
||||
|
||||
$expect_working = array(
|
||||
'ADDED' => $f_add,
|
||||
'COPIED_TO' => $f_add,
|
||||
'DELETED' => $f_del,
|
||||
'MODIFIED' => $f_mod,
|
||||
'MOVED' => $f_del,
|
||||
'MOVED_TO' => $f_add,
|
||||
'PROPCHANGE' => $f_mod,
|
||||
'UNTRACKED' => $f_unt,
|
||||
);
|
||||
$this->assertEqual($expect_working, $api->getWorkingCopyStatus());
|
||||
break;
|
||||
case 'git_basic.git.tgz':
|
||||
$expect_uncommitted = array(
|
||||
|
@ -96,10 +120,19 @@ final class ArcanistRepositoryAPIStateTestCase extends PhutilTestCase {
|
|||
'ADDED' => $f_add,
|
||||
'DELETED' => $f_del,
|
||||
'MODIFIED' => $f_mod,
|
||||
'UNCOMMITTED' => $f_add,
|
||||
'UNSTAGED' => $f_add,
|
||||
);
|
||||
$this->assertEqual($expect_range, $api->getCommitRangeStatus());
|
||||
|
||||
$expect_working = array(
|
||||
'ADDED' => $f_add,
|
||||
'DELETED' => $f_del,
|
||||
'MODIFIED' => $f_mod,
|
||||
'UNCOMMITTED' => $f_add | $f_unc,
|
||||
'UNSTAGED' => $f_add | $f_mod | $f_uns | $f_unc,
|
||||
'UNTRACKED' => $f_unt,
|
||||
);
|
||||
$this->assertEqual($expect_working, $api->getWorkingCopyStatus());
|
||||
break;
|
||||
case 'git_submodules_dirty.git.tgz':
|
||||
$expect_uncommitted = array(
|
||||
|
@ -107,19 +140,19 @@ final class ArcanistRepositoryAPIStateTestCase extends PhutilTestCase {
|
|||
'added/' => $f_unt,
|
||||
'deleted' => $f_del | $f_uns | $f_unc,
|
||||
'modified-commit' => $f_mod | $f_uns | $f_unc,
|
||||
'modified-commit-dirty' => $f_mod | $f_uns | $f_unc,
|
||||
'modified-dirty' => $f_ext | $f_uns | $f_unc,
|
||||
'modified-commit-dirty' => $f_ext | $f_mod | $f_uns | $f_unc,
|
||||
'modified-dirty' => $f_ext | $f_mod | $f_uns | $f_unc,
|
||||
);
|
||||
$this->assertEqual($expect_uncommitted, $api->getUncommittedStatus());
|
||||
break;
|
||||
case 'git_submodules_staged.git.tgz':
|
||||
$expect_uncommitted = array(
|
||||
'.gitmodules' => $f_mod | $f_uns | $f_unc,
|
||||
'.gitmodules' => $f_mod | $f_unc,
|
||||
'added' => $f_add | $f_unc,
|
||||
'deleted' => $f_del | $f_unc,
|
||||
'modified-commit' => $f_mod | $f_unc,
|
||||
'modified-commit-dirty' => $f_mod | $f_uns | $f_unc,
|
||||
'modified-dirty' => $f_ext | $f_uns | $f_unc,
|
||||
'modified-commit-dirty' => $f_ext | $f_mod | $f_uns | $f_unc,
|
||||
'modified-dirty' => $f_ext | $f_mod | $f_uns | $f_unc,
|
||||
);
|
||||
$this->assertEqual($expect_uncommitted, $api->getUncommittedStatus());
|
||||
break;
|
||||
|
@ -137,6 +170,15 @@ final class ArcanistRepositoryAPIStateTestCase extends PhutilTestCase {
|
|||
'UNCOMMITTED' => $f_add,
|
||||
);
|
||||
$this->assertEqual($expect_range, $api->getCommitRangeStatus());
|
||||
|
||||
$expect_working = array(
|
||||
'ADDED' => $f_add,
|
||||
'DELETED' => $f_del,
|
||||
'MODIFIED' => $f_mod,
|
||||
'UNCOMMITTED' => $f_add | $f_mod | $f_unc,
|
||||
'UNTRACKED' => $f_unt,
|
||||
);
|
||||
$this->assertEqual($expect_working, $api->getWorkingCopyStatus());
|
||||
break;
|
||||
default:
|
||||
throw new Exception(
|
||||
|
|
Loading…
Reference in a new issue