mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-22 20:51:10 +01:00
Pull Git filesize logic into a separate LowLevel query and use more Iterators
Summary: Depends on D19829. Ref T13216. See PHI908. The current implementation is kind of a lot to live in `CommitHookEngine` and will likely fail if `git diff-tree` produces more than 2GB of output. Pull it out and make it slightly more robust against enormous commits. It's probably limited by this, now: ``` implode("\n", $every_path) ``` We could replace that with some `PhutilReverseRopeSource` primitive or something but since we don't have one of those and it seems unlikely that we'll hit this case in practice, I left it here for now with just the easy stuff converted to be stream-oriented. Test Plan: Used this script to test the query against various commits, got good results: ``` <?php require_once 'scripts/init/init-script.php'; $viewer = PhabricatorUser::getOmnipotentUser(); $repository = id(new PhabricatorRepositoryQuery()) ->setViewer($viewer) ->withCallsigns(array('P')) ->executeOne(); var_dump( id(new DiffusionLowLevelFilesizeQuery()) ->setRepository($repository) ->withIdentifier($argv[1]) ->execute()); ``` Used this to find large commits in history and pull filesizes (worked great, although our largest commit only touches a couple thousand paths): ``` for hash in `git log --format=%H`; do echo -n $hash; echo -n ' '; git diff-tree -r --no-commit-id $hash | wc -l | awk '{print $1}'; done | awk '{print $2 " " $1}' | sort -n ``` Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13216 Differential Revision: https://secure.phabricator.com/D19830
This commit is contained in:
parent
fd12b37d16
commit
c6fc05ee09
3 changed files with 131 additions and 90 deletions
|
@ -839,6 +839,7 @@ phutil_register_library_map(array(
|
|||
'DiffusionLookSoonConduitAPIMethod' => 'applications/diffusion/conduit/DiffusionLookSoonConduitAPIMethod.php',
|
||||
'DiffusionLowLevelCommitFieldsQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelCommitFieldsQuery.php',
|
||||
'DiffusionLowLevelCommitQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelCommitQuery.php',
|
||||
'DiffusionLowLevelFilesizeQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelFilesizeQuery.php',
|
||||
'DiffusionLowLevelGitRefQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelGitRefQuery.php',
|
||||
'DiffusionLowLevelMercurialBranchesQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelMercurialBranchesQuery.php',
|
||||
'DiffusionLowLevelMercurialPathsQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelMercurialPathsQuery.php',
|
||||
|
@ -6250,6 +6251,7 @@ phutil_register_library_map(array(
|
|||
'DiffusionLookSoonConduitAPIMethod' => 'DiffusionConduitAPIMethod',
|
||||
'DiffusionLowLevelCommitFieldsQuery' => 'DiffusionLowLevelQuery',
|
||||
'DiffusionLowLevelCommitQuery' => 'DiffusionLowLevelQuery',
|
||||
'DiffusionLowLevelFilesizeQuery' => 'DiffusionLowLevelQuery',
|
||||
'DiffusionLowLevelGitRefQuery' => 'DiffusionLowLevelQuery',
|
||||
'DiffusionLowLevelMercurialBranchesQuery' => 'DiffusionLowLevelQuery',
|
||||
'DiffusionLowLevelMercurialPathsQuery' => 'DiffusionLowLevelQuery',
|
||||
|
|
|
@ -1305,97 +1305,11 @@ final class DiffusionCommitHookEngine extends Phobject {
|
|||
|
||||
public function loadFileSizesForCommit($identifier) {
|
||||
$repository = $this->getRepository();
|
||||
$vcs = $repository->getVersionControlSystem();
|
||||
|
||||
$path_sizes = array();
|
||||
|
||||
switch ($vcs) {
|
||||
case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
|
||||
list($paths_raw) = $repository->execxLocalCommand(
|
||||
'diff-tree -z -r --no-commit-id %s --',
|
||||
$identifier);
|
||||
|
||||
// With "-z" we get "<fields>\0<filename>\0" for each line. Group the
|
||||
// delimited text into "<fields>, <filename>" pairs.
|
||||
$paths_raw = trim($paths_raw, "\0");
|
||||
$paths_raw = explode("\0", $paths_raw);
|
||||
if (count($paths_raw) % 2) {
|
||||
throw new Exception(
|
||||
pht(
|
||||
'Unexpected number of output lines from "git diff-tree" when '.
|
||||
'processing commit ("%s"): got %s lines, expected an even '.
|
||||
'number.',
|
||||
$identifier,
|
||||
phutil_count($paths_raw)));
|
||||
}
|
||||
$paths_raw = array_chunk($paths_raw, 2);
|
||||
|
||||
$paths = array();
|
||||
foreach ($paths_raw as $path_raw) {
|
||||
list($fields, $pathname) = $path_raw;
|
||||
$fields = explode(' ', $fields);
|
||||
|
||||
// Fields are:
|
||||
//
|
||||
// :100644 100644 aaaa bbbb M
|
||||
//
|
||||
// [0] Old file mode.
|
||||
// [1] New file mode.
|
||||
// [2] Old object hash.
|
||||
// [3] New object hash.
|
||||
// [4] Change mode.
|
||||
|
||||
$paths[] = array(
|
||||
'path' => $pathname,
|
||||
'newHash' => $fields[3],
|
||||
);
|
||||
}
|
||||
|
||||
if ($paths) {
|
||||
$check_paths = array();
|
||||
foreach ($paths as $path) {
|
||||
if ($path['newHash'] === self::EMPTY_HASH) {
|
||||
$path_sizes[$path['path']] = 0;
|
||||
continue;
|
||||
}
|
||||
$check_paths[$path['newHash']][] = $path['path'];
|
||||
}
|
||||
|
||||
if ($check_paths) {
|
||||
$future = $repository->getLocalCommandFuture(
|
||||
'cat-file --batch-check=%s',
|
||||
'%(objectsize)')
|
||||
->write(implode("\n", array_keys($check_paths)));
|
||||
|
||||
list($sizes) = $future->resolvex();
|
||||
$sizes = trim($sizes);
|
||||
$sizes = phutil_split_lines($sizes, false);
|
||||
if (count($sizes) !== count($check_paths)) {
|
||||
throw new Exception(
|
||||
pht(
|
||||
'Unexpected number of output lines from "git cat-file" when '.
|
||||
'processing commit ("%s"): got %s lines, expected %s.',
|
||||
$identifier,
|
||||
phutil_count($sizes),
|
||||
phutil_count($check_paths)));
|
||||
}
|
||||
|
||||
foreach ($check_paths as $object_hash => $path_names) {
|
||||
$object_size = (int)array_shift($sizes);
|
||||
foreach ($path_names as $path_name) {
|
||||
$path_sizes[$path_name] = $object_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new Exception(
|
||||
pht(
|
||||
'File size limits are not supported for this VCS.'));
|
||||
}
|
||||
|
||||
return $path_sizes;
|
||||
return id(new DiffusionLowLevelFilesizeQuery())
|
||||
->setRepository($repository)
|
||||
->withIdentifier($identifier)
|
||||
->execute();
|
||||
}
|
||||
|
||||
public function loadCommitRefForCommit($identifier) {
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
<?php
|
||||
|
||||
final class DiffusionLowLevelFilesizeQuery
|
||||
extends DiffusionLowLevelQuery {
|
||||
|
||||
private $identifier;
|
||||
|
||||
public function withIdentifier($identifier) {
|
||||
$this->identifier = $identifier;
|
||||
return $this;
|
||||
}
|
||||
|
||||
protected function executeQuery() {
|
||||
if (!strlen($this->identifier)) {
|
||||
throw new PhutilInvalidStateException('withIdentifier');
|
||||
}
|
||||
|
||||
$type = $this->getRepository()->getVersionControlSystem();
|
||||
switch ($type) {
|
||||
case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
|
||||
$result = $this->loadGitFilesizes();
|
||||
break;
|
||||
default:
|
||||
throw new Exception(pht('Unsupported repository type "%s"!', $type));
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
private function loadGitFilesizes() {
|
||||
$repository = $this->getRepository();
|
||||
$identifier = $this->identifier;
|
||||
|
||||
$paths_future = $repository->getLocalCommandFuture(
|
||||
'diff-tree -z -r --no-commit-id %s --',
|
||||
$identifier);
|
||||
|
||||
// With "-z" we get "<fields>\0<filename>\0" for each line. Process the
|
||||
// delimited text as "<fields>, <filename>" pairs.
|
||||
|
||||
$path_lines = id(new LinesOfALargeExecFuture($paths_future))
|
||||
->setDelimiter("\0");
|
||||
|
||||
$paths = array();
|
||||
|
||||
$path_pairs = new PhutilChunkedIterator($path_lines, 2);
|
||||
foreach ($path_pairs as $path_pair) {
|
||||
if (count($path_pair) != 2) {
|
||||
throw new Exception(
|
||||
pht(
|
||||
'Unexpected number of output lines from "git diff-tree" when '.
|
||||
'processing commit ("%s"): expected an even number of lines.',
|
||||
$identifier));
|
||||
}
|
||||
|
||||
list($fields, $pathname) = array_values($path_pair);
|
||||
$fields = explode(' ', $fields);
|
||||
|
||||
// Fields are:
|
||||
//
|
||||
// :100644 100644 aaaa bbbb M
|
||||
//
|
||||
// [0] Old file mode.
|
||||
// [1] New file mode.
|
||||
// [2] Old object hash.
|
||||
// [3] New object hash.
|
||||
// [4] Change mode.
|
||||
|
||||
$paths[] = array(
|
||||
'path' => $pathname,
|
||||
'newHash' => $fields[3],
|
||||
);
|
||||
}
|
||||
|
||||
$path_sizes = array();
|
||||
|
||||
if (!$paths) {
|
||||
return $path_sizes;
|
||||
}
|
||||
|
||||
$check_paths = array();
|
||||
foreach ($paths as $path) {
|
||||
if ($path['newHash'] === DiffusionCommitHookEngine::EMPTY_HASH) {
|
||||
$path_sizes[$path['path']] = 0;
|
||||
continue;
|
||||
}
|
||||
$check_paths[$path['newHash']][] = $path['path'];
|
||||
}
|
||||
|
||||
if (!$check_paths) {
|
||||
return $path_sizes;
|
||||
}
|
||||
|
||||
$future = $repository->getLocalCommandFuture(
|
||||
'cat-file --batch-check=%s',
|
||||
'%(objectsize)');
|
||||
|
||||
$future->write(implode("\n", array_keys($check_paths)));
|
||||
|
||||
$size_lines = id(new LinesOfALargeExecFuture($future))
|
||||
->setDelimiter("\n");
|
||||
foreach ($size_lines as $line) {
|
||||
$object_size = (int)$line;
|
||||
|
||||
$object_hash = head_key($check_paths);
|
||||
$path_names = $check_paths[$object_hash];
|
||||
unset($check_paths[$object_hash]);
|
||||
|
||||
foreach ($path_names as $path_name) {
|
||||
$path_sizes[$path_name] = $object_size;
|
||||
}
|
||||
}
|
||||
|
||||
if ($check_paths) {
|
||||
throw new Exception(
|
||||
pht(
|
||||
'Unexpected number of output lines from "git cat-file" when '.
|
||||
'processing commit ("%s").',
|
||||
$identifier));
|
||||
}
|
||||
|
||||
return $path_sizes;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in a new issue