1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2024-11-25 16:22:42 +01:00

Allow diff generation via ArcanistBundle to be limited to an approximate maximum byte size

Summary:
Ref T13137. See PHI592. When you have a diff with 600MB of videos, we want to bail out of diff generation early (as soon as we realize what we're dealing with), not build an 800MB text diff in memory and then throw it away.

Support bailout //during// diff generation once we realize we're in over our heads.

This is approximate, but since the limit is fairly large (512KB by default) it isn't too important to be precise.

Test Plan: Rigged some callers to set various byte limits, generated diffs including diffs with large binaries. Got an appropriate diff or exeception depending on how low the limit was.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13137

Differential Revision: https://secure.phabricator.com/D19444
This commit is contained in:
epriestley 2018-05-10 09:43:16 -07:00
parent a1aec701e3
commit d581c453b8
3 changed files with 54 additions and 1 deletions

View file

@ -106,6 +106,7 @@ phutil_register_library_map(array(
'ArcanistDefaultParametersXHPASTLinterRuleTestCase' => 'lint/linter/xhpast/rules/__tests__/ArcanistDefaultParametersXHPASTLinterRuleTestCase.php',
'ArcanistDeprecationXHPASTLinterRule' => 'lint/linter/xhpast/rules/ArcanistDeprecationXHPASTLinterRule.php',
'ArcanistDeprecationXHPASTLinterRuleTestCase' => 'lint/linter/xhpast/rules/__tests__/ArcanistDeprecationXHPASTLinterRuleTestCase.php',
'ArcanistDiffByteSizeException' => 'exception/ArcanistDiffByteSizeException.php',
'ArcanistDiffChange' => 'parser/diff/ArcanistDiffChange.php',
'ArcanistDiffChangeType' => 'parser/diff/ArcanistDiffChangeType.php',
'ArcanistDiffHunk' => 'parser/diff/ArcanistDiffHunk.php',
@ -525,6 +526,7 @@ phutil_register_library_map(array(
'ArcanistDefaultParametersXHPASTLinterRuleTestCase' => 'ArcanistXHPASTLinterRuleTestCase',
'ArcanistDeprecationXHPASTLinterRule' => 'ArcanistXHPASTLinterRule',
'ArcanistDeprecationXHPASTLinterRuleTestCase' => 'ArcanistXHPASTLinterRuleTestCase',
'ArcanistDiffByteSizeException' => 'Exception',
'ArcanistDiffChange' => 'Phobject',
'ArcanistDiffChangeType' => 'Phobject',
'ArcanistDiffHunk' => 'Phobject',

View file

@ -0,0 +1,3 @@
<?php
final class ArcanistDiffByteSizeException extends Exception {}

View file

@ -15,6 +15,8 @@ final class ArcanistBundle extends Phobject {
private $loadFileDataCallback;
private $authorName;
private $authorEmail;
private $byteLimit;
private $reservedBytes;
public function setAuthorEmail($author_email) {
$this->authorEmail = $author_email;
@ -76,6 +78,15 @@ final class ArcanistBundle extends Phobject {
return $this->encoding;
}
public function setByteLimit($byte_limit) {
$this->byteLimit = $byte_limit;
return $this;
}
public function getByteLimit() {
return $this->byteLimit;
}
public function getBaseRevision() {
return $this->baseRevision;
}
@ -241,12 +252,13 @@ final class ArcanistBundle extends Phobject {
}
public function toUnifiedDiff() {
$this->reservedBytes = 0;
$eol = $this->getEOL('unified');
$result = array();
$changes = $this->getChanges();
foreach ($changes as $change) {
$hunk_changes = $this->buildHunkChanges($change->getHunks(), $eol);
if (!$hunk_changes) {
continue;
@ -299,6 +311,8 @@ final class ArcanistBundle extends Phobject {
}
public function toGitPatch() {
$this->reservedBytes = 0;
$eol = $this->getEOL('git');
$result = array();
@ -649,6 +663,8 @@ final class ArcanistBundle extends Phobject {
$n_len = $small_hunk->getNewLength();
$corpus = $small_hunk->getCorpus();
$this->reserveBytes(strlen($corpus));
// NOTE: If the length is 1 it can be omitted. Since git does this,
// we also do it so that "arc export --git" diffs are as similar to
// real git diffs as possible, which helps debug issues.
@ -740,6 +756,20 @@ final class ArcanistBundle extends Phobject {
$old_length = strlen($old_data);
// Here, and below, the binary will be emitted with base85 encoding. This
// encoding encodes each 4 bytes of input in 5 bytes of output, so we may
// need up to 5/4ths as many bytes to represent it.
// We reserve space up front because base85 encoding isn't super cheap. If
// the blob is enormous, we'd rather just bail out now before doing a ton
// of work and then throwing it away anyway.
// However, the data is compressed before it is emitted so we may actually
// end up using fewer bytes. For now, the allocator just assumes the worst
// case since it isn't important to be precise, but we could do a more
// exact job of this.
$this->reserveBytes($old_length * 5 / 4);
if ($old_data === null) {
$old_data = '';
$old_sha1 = str_repeat('0', 40);
@ -758,6 +788,7 @@ final class ArcanistBundle extends Phobject {
}
$new_length = strlen($new_data);
$this->reserveBytes($new_length * 5 / 4);
if ($new_data === null) {
$new_data = '';
@ -1003,4 +1034,21 @@ final class ArcanistBundle extends Phobject {
return $buf;
}
private function reserveBytes($bytes) {
$this->reservedBytes += $bytes;
if ($this->byteLimit) {
if ($this->reservedBytes > $this->byteLimit) {
throw new ArcanistDiffByteSizeException(
pht(
'This large diff requires more space than it is allowed to '.
'use (limited to %s bytes; needs more than %s bytes).',
new PhutilNumber($this->byteLimit),
new PhutilNumber($this->reservedBytes)));
}
}
return $this;
}
}