From a1aec701e3ea246b9f64f2c5d474a24ba768fef1 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 9 May 2018 13:29:03 -0700 Subject: [PATCH 1/2] Raise the intraline diff hard limit from 80 to 100 characters Summary: Fixes T1246. See PHI637. See T13137. Computers have gotten a bit faster so we can probably bump this up a little and see if it causes problems. This is `O(N^2)` so the this should be less than twice as expensive in the worst case. Test Plan: Created a diff affecting characters on a very long line separated by more than 80 but fewer than 100 characters, got a good intraline diff out of it: {F5605162} Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T1246 Differential Revision: https://secure.phabricator.com/D19442 --- src/difference/ArcanistDiffUtils.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php index 8282248c..a68a0532 100644 --- a/src/difference/ArcanistDiffUtils.php +++ b/src/difference/ArcanistDiffUtils.php @@ -63,7 +63,7 @@ final class ArcanistDiffUtils extends Phobject { $ol = strlen($o); $nl = strlen($n); - $max_glyphs = 80; + $max_glyphs = 100; // This has some wiggle room for multi-byte UTF8 characters, and the // fact that we're testing the sum of the lengths of both strings. It can From d581c453b83c515f3acac963bbc117e8dd0d1ef4 Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 10 May 2018 09:43:16 -0700 Subject: [PATCH 2/2] Allow diff generation via ArcanistBundle to be limited to an approximate maximum byte size Summary: Ref T13137. See PHI592. When you have a diff with 600MB of videos, we want to bail out of diff generation early (as soon as we realize what we're dealing with), not build an 800MB text diff in memory and then throw it away. Support bailout //during// diff generation once we realize we're in over our heads. This is approximate, but since the limit is fairly large (512KB by default) it isn't too important to be precise. Test Plan: Rigged some callers to set various byte limits, generated diffs including diffs with large binaries. Got an appropriate diff or exeception depending on how low the limit was. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13137 Differential Revision: https://secure.phabricator.com/D19444 --- src/__phutil_library_map__.php | 2 + .../ArcanistDiffByteSizeException.php | 3 ++ src/parser/ArcanistBundle.php | 50 ++++++++++++++++++- 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 src/exception/ArcanistDiffByteSizeException.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index d8afe21c..e88a8621 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -106,6 +106,7 @@ phutil_register_library_map(array( 'ArcanistDefaultParametersXHPASTLinterRuleTestCase' => 'lint/linter/xhpast/rules/__tests__/ArcanistDefaultParametersXHPASTLinterRuleTestCase.php', 'ArcanistDeprecationXHPASTLinterRule' => 'lint/linter/xhpast/rules/ArcanistDeprecationXHPASTLinterRule.php', 'ArcanistDeprecationXHPASTLinterRuleTestCase' => 'lint/linter/xhpast/rules/__tests__/ArcanistDeprecationXHPASTLinterRuleTestCase.php', + 'ArcanistDiffByteSizeException' => 'exception/ArcanistDiffByteSizeException.php', 'ArcanistDiffChange' => 'parser/diff/ArcanistDiffChange.php', 'ArcanistDiffChangeType' => 'parser/diff/ArcanistDiffChangeType.php', 'ArcanistDiffHunk' => 'parser/diff/ArcanistDiffHunk.php', @@ -525,6 +526,7 @@ phutil_register_library_map(array( 'ArcanistDefaultParametersXHPASTLinterRuleTestCase' => 'ArcanistXHPASTLinterRuleTestCase', 'ArcanistDeprecationXHPASTLinterRule' => 'ArcanistXHPASTLinterRule', 'ArcanistDeprecationXHPASTLinterRuleTestCase' => 'ArcanistXHPASTLinterRuleTestCase', + 'ArcanistDiffByteSizeException' => 'Exception', 'ArcanistDiffChange' => 'Phobject', 'ArcanistDiffChangeType' => 'Phobject', 'ArcanistDiffHunk' => 'Phobject', diff --git a/src/exception/ArcanistDiffByteSizeException.php b/src/exception/ArcanistDiffByteSizeException.php new file mode 100644 index 00000000..e41ed53d --- /dev/null +++ b/src/exception/ArcanistDiffByteSizeException.php @@ -0,0 +1,3 @@ +authorEmail = $author_email; @@ -76,6 +78,15 @@ final class ArcanistBundle extends Phobject { return $this->encoding; } + public function setByteLimit($byte_limit) { + $this->byteLimit = $byte_limit; + return $this; + } + + public function getByteLimit() { + return $this->byteLimit; + } + public function getBaseRevision() { return $this->baseRevision; } @@ -241,12 +252,13 @@ final class ArcanistBundle extends Phobject { } public function toUnifiedDiff() { + $this->reservedBytes = 0; + $eol = $this->getEOL('unified'); $result = array(); $changes = $this->getChanges(); foreach ($changes as $change) { - $hunk_changes = $this->buildHunkChanges($change->getHunks(), $eol); if (!$hunk_changes) { continue; @@ -299,6 +311,8 @@ final class ArcanistBundle extends Phobject { } public function toGitPatch() { + $this->reservedBytes = 0; + $eol = $this->getEOL('git'); $result = array(); @@ -649,6 +663,8 @@ final class ArcanistBundle extends Phobject { $n_len = $small_hunk->getNewLength(); $corpus = $small_hunk->getCorpus(); + $this->reserveBytes(strlen($corpus)); + // NOTE: If the length is 1 it can be omitted. Since git does this, // we also do it so that "arc export --git" diffs are as similar to // real git diffs as possible, which helps debug issues. @@ -740,6 +756,20 @@ final class ArcanistBundle extends Phobject { $old_length = strlen($old_data); + // Here, and below, the binary will be emitted with base85 encoding. This + // encoding encodes each 4 bytes of input in 5 bytes of output, so we may + // need up to 5/4ths as many bytes to represent it. + + // We reserve space up front because base85 encoding isn't super cheap. If + // the blob is enormous, we'd rather just bail out now before doing a ton + // of work and then throwing it away anyway. + + // However, the data is compressed before it is emitted so we may actually + // end up using fewer bytes. For now, the allocator just assumes the worst + // case since it isn't important to be precise, but we could do a more + // exact job of this. + $this->reserveBytes($old_length * 5 / 4); + if ($old_data === null) { $old_data = ''; $old_sha1 = str_repeat('0', 40); @@ -758,6 +788,7 @@ final class ArcanistBundle extends Phobject { } $new_length = strlen($new_data); + $this->reserveBytes($new_length * 5 / 4); if ($new_data === null) { $new_data = ''; @@ -1003,4 +1034,21 @@ final class ArcanistBundle extends Phobject { return $buf; } + private function reserveBytes($bytes) { + $this->reservedBytes += $bytes; + + if ($this->byteLimit) { + if ($this->reservedBytes > $this->byteLimit) { + throw new ArcanistDiffByteSizeException( + pht( + 'This large diff requires more space than it is allowed to '. + 'use (limited to %s bytes; needs more than %s bytes).', + new PhutilNumber($this->byteLimit), + new PhutilNumber($this->reservedBytes))); + } + } + + return $this; + } + }