mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-11-10 00:42:40 +01:00
(stable) Promote 2016 Week 41
This commit is contained in:
commit
71132dbee0
1 changed files with 26 additions and 3 deletions
|
@ -56,7 +56,30 @@ final class ArcanistDiffUtils extends Phobject {
|
|||
);
|
||||
}
|
||||
|
||||
return self::computeIntralineEdits($o, $n);
|
||||
// Do a fast check for certainly-too-long inputs before splitting the
|
||||
// lines. Inputs take ~200x more memory to represent as lists than as
|
||||
// strings, so we can run out of memory quickly if we try to split huge
|
||||
// inputs. See T11744.
|
||||
$ol = strlen($o);
|
||||
$nl = strlen($n);
|
||||
|
||||
$max_glyphs = 80;
|
||||
|
||||
// This has some wiggle room for multi-byte UTF8 characters, and the
|
||||
// fact that we're testing the sum of the lengths of both strings. It can
|
||||
// still generate false positives for, say, Chinese text liberally
|
||||
// slathered with combining characters, but this kind of text should be
|
||||
// vitually nonexistent in real data.
|
||||
$too_many_bytes = (16 * $max_glyphs);
|
||||
|
||||
if ($ol + $nl > $too_many_bytes) {
|
||||
return array(
|
||||
array(array(1, $ol)),
|
||||
array(array(1, $nl)),
|
||||
);
|
||||
}
|
||||
|
||||
return self::computeIntralineEdits($o, $n, $max_glyphs);
|
||||
}
|
||||
|
||||
public static function applyIntralineDiff($str, $intra_stack) {
|
||||
|
@ -155,7 +178,7 @@ final class ArcanistDiffUtils extends Phobject {
|
|||
->getEditString();
|
||||
}
|
||||
|
||||
public static function computeIntralineEdits($o, $n) {
|
||||
private static function computeIntralineEdits($o, $n, $max_glyphs) {
|
||||
if (preg_match('/[\x80-\xFF]/', $o.$n)) {
|
||||
$ov = phutil_utf8v_combined($o);
|
||||
$nv = phutil_utf8v_combined($n);
|
||||
|
@ -166,7 +189,7 @@ final class ArcanistDiffUtils extends Phobject {
|
|||
$multibyte = false;
|
||||
}
|
||||
|
||||
$result = self::generateEditString($ov, $nv);
|
||||
$result = self::generateEditString($ov, $nv, $max_glyphs);
|
||||
|
||||
// Now we have a character-based description of the edit. We need to
|
||||
// convert into a byte-based description. Walk through the edit string and
|
||||
|
|
Loading…
Reference in a new issue