mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-11 07:11:04 +01:00
Fix an issue where prose diffing may fail after hitting the PCRE backtracking limit
Summary: Fixes T13554. For certain prose diff inputs and PCRE backtracking limits, this regular expression may backtrack too often and fail. A characteristic input is "x x x x ...", i.e. many sequences where `(.*?)\s*\z` looks like it may be able to match but actually can not. I think writing an expression which has all the behavior we'd like without this backtracking issue isn't trivial (at least, I don't think I know how to do it offhand); just use a strategy based on "trim()" instead, which avoids any PCRE complexities here. Test Plan: Locally, this passes the "x x x ..." test which the previous code failed. I'm not including that test because it won't reproduce across values of "pcre.backtrack_limit", PCRE versions, etc. Maniphest Tasks: T13554 Differential Revision: https://secure.phabricator.com/D21422
This commit is contained in:
parent
8f9ba48528
commit
fcb75d0503
2 changed files with 68 additions and 16 deletions
|
@ -142,22 +142,9 @@ final class PhutilProseDifferenceEngine extends Phobject {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($level < 2) {
|
if ($level < 2) {
|
||||||
// Split pieces into separate text and whitespace sections: make one
|
$trimmed_pieces = $this->trimApart($result);
|
||||||
// piece out of all the whitespace at the beginning, one piece out of
|
foreach ($trimmed_pieces as $trimmed_piece) {
|
||||||
// all the actual text in the middle, and one piece out of all the
|
$results[] = $trimmed_piece;
|
||||||
// whitespace at the end.
|
|
||||||
|
|
||||||
$matches = null;
|
|
||||||
preg_match('/^(\s*)(.*?)(\s*)\z/s', $result, $matches);
|
|
||||||
|
|
||||||
if (strlen($matches[1])) {
|
|
||||||
$results[] = $matches[1];
|
|
||||||
}
|
|
||||||
if (strlen($matches[2])) {
|
|
||||||
$results[] = $matches[2];
|
|
||||||
}
|
|
||||||
if (strlen($matches[3])) {
|
|
||||||
$results[] = $matches[3];
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
$results[] = $result;
|
$results[] = $result;
|
||||||
|
@ -272,4 +259,36 @@ final class PhutilProseDifferenceEngine extends Phobject {
|
||||||
return $blocks;
|
return $blocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Split a string into leading whitespace, inner text, and trailing
 * whitespace.
 *
 * The split is done with ltrim() and rtrim() rather than a regular
 * expression, so it does not depend on PCRE backtracking limits.
 * "Whitespace" here means whatever ltrim() and rtrim() strip when called
 * without an explicit character list.
 *
 * @param string $input Text to split apart.
 * @return list<string> Zero to three non-empty pieces, in order: the
 *   leading whitespace, the text in the middle, and the trailing
 *   whitespace. Pieces which would be empty are omitted.
 */
public static function trimApart($input) {
  $full_length = strlen($input);

  // Strip the left edge first. Whatever disappeared is the leading
  // whitespace piece.
  $without_head = ltrim($input);
  $headless_length = strlen($without_head);
  $head_length = $full_length - $headless_length;

  // Then strip the right edge of what remains. The difference in length is
  // the size of the trailing whitespace piece.
  $body = rtrim($without_head);
  $tail_length = $headless_length - strlen($body);

  $pieces = array();

  if ($head_length !== 0) {
    $pieces[] = substr($input, 0, $head_length);
  }

  if ($body !== '') {
    $pieces[] = $body;
  }

  if ($tail_length !== 0) {
    // A negative offset makes substr() slice from the end of the input.
    $pieces[] = substr($input, -$tail_length);
  }

  return $pieces;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,39 @@
|
||||||
final class PhutilProseDiffTestCase
|
final class PhutilProseDiffTestCase
|
||||||
extends PhabricatorTestCase {
|
extends PhabricatorTestCase {
|
||||||
|
|
||||||
|
/**
 * Verify that trimApart() separates an input into its leading whitespace,
 * inner text, and trailing whitespace, omitting any piece which is empty.
 */
public function testTrimApart() {
  // Each key is an input string; each value is the list of pieces we
  // expect trimApart() to produce for it.
  $cases = array(
    '' => array(),
    'a' => array('a'),
    ' a ' => array(' ', 'a', ' '),
    ' a' => array(' ', 'a'),
    'a ' => array('a', ' '),
    ' a b ' => array(' ', 'a b', ' '),
  );

  foreach ($cases as $text => $expected_parts) {
    $this->assertEqual(
      $expected_parts,
      PhutilProseDifferenceEngine::trimApart($text),
      pht('Trim Apart: %s', $text));
  }
}
|
||||||
|
|
||||||
public function testProseDiffsDistance() {
|
public function testProseDiffsDistance() {
|
||||||
$this->assertProseParts(
|
$this->assertProseParts(
|
||||||
'',
|
'',
|
||||||
|
|
Loading…
Reference in a new issue