From fcb75d0503c6d3e21461cdee91c01022bb6b0bab Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 23 Jul 2020 07:35:57 -0700 Subject: [PATCH] Fix an issue where prose diffing may fail after hitting the PCRE backtracking limit Summary: Fixes T13554. For certain prose diff inputs and PCRE backtracking limits, this regular expression may back track too often and fail. A characteristic input is "x x x x ...", i.e. many sequences where `(.*?)\s*\z` looks like it may be able to match but actually can not. I think writing an expression which has all the behavior we'd like without this backtracking issue isn't trivial (at least, I don't think I know how to do it offhand); just use a strategy based on "trim()" insetad, which avoids any PCRE complexities here. Test Plan: Locally, this passes the "x x x ..." test which the previous code failed. I'm not including that test because it won't reproduce across values of "pcre.backtrac_limit", PCRE versions, etc. Maniphest Tasks: T13554 Differential Revision: https://secure.phabricator.com/D21422 --- .../prose/PhutilProseDifferenceEngine.php | 51 +++++++++++++------ .../__tests__/PhutilProseDiffTestCase.php | 33 ++++++++++++ 2 files changed, 68 insertions(+), 16 deletions(-) diff --git a/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php b/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php index a5494aa05f..b8bee73961 100644 --- a/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php +++ b/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php @@ -142,22 +142,9 @@ final class PhutilProseDifferenceEngine extends Phobject { } if ($level < 2) { - // Split pieces into separate text and whitespace sections: make one - // piece out of all the whitespace at the beginning, one piece out of - // all the actual text in the middle, and one piece out of all the - // whitespace at the end. - - $matches = null; - preg_match('/^(\s*)(.*?)(\s*)\z/s', $result, $matches); - - if (strlen($matches[1])) { - $results[] = $matches[1]; - } - if (strlen($matches[2])) { - $results[] = $matches[2]; - } - if (strlen($matches[3])) { - $results[] = $matches[3]; + $trimmed_pieces = $this->trimApart($result); + foreach ($trimmed_pieces as $trimmed_piece) { + $results[] = $trimmed_piece; } } else { $results[] = $result; @@ -272,4 +259,36 @@ final class PhutilProseDifferenceEngine extends Phobject { return $blocks; } + public static function trimApart($input) { + // Split pieces into separate text and whitespace sections: make one + // piece out of all the whitespace at the beginning, one piece out of + // all the actual text in the middle, and one piece out of all the + // whitespace at the end. + + $parts = array(); + + $length = strlen($input); + + $corpus = ltrim($input); + $l_length = strlen($corpus); + if ($l_length !== $length) { + $parts[] = substr($input, 0, $length - $l_length); + } + + $corpus = rtrim($corpus); + $lr_length = strlen($corpus); + + if ($lr_length) { + $parts[] = $corpus; + } + + if ($lr_length !== $l_length) { + // NOTE: This will be a negative value; we're slicing from the end of + // the input string. + $parts[] = substr($input, $lr_length - $l_length); + } + + return $parts; + } + } diff --git a/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php b/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php index 823fef650a..d2e11cac5b 100644 --- a/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php +++ b/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php @@ -3,6 +3,39 @@ final class PhutilProseDiffTestCase extends PhabricatorTestCase { + public function testTrimApart() { + $map = array( + '' => array(), + 'a' => array('a'), + ' a ' => array( + ' ', + 'a', + ' ', + ), + ' a' => array( + ' ', + 'a', + ), + 'a ' => array( + 'a', + ' ', + ), + ' a b ' => array( + ' ', + 'a b', + ' ', + ), + ); + + foreach ($map as $input => $expect) { + $actual = PhutilProseDifferenceEngine::trimApart($input); + $this->assertEqual( + $expect, + $actual, + pht('Trim Apart: %s', $input)); + } + } + public function testProseDiffsDistance() { $this->assertProseParts( '',