From 8cfd22c5fe88cf81ce5b2c188b207538aab04f7a Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 18 Feb 2021 11:55:20 -0800 Subject: [PATCH] Add a negative lookbehind to the Remarkup "bare URI" regular expression pattern Summary: Ref T13608. Building on D21562, further anchor this pattern by adding a negative lookbehind. Test Plan: Ran unit tests. Maniphest Tasks: T13608 Differential Revision: https://secure.phabricator.com/D21568 --- .../markuprule/PhutilRemarkupHyperlinkRule.php | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/infrastructure/markup/markuprule/PhutilRemarkupHyperlinkRule.php b/src/infrastructure/markup/markuprule/PhutilRemarkupHyperlinkRule.php index 77168c97e3..560aa180c3 100644 --- a/src/infrastructure/markup/markuprule/PhutilRemarkupHyperlinkRule.php +++ b/src/infrastructure/markup/markuprule/PhutilRemarkupHyperlinkRule.php @@ -14,10 +14,13 @@ final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule { static $bare_pattern; if ($angle_pattern === null) { - // See T13608. Limit protocol matches to 32 characters to improve the - // performance of the "://" pattern, which can take a very long - // time to match against long inputs if the maximum length of a protocol - // sequence is unrestricted. + // See T13608. A previous version of this code matched bare URIs + // starting with "\w{3,}", which can take a very long time to match + // against long inputs. + // + // Use a protocol length limit in all patterns for general sanity, + // and a negative lookbehind in the bare pattern to avoid explosive + // complexity during expression evaluation. $protocol_fragment = '\w{3,32}'; $uri_fragment = '[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+'; @@ -33,7 +36,7 @@ final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule { $uri_fragment); $bare_pattern = sprintf( - '(%s://%s)', + '((?