mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-10 14:51:06 +01:00
Limit remarkup URI protocol length to 32 characters to avoid expensive regex behavior
Summary: Ref T13608. When searching for bare URIs in remarkup text, don't look for URIs with a protocol string longer than 32 characters. This avoids a case where the regexp engine may be tricked into executing at `O(N^2)` or some similar complexity.

Test Plan:
- Applied remarkup to "AAAA..." (512KB).
- Before: 64 seconds to process.
- After: <10ms to process.
- Ran unit tests.

Maniphest Tasks: T13608

Differential Revision: https://secure.phabricator.com/D21562
This commit is contained in:
parent
6703fec3e2
commit
bd4d9d88f2
1 changed files with 35 additions and 5 deletions
|
@ -9,18 +9,47 @@ final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule {
|
||||||
}
|
}
|
||||||
|
|
||||||
public function apply($text) {
|
public function apply($text) {
|
||||||
|
static $angle_pattern;
|
||||||
|
static $curly_pattern;
|
||||||
|
static $bare_pattern;
|
||||||
|
|
||||||
|
if ($angle_pattern === null) {
|
||||||
|
// See T13608. Limit protocol matches to 32 characters to improve the
|
||||||
|
// performance of the "<protocol>://" pattern, which can take a very long
|
||||||
|
// time to match against long inputs if the maximum length of a protocol
|
||||||
|
// sequence is unrestricted.
|
||||||
|
|
||||||
|
$protocol_fragment = '\w{3,32}';
|
||||||
|
$uri_fragment = '[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+';
|
||||||
|
|
||||||
|
$angle_pattern = sprintf(
|
||||||
|
'(<(%s://%s?)>)',
|
||||||
|
$protocol_fragment,
|
||||||
|
$uri_fragment);
|
||||||
|
|
||||||
|
$curly_pattern = sprintf(
|
||||||
|
'({(%s://%s?)})',
|
||||||
|
$protocol_fragment,
|
||||||
|
$uri_fragment);
|
||||||
|
|
||||||
|
$bare_pattern = sprintf(
|
||||||
|
'(%s://%s)',
|
||||||
|
$protocol_fragment,
|
||||||
|
$uri_fragment);
|
||||||
|
}
|
||||||
|
|
||||||
// Hyperlinks with explicit "<>" around them get linked exactly, without
|
// Hyperlinks with explicit "<>" around them get linked exactly, without
|
||||||
// the "<>". Angle brackets are basically special and mean "this is a URL
|
// the "<>". Angle brackets are basically special and mean "this is a URL
|
||||||
// with weird characters". This is assumed to be reasonable because they
|
// with weird characters". This is assumed to be reasonable because they
|
||||||
// don't appear in normal text or normal URLs.
|
// don't appear in most normal text or most normal URLs.
|
||||||
$text = preg_replace_callback(
|
$text = preg_replace_callback(
|
||||||
'@<(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+?)>@',
|
$angle_pattern,
|
||||||
array($this, 'markupHyperlinkAngle'),
|
array($this, 'markupHyperlinkAngle'),
|
||||||
$text);
|
$text);
|
||||||
|
|
||||||
// We match "{uri}", but do not link it by default.
|
// We match "{uri}", but do not link it by default.
|
||||||
$text = preg_replace_callback(
|
$text = preg_replace_callback(
|
||||||
'@{(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+?)}@',
|
$curly_pattern,
|
||||||
array($this, 'markupHyperlinkCurly'),
|
array($this, 'markupHyperlinkCurly'),
|
||||||
$text);
|
$text);
|
||||||
|
|
||||||
|
@ -31,8 +60,9 @@ final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule {
|
||||||
|
|
||||||
// NOTE: We're explicitly avoiding capturing stored blocks, so text like
|
// NOTE: We're explicitly avoiding capturing stored blocks, so text like
|
||||||
// `http://www.example.com/[[x | y]]` doesn't get aggressively captured.
|
// `http://www.example.com/[[x | y]]` doesn't get aggressively captured.
|
||||||
|
|
||||||
$text = preg_replace_callback(
|
$text = preg_replace_callback(
|
||||||
'@(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+)@',
|
$bare_pattern,
|
||||||
array($this, 'markupHyperlinkUngreedy'),
|
array($this, 'markupHyperlinkUngreedy'),
|
||||||
$text);
|
$text);
|
||||||
|
|
||||||
|
@ -110,7 +140,7 @@ final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function markupHyperlinkUngreedy($matches) {
|
protected function markupHyperlinkUngreedy($matches) {
|
||||||
$match = $matches[1];
|
$match = $matches[0];
|
||||||
$tail = null;
|
$tail = null;
|
||||||
$trailing = null;
|
$trailing = null;
|
||||||
if (preg_match('/[;,.:!?]+$/', $match, $trailing)) {
|
if (preg_match('/[;,.:!?]+$/', $match, $trailing)) {
|
||||||
|
|
Loading…
Reference in a new issue