From fda0b086b565aab6f8486ac5a57585320d63f16d Mon Sep 17 00:00:00 2001
From: epriestley
Date: Wed, 1 Oct 2014 12:45:31 -0700
Subject: [PATCH] =?UTF-8?q?Make=20`#=F0=9F=90=B3`=20work=20properly?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Ref T6223. Two issues:

  - We don't use `/u` mode on these regexps. Without `/u`, the `\w`/`\W`/`\s`/`\S` escapes have bad behavior on non-ASCII bytes. Add the flag to use Unicode mode, making `\w` and `\s` behave like we expect.
    - We might want to do something different here eventually (for example, if the `/u` flag has some huge performance penalty) but this seems OK for now.
  - We use `\b` (word boundary) to terminate the match, but `🐳` is not a word character. Use `(?!\w)` instead ("don't match before a word character") which is what we mean.

Test Plan: {F211498}

Reviewers: btrahan, chad

Reviewed By: chad

Subscribers: epriestley

Maniphest Tasks: T6223

Differential Revision: https://secure.phabricator.com/D10618
---
 .../markup/rule/PhabricatorObjectRemarkupRule.php | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php b/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
index 5c81b58a4f..fb5571dcbe 100644
--- a/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
+++ b/src/infrastructure/markup/rule/PhabricatorObjectRemarkupRule.php
@@ -100,7 +100,7 @@ abstract class PhabricatorObjectRemarkupRule extends PhutilRemarkupRule {
     $id = $this->getObjectIDPattern();
 
     $text = preg_replace_callback(
-      '@\B{'.$prefix.'('.$id.')((?:[^}\\\\]|\\\\.)*)}\B@',
+      '@\B{'.$prefix.'('.$id.')((?:[^}\\\\]|\\\\.)*)}\B@u',
       array($this, 'markupObjectEmbed'),
       $text);
 
@@ -122,7 +122,7 @@ abstract class PhabricatorObjectRemarkupRule extends PhutilRemarkupRule {
     // in the middle of words.
 
     $text = preg_replace_callback(
-      '((?