From f69793184e0bb02fd44925113238937124f66a0e Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 6 Dec 2013 15:47:37 -0800 Subject: [PATCH] Fix over-matching of quoted text for message bodies beginning with "On..." Summary: A user sent a message to Phabricator which looked like: On blah blah blah ? On <date>, <user> wrote: > blah blah blah The current algorithm is too aggressive and thinks lines 1-3 are //all// the "On ... wrote:" string. Instead, patch only the most recent "On". Test Plan: Added a failing test and made it pass. Reviewers: btrahan, zeeg Reviewed By: zeeg CC: aran Differential Revision: https://secure.phabricator.com/D7732 --- .../PhabricatorMetaMTAEmailBodyParser.php | 27 ++++++++++++++++--- ...bricatorMetaMTAEmailBodyParserTestCase.php | 14 ++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php index 3218695cc5..41a237aca1 100644 --- a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php +++ b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php @@ -50,10 +50,29 @@ final class PhabricatorMetaMTAEmailBodyParser { } private function stripQuotedText($body) { - $body = preg_replace( - '/^\s*>?\s*On\b.*\bwrote:.*?/msU', - '', - $body); + + // Look for "On <date>, <user> wrote:". This may be split across multiple + // lines. We need to be careful not to remove all of a message like this: + // + // On which day do you want to meet? + // + // On <date>, <user> wrote: + // > Let's set up a meeting. 
+ + $start = null; + $lines = phutil_split_lines($body); + foreach ($lines as $key => $line) { + if (preg_match('/^\s*>?\s*On\b/', $line)) { + $start = $key; + } + if ($start !== null) { + if (preg_match('/\bwrote:/', $line)) { + $lines = array_slice($lines, 0, $start); + $body = implode('', $lines); + break; + } + } + } // Outlook english $body = preg_replace( diff --git a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php index d4361c6e16..86cc547b4e 100644 --- a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php +++ b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php @@ -31,6 +31,20 @@ final class PhabricatorMetaMTAEmailBodyParserTestCase } } + public function testFalsePositiveForOnWrote() { + $body = << Hey bro do you want to go ride horses tomorrow? +EOEMAIL; + + $parser = new PhabricatorMetaMTAEmailBodyParser(); + $stripped = $parser->stripTextBody($body); + $this->assertEqual("On which horse shall you ride?", $stripped); + } + private function getEmailBodiesWithFullCommands() { $bodies = $this->getEmailBodies(); $with_commands = array();