From 23fd936b4726ae95ef196ba9dda9bade4e2066ca Mon Sep 17 00:00:00 2001 From: epriestley Date: Sun, 8 Apr 2012 15:04:12 -0700 Subject: [PATCH] Add some basic signature stripping Summary: See discussion in T789. Covered the obvious cases, at least. We can refine this as we get a larger sample size. Test Plan: Unit test coverage. Reviewers: btrahan, vrana, jungejason Reviewed By: btrahan CC: aran Maniphest Tasks: T789 Differential Revision: https://secure.phabricator.com/D2154 --- .../PhabricatorMetaMTAEmailBodyParser.php | 27 ++++++++++++++----- ...bricatorMetaMTAEmailBodyParserTestCase.php | 21 +++++++++++++-- .../PhabricatorMetaMTAReceivedMail.php | 4 +-- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php index 4b2b5a398c..2701631a98 100644 --- a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php +++ b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php @@ -18,13 +18,11 @@ final class PhabricatorMetaMTAEmailBodyParser { - public function __construct($corpus) { - $this->corpus = $corpus; + public function stripTextBody($body) { + return $this->stripSignature($this->stripQuotedText($body)); } - public function stripQuotedText() { - $body = $this->corpus; - + private function stripQuotedText($body) { $body = preg_replace( '/^\s*On\b.*\bwrote:.*?/msU', '', @@ -42,9 +40,26 @@ final class PhabricatorMetaMTAEmailBodyParser { '', $body); + return rtrim($body); + } + + private function stripSignature($body) { + // Quasi-"standard" delimiter, for lols see: + // https://bugzilla.mozilla.org/show_bug.cgi?id=58406 + $body = preg_replace( + '/^-- +$.*/sm', + '', + $body); + // HTC Mail application (mobile) $body = preg_replace( - '/^\s*Sent from my HTC smartphone.*?/msU', + '/^\s*^Sent from my HTC smartphone.*/sm', + '', + $body); + + // Apple iPhone + $body = preg_replace( + '/^\s*^Sent from my iPhone\s*$.*/sm', '', $body); diff --git a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php index a85ec1f3e2..5f0a74eaa4 100644 --- a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php +++ b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php @@ -22,13 +22,15 @@ final class PhabricatorMetaMTAEmailBodyParserTestCase public function testQuotedTextStripping() { $bodies = $this->getEmailBodies(); foreach ($bodies as $body) { - $parser = new PhabricatorMetaMTAEmailBodyParser($body); - $stripped = $parser->stripQuotedText(); + $parser = new PhabricatorMetaMTAEmailBodyParser(); + $stripped = $parser->stripTextBody($body); $this->assertEqual("OKAY", $stripped); } } private function getEmailBodies() { + $trailing_space = ' '; + return array( << Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm > ... EOEMAIL +, +<<bodies, 'text'); - $parser = new PhabricatorMetaMTAEmailBodyParser($body); - return $parser->stripQuotedText(); + $parser = new PhabricatorMetaMTAEmailBodyParser(); + return $parser->stripTextBody($body); } public static function loadReceiverObject($receiver_name) {