1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-25 14:08:19 +01:00

Add some basic signature stripping

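Summary: See discussion in T789. This covers the obvious cases, at least: the quasi-standard "-- " signature delimiter, the HTC Mail footer, and the "Sent from my iPhone" footer. We can refine the patterns as we get a larger sample size.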

Test Plan: Unit test coverage; added email body fixtures for the "-- " delimiter and "Sent from my iPhone" signatures.

Reviewers: btrahan, vrana, jungejason

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T789

Differential Revision: https://secure.phabricator.com/D2154
This commit is contained in:
epriestley 2012-04-08 15:04:12 -07:00
parent 056fd755da
commit 23fd936b47
3 changed files with 42 additions and 10 deletions

View file

@ -18,13 +18,11 @@
final class PhabricatorMetaMTAEmailBodyParser { final class PhabricatorMetaMTAEmailBodyParser {
public function __construct($corpus) { public function stripTextBody($body) {
$this->corpus = $corpus; return $this->stripSignature($this->stripQuotedText($body));
} }
public function stripQuotedText() { private function stripQuotedText($body) {
$body = $this->corpus;
$body = preg_replace( $body = preg_replace(
'/^\s*On\b.*\bwrote:.*?/msU', '/^\s*On\b.*\bwrote:.*?/msU',
'', '',
@ -42,9 +40,26 @@ final class PhabricatorMetaMTAEmailBodyParser {
'', '',
$body); $body);
return rtrim($body);
}
private function stripSignature($body) {
// Quasi-"standard" delimiter, for lols see:
// https://bugzilla.mozilla.org/show_bug.cgi?id=58406
$body = preg_replace(
'/^-- +$.*/sm',
'',
$body);
// HTC Mail application (mobile) // HTC Mail application (mobile)
$body = preg_replace( $body = preg_replace(
'/^\s*Sent from my HTC smartphone.*?/msU', '/^\s*^Sent from my HTC smartphone.*/sm',
'',
$body);
// Apple iPhone
$body = preg_replace(
'/^\s*^Sent from my iPhone\s*$.*/sm',
'', '',
$body); $body);

View file

@ -22,13 +22,15 @@ final class PhabricatorMetaMTAEmailBodyParserTestCase
public function testQuotedTextStripping() { public function testQuotedTextStripping() {
$bodies = $this->getEmailBodies(); $bodies = $this->getEmailBodies();
foreach ($bodies as $body) { foreach ($bodies as $body) {
$parser = new PhabricatorMetaMTAEmailBodyParser($body); $parser = new PhabricatorMetaMTAEmailBodyParser();
$stripped = $parser->stripQuotedText(); $stripped = $parser->stripTextBody($body);
$this->assertEqual("OKAY", $stripped); $this->assertEqual("OKAY", $stripped);
} }
} }
private function getEmailBodies() { private function getEmailBodies() {
$trailing_space = ' ';
return array( return array(
<<<EOEMAIL <<<EOEMAIL
OKAY OKAY
@ -86,6 +88,21 @@ To: <somebody@somewhere.com>
Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm
> ... > ...
EOEMAIL EOEMAIL
,
<<<EOEMAIL
OKAY
--{$trailing_space}
Abraham Lincoln
Supreme Galactic Emperor
EOEMAIL
,
<<<EOEMAIL
OKAY
Sent from my iPhone
EOEMAIL
,
); );
} }

View file

@ -205,8 +205,8 @@ final class PhabricatorMetaMTAReceivedMail extends PhabricatorMetaMTADAO {
public function getCleanTextBody() { public function getCleanTextBody() {
$body = idx($this->bodies, 'text'); $body = idx($this->bodies, 'text');
$parser = new PhabricatorMetaMTAEmailBodyParser($body); $parser = new PhabricatorMetaMTAEmailBodyParser();
return $parser->stripQuotedText(); return $parser->stripTextBody($body);
} }
public static function loadReceiverObject($receiver_name) { public static function loadReceiverObject($receiver_name) {