1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-09-20 01:08:50 +02:00

Add some basic signature stripping

Summary: See discussion in T789. Covered the obvious cases, at least. We can refine this as we get a larger sample size.

Test Plan: Unit test coverage.

Reviewers: btrahan, vrana, jungejason

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T789

Differential Revision: https://secure.phabricator.com/D2154
This commit is contained in:
epriestley 2012-04-08 15:04:12 -07:00
parent 056fd755da
commit 23fd936b47
3 changed files with 42 additions and 10 deletions

View file

@ -18,13 +18,11 @@
final class PhabricatorMetaMTAEmailBodyParser {
public function __construct($corpus) {
$this->corpus = $corpus;
/**
 * Clean an email text body in two passes: first remove quoted reply text
 * via stripQuotedText(), then pass that result to stripSignature() to drop
 * a trailing signature.
 *
 * @param  $body  Email text body to clean; it is handed to preg_replace()
 *                by the helpers, so presumably a string (confirm at callers).
 * @return        Whatever stripSignature() returns for the quote-stripped
 *                body.
 */
public function stripTextBody($body) {
return $this->stripSignature($this->stripQuotedText($body));
}
public function stripQuotedText() {
$body = $this->corpus;
private function stripQuotedText($body) {
$body = preg_replace(
'/^\s*On\b.*\bwrote:.*?/msU',
'',
@ -42,9 +40,26 @@ final class PhabricatorMetaMTAEmailBodyParser {
'',
$body);
return rtrim($body);
}
private function stripSignature($body) {
// Quasi-"standard" delimiter, for lols see:
// https://bugzilla.mozilla.org/show_bug.cgi?id=58406
$body = preg_replace(
'/^-- +$.*/sm',
'',
$body);
// HTC Mail application (mobile)
$body = preg_replace(
'/^\s*Sent from my HTC smartphone.*?/msU',
'/^\s*^Sent from my HTC smartphone.*/sm',
'',
$body);
// Apple iPhone
$body = preg_replace(
'/^\s*^Sent from my iPhone\s*$.*/sm',
'',
$body);

View file

@ -22,13 +22,15 @@ final class PhabricatorMetaMTAEmailBodyParserTestCase
public function testQuotedTextStripping() {
$bodies = $this->getEmailBodies();
foreach ($bodies as $body) {
$parser = new PhabricatorMetaMTAEmailBodyParser($body);
$stripped = $parser->stripQuotedText();
$parser = new PhabricatorMetaMTAEmailBodyParser();
$stripped = $parser->stripTextBody($body);
$this->assertEqual("OKAY", $stripped);
}
}
private function getEmailBodies() {
$trailing_space = ' ';
return array(
<<<EOEMAIL
OKAY
@ -86,6 +88,21 @@ To: <somebody@somewhere.com>
Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
--{$trailing_space}
Abraham Lincoln
Supreme Galactic Emperor
EOEMAIL
,
<<<EOEMAIL
OKAY
Sent from my iPhone
EOEMAIL
,
);
}

View file

@ -205,8 +205,8 @@ final class PhabricatorMetaMTAReceivedMail extends PhabricatorMetaMTADAO {
public function getCleanTextBody() {
$body = idx($this->bodies, 'text');
$parser = new PhabricatorMetaMTAEmailBodyParser($body);
return $parser->stripQuotedText();
$parser = new PhabricatorMetaMTAEmailBodyParser();
return $parser->stripTextBody($body);
}
public static function loadReceiverObject($receiver_name) {