mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-25 14:08:19 +01:00
Add some basic signature stripping
Summary: See discussion in T789. Covered the obvious cases, at least. We can refine this as we get a larger sample size.
Test Plan: Unit test coverage.
Reviewers: btrahan, vrana, jungejason
Reviewed By: btrahan
CC: aran
Maniphest Tasks: T789
Differential Revision: https://secure.phabricator.com/D2154
This commit is contained in:
parent
056fd755da
commit
23fd936b47
3 changed files with 42 additions and 10 deletions
|
@ -18,13 +18,11 @@
|
||||||
|
|
||||||
final class PhabricatorMetaMTAEmailBodyParser {
|
final class PhabricatorMetaMTAEmailBodyParser {
|
||||||
|
|
||||||
public function __construct($corpus) {
|
public function stripTextBody($body) {
|
||||||
$this->corpus = $corpus;
|
return $this->stripSignature($this->stripQuotedText($body));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function stripQuotedText() {
|
private function stripQuotedText($body) {
|
||||||
$body = $this->corpus;
|
|
||||||
|
|
||||||
$body = preg_replace(
|
$body = preg_replace(
|
||||||
'/^\s*On\b.*\bwrote:.*?/msU',
|
'/^\s*On\b.*\bwrote:.*?/msU',
|
||||||
'',
|
'',
|
||||||
|
@ -42,9 +40,26 @@ final class PhabricatorMetaMTAEmailBodyParser {
|
||||||
'',
|
'',
|
||||||
$body);
|
$body);
|
||||||
|
|
||||||
|
return rtrim($body);
|
||||||
|
}
|
||||||
|
|
||||||
|
private function stripSignature($body) {
|
||||||
|
// Quasi-"standard" delimiter, for lols see:
|
||||||
|
// https://bugzilla.mozilla.org/show_bug.cgi?id=58406
|
||||||
|
$body = preg_replace(
|
||||||
|
'/^-- +$.*/sm',
|
||||||
|
'',
|
||||||
|
$body);
|
||||||
|
|
||||||
// HTC Mail application (mobile)
|
// HTC Mail application (mobile)
|
||||||
$body = preg_replace(
|
$body = preg_replace(
|
||||||
'/^\s*Sent from my HTC smartphone.*?/msU',
|
'/^\s*^Sent from my HTC smartphone.*/sm',
|
||||||
|
'',
|
||||||
|
$body);
|
||||||
|
|
||||||
|
// Apple iPhone
|
||||||
|
$body = preg_replace(
|
||||||
|
'/^\s*^Sent from my iPhone\s*$.*/sm',
|
||||||
'',
|
'',
|
||||||
$body);
|
$body);
|
||||||
|
|
||||||
|
|
|
@ -22,13 +22,15 @@ final class PhabricatorMetaMTAEmailBodyParserTestCase
|
||||||
public function testQuotedTextStripping() {
|
public function testQuotedTextStripping() {
|
||||||
$bodies = $this->getEmailBodies();
|
$bodies = $this->getEmailBodies();
|
||||||
foreach ($bodies as $body) {
|
foreach ($bodies as $body) {
|
||||||
$parser = new PhabricatorMetaMTAEmailBodyParser($body);
|
$parser = new PhabricatorMetaMTAEmailBodyParser();
|
||||||
$stripped = $parser->stripQuotedText();
|
$stripped = $parser->stripTextBody($body);
|
||||||
$this->assertEqual("OKAY", $stripped);
|
$this->assertEqual("OKAY", $stripped);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function getEmailBodies() {
|
private function getEmailBodies() {
|
||||||
|
$trailing_space = ' ';
|
||||||
|
|
||||||
return array(
|
return array(
|
||||||
<<<EOEMAIL
|
<<<EOEMAIL
|
||||||
OKAY
|
OKAY
|
||||||
|
@ -86,6 +88,21 @@ To: <somebody@somewhere.com>
|
||||||
Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm
|
Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm
|
||||||
> ...
|
> ...
|
||||||
EOEMAIL
|
EOEMAIL
|
||||||
|
,
|
||||||
|
<<<EOEMAIL
|
||||||
|
OKAY
|
||||||
|
|
||||||
|
--{$trailing_space}
|
||||||
|
Abraham Lincoln
|
||||||
|
Supreme Galactic Emperor
|
||||||
|
EOEMAIL
|
||||||
|
,
|
||||||
|
<<<EOEMAIL
|
||||||
|
OKAY
|
||||||
|
|
||||||
|
Sent from my iPhone
|
||||||
|
EOEMAIL
|
||||||
|
,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -205,8 +205,8 @@ final class PhabricatorMetaMTAReceivedMail extends PhabricatorMetaMTADAO {
|
||||||
public function getCleanTextBody() {
|
public function getCleanTextBody() {
|
||||||
$body = idx($this->bodies, 'text');
|
$body = idx($this->bodies, 'text');
|
||||||
|
|
||||||
$parser = new PhabricatorMetaMTAEmailBodyParser($body);
|
$parser = new PhabricatorMetaMTAEmailBodyParser();
|
||||||
return $parser->stripQuotedText();
|
return $parser->stripTextBody($body);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function loadReceiverObject($receiver_name) {
|
public static function loadReceiverObject($receiver_name) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue