mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-10 14:51:06 +01:00
Add some basic signature stripping
Summary: See discussion in T789. Covered the obvious cases, at least. We can refine this as we get a larger sample size.
Test Plan: Unit test coverage.
Reviewers: btrahan, vrana, jungejason
Reviewed By: btrahan
CC: aran
Maniphest Tasks: T789
Differential Revision: https://secure.phabricator.com/D2154
This commit is contained in:
parent
056fd755da
commit
23fd936b47
3 changed files with 42 additions and 10 deletions
|
@ -18,13 +18,11 @@
|
|||
|
||||
final class PhabricatorMetaMTAEmailBodyParser {
|
||||
|
||||
public function __construct($corpus) {
|
||||
$this->corpus = $corpus;
|
||||
public function stripTextBody($body) {
|
||||
return $this->stripSignature($this->stripQuotedText($body));
|
||||
}
|
||||
|
||||
public function stripQuotedText() {
|
||||
$body = $this->corpus;
|
||||
|
||||
private function stripQuotedText($body) {
|
||||
$body = preg_replace(
|
||||
'/^\s*On\b.*\bwrote:.*?/msU',
|
||||
'',
|
||||
|
@ -42,9 +40,26 @@ final class PhabricatorMetaMTAEmailBodyParser {
|
|||
'',
|
||||
$body);
|
||||
|
||||
return rtrim($body);
|
||||
}
|
||||
|
||||
private function stripSignature($body) {
|
||||
// Quasi-"standard" delimiter, for lols see:
|
||||
// https://bugzilla.mozilla.org/show_bug.cgi?id=58406
|
||||
$body = preg_replace(
|
||||
'/^-- +$.*/sm',
|
||||
'',
|
||||
$body);
|
||||
|
||||
// HTC Mail application (mobile)
|
||||
$body = preg_replace(
|
||||
'/^\s*Sent from my HTC smartphone.*?/msU',
|
||||
'/^\s*^Sent from my HTC smartphone.*/sm',
|
||||
'',
|
||||
$body);
|
||||
|
||||
// Apple iPhone
|
||||
$body = preg_replace(
|
||||
'/^\s*^Sent from my iPhone\s*$.*/sm',
|
||||
'',
|
||||
$body);
|
||||
|
||||
|
|
|
@ -22,13 +22,15 @@ final class PhabricatorMetaMTAEmailBodyParserTestCase
|
|||
public function testQuotedTextStripping() {
|
||||
$bodies = $this->getEmailBodies();
|
||||
foreach ($bodies as $body) {
|
||||
$parser = new PhabricatorMetaMTAEmailBodyParser($body);
|
||||
$stripped = $parser->stripQuotedText();
|
||||
$parser = new PhabricatorMetaMTAEmailBodyParser();
|
||||
$stripped = $parser->stripTextBody($body);
|
||||
$this->assertEqual("OKAY", $stripped);
|
||||
}
|
||||
}
|
||||
|
||||
private function getEmailBodies() {
|
||||
$trailing_space = ' ';
|
||||
|
||||
return array(
|
||||
<<<EOEMAIL
|
||||
OKAY
|
||||
|
@ -86,6 +88,21 @@ To: <somebody@somewhere.com>
|
|||
Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm
|
||||
> ...
|
||||
EOEMAIL
|
||||
,
|
||||
<<<EOEMAIL
|
||||
OKAY
|
||||
|
||||
--{$trailing_space}
|
||||
Abraham Lincoln
|
||||
Supreme Galactic Emperor
|
||||
EOEMAIL
|
||||
,
|
||||
<<<EOEMAIL
|
||||
OKAY
|
||||
|
||||
Sent from my iPhone
|
||||
EOEMAIL
|
||||
,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -205,8 +205,8 @@ final class PhabricatorMetaMTAReceivedMail extends PhabricatorMetaMTADAO {
|
|||
public function getCleanTextBody() {
|
||||
$body = idx($this->bodies, 'text');
|
||||
|
||||
$parser = new PhabricatorMetaMTAEmailBodyParser($body);
|
||||
return $parser->stripQuotedText();
|
||||
$parser = new PhabricatorMetaMTAEmailBodyParser();
|
||||
return $parser->stripTextBody($body);
|
||||
}
|
||||
|
||||
public static function loadReceiverObject($receiver_name) {
|
||||
|
|
Loading…
Reference in a new issue