1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-13 18:32:41 +01:00
phorge-phorge/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
epriestley f69793184e Fix over-matching of quoted text for message bodies beginning with "On..."
Summary:
A user sent a message to Phabricator which looked like:

  On blah blah blah ?

  On <date>, <user> wrote:
  > blah blah blah

The current algorithm is too aggressive and thinks lines 1-3 are //all// the "On ... wrote:" string. Instead, patch only the most recent "On".

Test Plan: Added a failing test and made it pass.

Reviewers: btrahan, zeeg

Reviewed By: zeeg

CC: aran

Differential Revision: https://secure.phabricator.com/D7732
2013-12-06 15:47:37 -08:00

121 lines
2.8 KiB
PHP

<?php
final class PhabricatorMetaMTAEmailBodyParser {
/**
* Mails can have bodies such as
*
* !claim
*
* taking this task
*
* Or
*
* !assign epriestley
*
* please, take this task I took; its hard
*
* This function parses such an email body and returns a dictionary
* containing a clean body text (e.g. "taking this task"), a $command
* (e.g. !claim, !assign) and a $command_value (e.g. "epriestley" in the
* !assign example.)
*
* @return dict
*/
public function parseBody($body) {
$body = $this->stripTextBody($body);
$lines = explode("\n", trim($body));
$first_line = head($lines);
$command = null;
$command_value = null;
$matches = null;
if (preg_match('/^!(\w+)\s*(\S+)?/', $first_line, $matches)) {
$lines = array_slice($lines, 1);
$body = implode("\n", $lines);
$body = trim($body);
$command = $matches[1];
$command_value = idx($matches, 2);
}
return array(
'body' => $body,
'command' => $command,
'command_value' => $command_value);
}
public function stripTextBody($body) {
return trim($this->stripSignature($this->stripQuotedText($body)));
}
private function stripQuotedText($body) {
// Look for "On <date>, <user> wrote:". This may be split across multiple
// lines. We need to be careful not to remove all of a message like this:
//
// On which day do you want to meet?
//
// On <date>, <user> wrote:
// > Let's set up a meeting.
$start = null;
$lines = phutil_split_lines($body);
foreach ($lines as $key => $line) {
if (preg_match('/^\s*>?\s*On\b/', $line)) {
$start = $key;
}
if ($start !== null) {
if (preg_match('/\bwrote:/', $line)) {
$lines = array_slice($lines, 0, $start);
$body = implode('', $lines);
break;
}
}
}
// Outlook english
$body = preg_replace(
'/^\s*-----Original Message-----.*?/imsU',
'',
$body);
// Outlook danish
$body = preg_replace(
'/^\s*-----Oprindelig Meddelelse-----.*?/imsU',
'',
$body);
// See example in T3217.
$body = preg_replace(
'/^________________________________________\s+From:.*?/imsU',
'',
$body);
return rtrim($body);
}
private function stripSignature($body) {
// Quasi-"standard" delimiter, for lols see:
// https://bugzilla.mozilla.org/show_bug.cgi?id=58406
$body = preg_replace(
'/^-- +$.*/sm',
'',
$body);
// HTC Mail application (mobile)
$body = preg_replace(
'/^\s*^Sent from my HTC smartphone.*/sm',
'',
$body);
// Apple iPhone
$body = preg_replace(
'/^\s*^Sent from my iPhone\s*$.*/sm',
'',
$body);
return rtrim($body);
}
}