2014-03-08 02:44:35 +01:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
|
2014-07-23 02:03:09 +02:00
|
|
|
* Parses commit messages (containing relatively freeform text with textual
|
2014-03-08 02:44:35 +01:00
|
|
|
* field labels) into a dictionary of fields.
|
|
|
|
*
|
|
|
|
* $parser = id(new DifferentialCommitMessageParser())
|
|
|
|
* ->setLabelMap($label_map)
|
|
|
|
* ->setTitleKey($key_title)
|
|
|
|
* ->setSummaryKey($key_summary);
|
|
|
|
*
|
|
|
|
* $fields = $parser->parseCorpus($corpus);
|
|
|
|
* $errors = $parser->getErrors();
|
|
|
|
*
|
|
|
|
* This is used by Differential to parse messages entered from the command line.
|
|
|
|
*
|
|
|
|
* @task config Configuring the Parser
|
|
|
|
* @task parse Parsing Messages
|
|
|
|
* @task support Support Methods
|
|
|
|
* @task internal Internals
|
|
|
|
*/
|
2015-06-15 10:02:26 +02:00
|
|
|
final class DifferentialCommitMessageParser extends Phobject {
|
2014-03-08 02:44:35 +01:00
|
|
|
|
2016-12-14 16:59:14 +01:00
|
|
|
/** @var PhabricatorUser|null Viewer assigned through setViewer(). */
private $viewer;

/**
 * @var array|null Map from normalized label text to field key. Either
 *   supplied through setLabelMap() or built lazily by getLabelMap().
 */
private $labelMap;

/** @var string|null Key of the field which receives unlabeled leading text. */
private $titleKey;

/** @var string|null Key of the field which receives overflow title text. */
private $summaryKey;

/** @var array|null Error messages recorded by the most recent parse. */
private $errors;

/**
 * @var array|null Field definitions, keyed by commit message field key
 *   (see setCommitMessageFields()).
 */
private $commitMessageFields;

/** @var bool Whether parseFields() validates every known field. */
private $raiseMissingFieldErrors = true;
|
2014-03-08 02:44:35 +01:00
|
|
|
|
2016-11-12 16:04:01 +01:00
|
|
|
/**
 * Build a parser preconfigured with the commit message fields enabled for
 * a viewer, using the standard title and summary field keys.
 *
 * @param PhabricatorUser $viewer Viewer the parser acts on behalf of.
 * @return DifferentialCommitMessageParser Configured parser.
 */
public static function newStandardParser(PhabricatorUser $viewer) {
  $enabled_fields = DifferentialCommitMessageField::newEnabledFields($viewer);

  $parser = new self();
  $parser->setViewer($viewer);
  $parser->setCommitMessageFields($enabled_fields);
  $parser->setTitleKey(DifferentialTitleCommitMessageField::FIELDKEY);
  $parser->setSummaryKey(DifferentialSummaryCommitMessageField::FIELDKEY);

  return $parser;
}
|
|
|
|
|
|
|
|
|
2014-03-08 02:44:35 +01:00
|
|
|
/* -( Configuring the Parser )--------------------------------------------- */
|
|
|
|
|
|
|
|
|
2016-12-14 16:59:14 +01:00
|
|
|
/**
 * Set the viewer this parser acts on behalf of.
 *
 * @param PhabricatorUser $viewer Viewing user.
 * @return $this
 * @task config
 */
public function setViewer(PhabricatorUser $viewer) {
  $this->viewer = $viewer;

  return $this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Get the viewer previously assigned with setViewer().
 *
 * @return PhabricatorUser|null Viewer, or null if none has been set.
 * @task config
 */
public function getViewer() {
  return $this->viewer;
}
|
|
|
|
|
|
|
|
|
2016-12-14 17:14:52 +01:00
|
|
|
/**
 * Set the field definitions used to derive labels and to parse values.
 *
 * The fields are stored keyed by the result of calling
 * getCommitMessageFieldKey() on each of them.
 *
 * @param array $fields List of DifferentialCommitMessageField objects.
 * @return $this
 * @task config
 */
public function setCommitMessageFields($fields) {
  assert_instances_of($fields, 'DifferentialCommitMessageField');

  $this->commitMessageFields = mpull(
    $fields,
    null,
    'getCommitMessageFieldKey');

  return $this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Get the field definitions assigned with setCommitMessageFields().
 *
 * @return array|null Fields keyed by field key, or null if none were set.
 * @task config
 */
public function getCommitMessageFields() {
  return $this->commitMessageFields;
}
|
|
|
|
|
|
|
|
|
2016-12-14 16:59:14 +01:00
|
|
|
/**
 * Choose whether parseFields() should validate every known field after
 * parsing, recording an error for each one that fails validation.
 *
 * @param bool $raise True to run validation.
 * @return $this
 * @task config
 */
public function setRaiseMissingFieldErrors($raise) {
  $this->raiseMissingFieldErrors = $raise;

  return $this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Check whether parseFields() validates every known field after parsing.
 *
 * @return bool Current setting; defaults to true.
 * @task config
 */
public function getRaiseMissingFieldErrors() {
  return $this->raiseMissingFieldErrors;
}
|
|
|
|
|
|
|
|
|
2014-03-08 02:44:35 +01:00
|
|
|
/**
 * Provide an explicit label map instead of deriving one from the
 * configured commit message fields.
 *
 * @param array $label_map Map from normalized label text to field key.
 * @return $this
 * @task config
 */
public function setLabelMap(array $label_map) {
  $this->labelMap = $label_map;

  return $this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Set the key of the title field. Text which appears before any label is
 * assigned to this field by parseCorpus().
 *
 * @param string $title_key Field key for the title.
 * @return $this
 * @task config
 */
public function setTitleKey($title_key) {
  $this->titleKey = $title_key;

  return $this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Set the key of the summary field. parseCorpus() moves text which
 * overflows the title into this field.
 *
 * @param string $summary_key Field key for the summary.
 * @return $this
 * @task config
 */
public function setSummaryKey($summary_key) {
  $this->summaryKey = $summary_key;

  return $this;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* -( Parsing Messages )--------------------------------------------------- */
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Split a raw commit message into a map from field key to field text.
 *
 * Lines which begin with a known label start a new field; all other lines
 * are appended to the field currently being read. Reading starts in the
 * title field, so the "Title:" label is optional. Any errors from an
 * earlier parse are discarded, and a label which appears more than once is
 * recorded as an error (see getErrors()).
 *
 * @param string $corpus Raw commit message text.
 * @return array Map from field key to text, ordered by first appearance.
 * @throws Exception If the title key, summary key or label map is unset.
 * @task parse
 */
public function parseCorpus($corpus) {
  $this->errors = array();

  $labels = $this->getLabelMap();
  $title_key = $this->titleKey;
  $summary_key = $this->summaryKey;

  $is_configured = $title_key && $summary_key && ($labels !== null);
  if (!$is_configured) {
    throw new Exception(
      pht(
        'Expected %s, %s and %s to be set before parsing a corpus.',
        'labelMap',
        'summaryKey',
        'titleKey'));
  }

  $pattern = $this->buildLabelRegexp($labels);

  // Start out reading the title, so text ahead of the first label is
  // treated as the title without needing an explicit "Title:" label.
  $current = $title_key;

  $seen = array();
  $fields = array();
  foreach (explode("\n", trim($corpus)) as $text) {
    $matches = null;
    if (preg_match($pattern, $text, $matches)) {
      // A labeled line switches fields; only the text after the label is
      // kept as content.
      $text = trim($matches['text']);
      $current = $labels[self::normalizeFieldLabel($matches['field'])];

      if (isset($seen[$current])) {
        $this->errors[] = pht(
          'Field "%s" occurs twice in commit message!',
          $current);
      }
      $seen[$current] = true;
    }

    $fields[$current][] = $text;
  }

  // Allow the "title" and "summary" labels to be omitted entirely: when
  // the title block contains a blank line and no summary was given, the
  // text before the blank line is the title and the rest is the summary.
  if (isset($fields[$title_key]) && empty($fields[$summary_key])) {
    $title_lines = $fields[$title_key];

    $blank_at = null;
    foreach ($title_lines as $index => $title_line) {
      if (trim($title_line) === '') {
        $blank_at = $index;
        break;
      }
    }

    if ($blank_at !== null) {
      $fields[$title_key] = array_slice($title_lines, 0, $blank_at);

      $tail = array_slice($title_lines, $blank_at);
      if (trim(implode("\n", $tail)) !== '') {
        $fields[$summary_key] = $tail;
      }
    }
  }

  // Collapse each field's list of lines into a single block of text.
  foreach ($fields as $name => $chunk) {
    $fields[$name] = ltrim(rtrim(implode("\n", $chunk)), "\n");
  }

  if (!isset($fields[$title_key])) {
    return $fields;
  }

  // Cope with very long titles (or one big block of unstructured text):
  // keep a truncated title and push the overflow into the summary.
  $terminal = '...';
  $title = $fields[$title_key];

  $short = id(new PhutilUTF8StringTruncator())
    ->setMaximumBytes(250)
    ->setTerminator($terminal)
    ->truncateString($title);

  if ($short != $title) {
    // The title becomes:
    //
    //   Title title tile title title...
    //
    // ...and the summary becomes:
    //
    //   ...title title title.
    //
    //   Summary summary summary summary.

    $prefix_length = strlen($short) - strlen($terminal);
    $overflow = ltrim(substr($title, $prefix_length));

    $summary = '...'.$overflow."\n\n".idx($fields, $summary_key, '');

    $fields[$title_key] = $short;
    $fields[$summary_key] = rtrim($summary, "\n");
  }

  return $fields;
}
|
|
|
|
|
|
|
|
|
2016-12-14 16:59:14 +01:00
|
|
|
/**
 * Parse a raw commit message into a map of parsed field values.
 *
 * The corpus is first split into text fields with parseCorpus(), then each
 * text value is handed to the matching field definition to be parsed.
 * Values which fail to parse are left out of the result and recorded as
 * errors. When missing field errors are enabled, every configured field is
 * then validated and validation failures are recorded as errors too.
 * Recorded errors are available from getErrors().
 *
 * @param string $corpus Raw commit message text.
 * @return array Map from field key to parsed value.
 * @throws Exception If the corpus contains a field with no definition.
 * @task parse
 */
public function parseFields($corpus) {
  $text_map = $this->parseCorpus($corpus);

  // A parser configured only with setLabelMap() has no field definitions.
  // Use an empty map so the lookup and loop below always receive an array
  // and an unknown field reaches the descriptive exception.
  $field_map = $this->getCommitMessageFields();
  if ($field_map === null) {
    $field_map = array();
  }

  $result_map = array();
  foreach ($text_map as $field_key => $text_value) {
    $field = idx($field_map, $field_key);
    if (!$field) {
      // This is a strict error, since we only parse fields which we have
      // been told are valid. The caller probably handed us an invalid label
      // map.
      throw new Exception(
        pht(
          'Parser emitted a field with key "%s", but no corresponding '.
          'field definition exists.',
          $field_key));
    }

    try {
      $result_map[$field_key] = $field->parseFieldValue($text_value);
    } catch (DifferentialFieldParseException $ex) {
      $this->errors[] = pht(
        'Error parsing field "%s": %s',
        $field->getFieldName(),
        $ex->getMessage());
    }
  }

  if ($this->getRaiseMissingFieldErrors()) {
    foreach ($field_map as $key => $field) {
      try {
        // Fields absent from the message are validated with a null value.
        $field->validateFieldValue(idx($result_map, $key));
      } catch (DifferentialFieldValidationException $ex) {
        $this->errors[] = pht(
          'Invalid or missing field "%s": %s',
          $field->getFieldName(),
          $ex->getMessage());
      }
    }
  }

  return $result_map;
}
|
|
|
|
|
|
|
|
|
2014-03-08 02:44:35 +01:00
|
|
|
/**
 * Get the errors recorded by the most recent parse.
 *
 * @return array List of error message strings; empty if nothing has been
 *   parsed yet or the last parse recorded no errors.
 * @task parse
 */
public function getErrors() {
  // The error list is only initialized when a parse starts, so report an
  // empty list rather than null when this is called before any parse.
  if ($this->errors === null) {
    return array();
  }

  return $this->errors;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* -( Support Methods )---------------------------------------------------- */
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Convert a field label to the lowercase form used as a label map key.
 *
 * @param string $label Label text as written in a message or field.
 * @return string Lowercased label.
 * @task support
 */
public static function normalizeFieldLabel($label) {
  $normalized = phutil_utf8_strtolower($label);

  return $normalized;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* -( Internals )---------------------------------------------------------- */
|
|
|
|
|
|
|
|
|
2016-12-14 17:14:52 +01:00
|
|
|
/**
 * Get the map from normalized label text to field key.
 *
 * If no map was provided with setLabelMap(), one is built from the names
 * and aliases of the configured commit message fields and cached.
 *
 * @return array|null Label map, or null if neither a label map nor any
 *   commit message fields have been configured.
 * @throws Exception If two fields claim the same label.
 * @task internal
 */
private function getLabelMap() {
  if ($this->labelMap === null) {
    $field_list = $this->getCommitMessageFields();

    // With no fields to derive labels from, report the map as unset so
    // parseCorpus() raises its configuration error instead of silently
    // parsing with an empty map.
    if ($field_list === null) {
      return null;
    }

    $label_map = array();
    foreach ($field_list as $field_key => $field) {
      $labels = $field->getFieldAliases();
      $labels[] = $field->getFieldName();

      foreach ($labels as $label) {
        $normal_label = self::normalizeFieldLabel($label);

        // Use isset() so a label already claimed by a field with a falsy
        // key is still detected as a collision.
        if (isset($label_map[$normal_label])) {
          throw new Exception(
            pht(
              'Field label "%s" is parsed by two custom fields: "%s" and '.
              '"%s". Each label must be parsed by only one field.',
              $label,
              $field_key,
              $label_map[$normal_label]));
        }

        $label_map[$normal_label] = $field_key;
      }
    }

    $this->labelMap = $label_map;
  }

  return $this->labelMap;
}
|
|
|
|
|
|
|
|
|
2014-03-08 02:44:35 +01:00
|
|
|
/**
 * Build the regular expression used to recognize labeled lines.
 *
 * The pattern matches a line which starts with one of the known labels
 * (compared case-insensitively) followed by a colon. The label is captured
 * as "field" and the rest of the line as "text".
 *
 * @param array $label_map Map whose keys are the recognized labels.
 * @return string PCRE pattern.
 * @task internal
 */
private function buildLabelRegexp(array $label_map) {
  if (!$label_map) {
    // An empty alternation would match any line starting with ":" and
    // report an empty label, so use a pattern which can never match.
    return '/(?!)/';
  }

  $quoted_labels = array();
  foreach (array_keys($label_map) as $label) {
    $quoted_labels[] = preg_quote($label, '/');
  }
  $alternation = implode('|', $quoted_labels);

  return '/^(?P<field>'.$alternation.'):(?P<text>.*)$/i';
}
|
|
|
|
|
|
|
|
}
|