1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-18 18:51:12 +01:00

Move Differential commit message parsing to a separate, tested class

Summary: Ref T2222. We have a hunk of logic that purely does text parsing here; separate it and get coverage on it.

Test Plan:
  - Ran new unit tests.
  - Used `differential.parsecommitmessage`.

Reviewers: btrahan

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T2222

Differential Revision: https://secure.phabricator.com/D8444
This commit is contained in:
epriestley 2014-03-07 17:44:35 -08:00
parent f25cce1e69
commit aff34077c5
10 changed files with 376 additions and 95 deletions

View file

@ -356,6 +356,8 @@ phutil_register_library_map(array(
'DifferentialCommentPreviewController' => 'applications/differential/controller/DifferentialCommentPreviewController.php',
'DifferentialCommentQuery' => 'applications/differential/query/DifferentialCommentQuery.php',
'DifferentialCommentSaveController' => 'applications/differential/controller/DifferentialCommentSaveController.php',
'DifferentialCommitMessageParser' => 'applications/differential/parser/DifferentialCommitMessageParser.php',
'DifferentialCommitMessageParserTestCase' => 'applications/differential/parser/__tests__/DifferentialCommitMessageParserTestCase.php',
'DifferentialCommitsField' => 'applications/differential/customfield/DifferentialCommitsField.php',
'DifferentialCommitsFieldSpecification' => 'applications/differential/field/specification/DifferentialCommitsFieldSpecification.php',
'DifferentialConflictsFieldSpecification' => 'applications/differential/field/specification/DifferentialConflictsFieldSpecification.php',
@ -2919,6 +2921,7 @@ phutil_register_library_map(array(
'DifferentialCommentPreviewController' => 'DifferentialController',
'DifferentialCommentQuery' => 'PhabricatorOffsetPagedQuery',
'DifferentialCommentSaveController' => 'DifferentialController',
'DifferentialCommitMessageParserTestCase' => 'PhabricatorTestCase',
'DifferentialCommitsField' => 'DifferentialCustomField',
'DifferentialCommitsFieldSpecification' => 'DifferentialFieldSpecification',
'DifferentialConflictsFieldSpecification' => 'DifferentialFieldSpecification',

View file

@ -89,7 +89,8 @@ final class ConduitAPI_differential_parsecommitmessage_Method
foreach ($aux_fields as $key => $aux_field) {
$labels = $aux_field->getSupportedCommitMessageLabels();
foreach ($labels as $label) {
$normal_label = strtolower($label);
$normal_label = DifferentialCommitMessageParser::normalizeFieldLabel(
$label);
if (!empty($label_map[$normal_label])) {
$previous = $label_map[$normal_label];
throw new Exception(
@ -102,106 +103,20 @@ final class ConduitAPI_differential_parsecommitmessage_Method
return $label_map;
}
private function buildLabelRegexp(array $label_map) {
$field_labels = array_keys($label_map);
foreach ($field_labels as $key => $label) {
$field_labels[$key] = preg_quote($label, '/');
}
$field_labels = implode('|', $field_labels);
$field_pattern = '/^(?P<field>'.$field_labels.'):(?P<text>.*)$/i';
return $field_pattern;
}
private function parseCommitMessage($corpus, array $label_map) {
$label_regexp = $this->buildLabelRegexp($label_map);
$parser = id(new DifferentialCommitMessageParser())
->setLabelMap($label_map)
->setTitleKey('title')
->setSummaryKey('summary');
// Note, deliberately not populating $seen with 'title' because it is
// optional to include the 'Title:' label. We're doing a little special
// casing to consume the first line as the title regardless of whether you
// label it as such or not.
$field = 'title';
$result = $parser->parseCorpus($corpus);
$seen = array();
$lines = explode("\n", trim($corpus));
$field_map = array();
foreach ($lines as $key => $line) {
$match = null;
if (preg_match($label_regexp, $line, $match)) {
$lines[$key] = trim($match['text']);
$field = $label_map[strtolower($match['field'])];
if (!empty($seen[$field])) {
$this->errors[] = "Field '{$field}' occurs twice in commit message!";
}
$seen[$field] = true;
}
$field_map[$key] = $field;
foreach ($parser->getErrors() as $error) {
$this->errors[] = $error;
}
$fields = array();
foreach ($lines as $key => $line) {
$fields[$field_map[$key]][] = $line;
}
// This is a piece of special-cased magic which allows you to omit the
// field labels for "title" and "summary". If the user enters a large block
// of text at the beginning of the commit message with an empty line in it,
// treat everything before the blank line as "title" and everything after
// as "summary".
if (isset($fields['title']) && empty($fields['summary'])) {
$lines = $fields['title'];
for ($ii = 0; $ii < count($lines); $ii++) {
if (strlen(trim($lines[$ii])) == 0) {
break;
}
}
if ($ii != count($lines)) {
$fields['title'] = array_slice($lines, 0, $ii);
$fields['summary'] = array_slice($lines, $ii);
}
}
// Implode all the lines back into chunks of text.
foreach ($fields as $name => $lines) {
$data = rtrim(implode("\n", $lines));
$data = ltrim($data, "\n");
$fields[$name] = $data;
}
// This is another piece of special-cased magic which allows you to
// enter a ridiculously long title, or just type a big block of stream
// of consciousness text, and have some sort of reasonable result conjured
// from it.
if (isset($fields['title'])) {
$terminal = '...';
$title = $fields['title'];
$short = phutil_utf8_shorten($title, 250, $terminal);
if ($short != $title) {
// If we shortened the title, split the rest into the summary, so
// we end up with a title like:
//
// Title title tile title title...
//
// ...and a summary like:
//
// ...title title title.
//
// Summary summary summary summary.
$summary = idx($fields, 'summary', '');
$offset = strlen($short) - strlen($terminal);
$remainder = ltrim(substr($fields['title'], $offset));
$summary = '...'.$remainder."\n\n".$summary;
$summary = rtrim($summary, "\n");
$fields['title'] = $short;
$fields['summary'] = $summary;
}
}
return $fields;
return $result;
}

View file

@ -0,0 +1,210 @@
<?php
/**
* Parses commit messages (containing relaively freeform text with textual
* field labels) into a dictionary of fields.
*
* $parser = id(new DifferentialCommitMessageParser())
* ->setLabelMap($label_map)
* ->setTitleKey($key_title)
* ->setSummaryKey($key_summary);
*
* $fields = $parser->parseCorpus($corpus);
* $errors = $parser->getErrors();
*
* This is used by Differential to parse messages entered from the command line.
*
* @task config Configuring the Parser
* @task parse Parsing Messages
* @task support Support Methods
* @task internal Internals
*/
final class DifferentialCommitMessageParser {
private $labelMap;
private $titleKey;
private $summaryKey;
private $errors;
/* -( Configuring the Parser )--------------------------------------------- */
/**
* @task config
*/
public function setLabelMap(array $label_map) {
$this->labelMap = $label_map;
return $this;
}
/**
* @task config
*/
public function setTitleKey($title_key) {
$this->titleKey = $title_key;
return $this;
}
/**
* @task config
*/
public function setSummaryKey($summary_key) {
$this->summaryKey = $summary_key;
return $this;
}
/* -( Parsing Messages )--------------------------------------------------- */
/**
* @task parse
*/
public function parseCorpus($corpus) {
$this->errors = array();
$label_map = $this->labelMap;
$key_title = $this->titleKey;
$key_summary = $this->summaryKey;
if (!$key_title || !$key_summary || ($label_map === null)) {
throw new Exception(
pht(
'Expected labelMap, summaryKey and titleKey to be set before '.
'parsing a corpus.'));
}
$label_regexp = $this->buildLabelRegexp($label_map);
// NOTE: We're special casing things here to make the "Title:" label
// optional in the message.
$field = $key_title;
$seen = array();
$lines = explode("\n", trim($corpus));
$field_map = array();
foreach ($lines as $key => $line) {
$match = null;
if (preg_match($label_regexp, $line, $match)) {
$lines[$key] = trim($match['text']);
$field = $label_map[self::normalizeFieldLabel($match['field'])];
if (!empty($seen[$field])) {
$this->errors[] = pht(
'Field "%s" occurs twice in commit message!',
$field);
}
$seen[$field] = true;
}
$field_map[$key] = $field;
}
$fields = array();
foreach ($lines as $key => $line) {
$fields[$field_map[$key]][] = $line;
}
// This is a piece of special-cased magic which allows you to omit the
// field labels for "title" and "summary". If the user enters a large block
// of text at the beginning of the commit message with an empty line in it,
// treat everything before the blank line as "title" and everything after
// as "summary".
if (isset($fields[$key_title]) && empty($fields[$key_summary])) {
$lines = $fields[$key_title];
for ($ii = 0; $ii < count($lines); $ii++) {
if (strlen(trim($lines[$ii])) == 0) {
break;
}
}
if ($ii != count($lines)) {
$fields[$key_title] = array_slice($lines, 0, $ii);
$summary = array_slice($lines, $ii);
if (strlen(trim(implode("\n", $summary)))) {
$fields[$key_summary] = $summary;
}
}
}
// Implode all the lines back into chunks of text.
foreach ($fields as $name => $lines) {
$data = rtrim(implode("\n", $lines));
$data = ltrim($data, "\n");
$fields[$name] = $data;
}
// This is another piece of special-cased magic which allows you to
// enter a ridiculously long title, or just type a big block of stream
// of consciousness text, and have some sort of reasonable result conjured
// from it.
if (isset($fields[$key_title])) {
$terminal = '...';
$title = $fields[$key_title];
$short = phutil_utf8_shorten($title, 250, $terminal);
if ($short != $title) {
// If we shortened the title, split the rest into the summary, so
// we end up with a title like:
//
// Title title tile title title...
//
// ...and a summary like:
//
// ...title title title.
//
// Summary summary summary summary.
$summary = idx($fields, $key_summary, '');
$offset = strlen($short) - strlen($terminal);
$remainder = ltrim(substr($fields[$key_title], $offset));
$summary = '...'.$remainder."\n\n".$summary;
$summary = rtrim($summary, "\n");
$fields[$key_title] = $short;
$fields[$key_summary] = $summary;
}
}
return $fields;
}
/**
* @task parse
*/
public function getErrors() {
return $this->errors;
}
/* -( Support Methods )---------------------------------------------------- */
/**
* @task support
*/
public static function normalizeFieldLabel($label) {
return phutil_utf8_strtolower($label);
}
/* -( Internals )---------------------------------------------------------- */
/**
* @task internal
*/
private function buildLabelRegexp(array $label_map) {
$field_labels = array_keys($label_map);
foreach ($field_labels as $key => $label) {
$field_labels[$key] = preg_quote($label, '/');
}
$field_labels = implode('|', $field_labels);
$field_pattern = '/^(?P<field>'.$field_labels.'):(?P<text>.*)$/i';
return $field_pattern;
}
}

View file

@ -0,0 +1,65 @@
<?php
final class DifferentialCommitMessageParserTestCase
extends PhabricatorTestCase {
public function testDifferentialCommitMessageParser() {
$dir = dirname(__FILE__).'/messages/';
$list = Filesystem::listDirectory($dir, $include_hidden = false);
foreach ($list as $file) {
if (!preg_match('/.txt$/', $file)) {
continue;
}
$data = Filesystem::readFile($dir.$file);
$divider = "~~~~~~~~~~\n";
$parts = explode($divider, $data);
if (count($parts) !== 4) {
throw new Exception(
pht(
'Expected test file "%s" to contain four parts (message, fields, '.
'output, errors) divided by "~~~~~~~~~~".',
$file));
}
list($message, $fields, $output, $errors) = $parts;
$fields = phutil_json_decode($fields, null);
$output = phutil_json_decode($output, null);
$errors = phutil_json_decode($errors, null);
if ($fields === null || $output === null || $errors === null) {
throw new Exception(
pht(
'Expected test file "%s" to contain valid JSON in its sections.',
$file));
}
$parser = id(new DifferentialCommitMessageParser())
->setLabelMap($fields)
->setTitleKey('title')
->setSummaryKey('summary');
$result_output = $parser->parseCorpus($message);
$result_errors = $parser->getErrors();
$this->assertEqual($output, $result_output);
$this->assertEqual($errors, $result_errors);
}
}
public function testDifferentialCommitMessageParserNormalization() {
$map = array(
'Test Plan' => 'test plan',
'REVIEWERS' => 'reviewers',
'sUmmArY' => 'summary',
);
foreach ($map as $input => $expect) {
$this->assertEqual(
$expect,
DifferentialCommitMessageParser::normalizeFieldLabel($input),
pht('Field normalization of label "%s".', $input));
}
}
}

View file

@ -0,0 +1,17 @@
fix bug
color: red
color: blue
~~~~~~~~~~
{
"color": "color"
}
~~~~~~~~~~
{
"title": "fix bug",
"color": "red\nblue"
}
~~~~~~~~~~
[
"Field \"color\" occurs twice in commit message!"
]

View file

@ -0,0 +1,11 @@
123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
~~~~~~~~~~
{}
~~~~~~~~~~
{
"title": "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567...",
"summary": "...89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"
}
~~~~~~~~~~
[]

View file

@ -0,0 +1,15 @@
fix bug
hue: black
~~~~~~~~~~
{
"color": "color",
"hue": "color"
}
~~~~~~~~~~
{
"title": "fix bug",
"color": "black"
}
~~~~~~~~~~
[]

View file

@ -0,0 +1,24 @@
Title
Summary: This is the summary.
Test Plan: Tested things.
Reviewers: alincoln
~~~~~~~~~~
{
"title": "title",
"summary": "summary",
"test plan": "test plan",
"reviewers": "reviewers"
}
~~~~~~~~~~
{
"title": "Title",
"summary": "This is the summary.",
"test plan": "Tested things.",
"reviewers": "alincoln"
}
~~~~~~~~~~
[]

View file

@ -0,0 +1,12 @@
fix bug
it had a bug but I fixed it
~~~~~~~~~~
{}
~~~~~~~~~~
{
"title": "fix bug",
"summary": "it had a bug but I fixed it"
}
~~~~~~~~~~
[]

View file

@ -0,0 +1,9 @@
fix bug
~~~~~~~~~~
{}
~~~~~~~~~~
{
"title": "fix bug"
}
~~~~~~~~~~
[]