mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-18 18:51:12 +01:00
Move Differential commit message parsing to a separate, tested class
Summary: Ref T2222. We have a hunk of logic that purely does text parsing here; separate it and get coverage on it. Test Plan: - Ran new unit tests. - Used `differential.parsecommitmessage`. Reviewers: btrahan Reviewed By: btrahan CC: aran Maniphest Tasks: T2222 Differential Revision: https://secure.phabricator.com/D8444
This commit is contained in:
parent
f25cce1e69
commit
aff34077c5
10 changed files with 376 additions and 95 deletions
|
@ -356,6 +356,8 @@ phutil_register_library_map(array(
|
|||
'DifferentialCommentPreviewController' => 'applications/differential/controller/DifferentialCommentPreviewController.php',
|
||||
'DifferentialCommentQuery' => 'applications/differential/query/DifferentialCommentQuery.php',
|
||||
'DifferentialCommentSaveController' => 'applications/differential/controller/DifferentialCommentSaveController.php',
|
||||
'DifferentialCommitMessageParser' => 'applications/differential/parser/DifferentialCommitMessageParser.php',
|
||||
'DifferentialCommitMessageParserTestCase' => 'applications/differential/parser/__tests__/DifferentialCommitMessageParserTestCase.php',
|
||||
'DifferentialCommitsField' => 'applications/differential/customfield/DifferentialCommitsField.php',
|
||||
'DifferentialCommitsFieldSpecification' => 'applications/differential/field/specification/DifferentialCommitsFieldSpecification.php',
|
||||
'DifferentialConflictsFieldSpecification' => 'applications/differential/field/specification/DifferentialConflictsFieldSpecification.php',
|
||||
|
@ -2919,6 +2921,7 @@ phutil_register_library_map(array(
|
|||
'DifferentialCommentPreviewController' => 'DifferentialController',
|
||||
'DifferentialCommentQuery' => 'PhabricatorOffsetPagedQuery',
|
||||
'DifferentialCommentSaveController' => 'DifferentialController',
|
||||
'DifferentialCommitMessageParserTestCase' => 'PhabricatorTestCase',
|
||||
'DifferentialCommitsField' => 'DifferentialCustomField',
|
||||
'DifferentialCommitsFieldSpecification' => 'DifferentialFieldSpecification',
|
||||
'DifferentialConflictsFieldSpecification' => 'DifferentialFieldSpecification',
|
||||
|
|
|
@ -89,7 +89,8 @@ final class ConduitAPI_differential_parsecommitmessage_Method
|
|||
foreach ($aux_fields as $key => $aux_field) {
|
||||
$labels = $aux_field->getSupportedCommitMessageLabels();
|
||||
foreach ($labels as $label) {
|
||||
$normal_label = strtolower($label);
|
||||
$normal_label = DifferentialCommitMessageParser::normalizeFieldLabel(
|
||||
$label);
|
||||
if (!empty($label_map[$normal_label])) {
|
||||
$previous = $label_map[$normal_label];
|
||||
throw new Exception(
|
||||
|
@ -102,106 +103,20 @@ final class ConduitAPI_differential_parsecommitmessage_Method
|
|||
return $label_map;
|
||||
}
|
||||
|
||||
private function buildLabelRegexp(array $label_map) {
|
||||
$field_labels = array_keys($label_map);
|
||||
foreach ($field_labels as $key => $label) {
|
||||
$field_labels[$key] = preg_quote($label, '/');
|
||||
}
|
||||
$field_labels = implode('|', $field_labels);
|
||||
|
||||
$field_pattern = '/^(?P<field>'.$field_labels.'):(?P<text>.*)$/i';
|
||||
|
||||
return $field_pattern;
|
||||
}
|
||||
|
||||
private function parseCommitMessage($corpus, array $label_map) {
|
||||
$label_regexp = $this->buildLabelRegexp($label_map);
|
||||
$parser = id(new DifferentialCommitMessageParser())
|
||||
->setLabelMap($label_map)
|
||||
->setTitleKey('title')
|
||||
->setSummaryKey('summary');
|
||||
|
||||
// Note, deliberately not populating $seen with 'title' because it is
|
||||
// optional to include the 'Title:' label. We're doing a little special
|
||||
// casing to consume the first line as the title regardless of whether you
|
||||
// label it as such or not.
|
||||
$field = 'title';
|
||||
$result = $parser->parseCorpus($corpus);
|
||||
|
||||
$seen = array();
|
||||
$lines = explode("\n", trim($corpus));
|
||||
$field_map = array();
|
||||
foreach ($lines as $key => $line) {
|
||||
$match = null;
|
||||
if (preg_match($label_regexp, $line, $match)) {
|
||||
$lines[$key] = trim($match['text']);
|
||||
$field = $label_map[strtolower($match['field'])];
|
||||
if (!empty($seen[$field])) {
|
||||
$this->errors[] = "Field '{$field}' occurs twice in commit message!";
|
||||
}
|
||||
$seen[$field] = true;
|
||||
}
|
||||
$field_map[$key] = $field;
|
||||
foreach ($parser->getErrors() as $error) {
|
||||
$this->errors[] = $error;
|
||||
}
|
||||
|
||||
$fields = array();
|
||||
foreach ($lines as $key => $line) {
|
||||
$fields[$field_map[$key]][] = $line;
|
||||
}
|
||||
|
||||
// This is a piece of special-cased magic which allows you to omit the
|
||||
// field labels for "title" and "summary". If the user enters a large block
|
||||
// of text at the beginning of the commit message with an empty line in it,
|
||||
// treat everything before the blank line as "title" and everything after
|
||||
// as "summary".
|
||||
if (isset($fields['title']) && empty($fields['summary'])) {
|
||||
$lines = $fields['title'];
|
||||
for ($ii = 0; $ii < count($lines); $ii++) {
|
||||
if (strlen(trim($lines[$ii])) == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ($ii != count($lines)) {
|
||||
$fields['title'] = array_slice($lines, 0, $ii);
|
||||
$fields['summary'] = array_slice($lines, $ii);
|
||||
}
|
||||
}
|
||||
|
||||
// Implode all the lines back into chunks of text.
|
||||
foreach ($fields as $name => $lines) {
|
||||
$data = rtrim(implode("\n", $lines));
|
||||
$data = ltrim($data, "\n");
|
||||
$fields[$name] = $data;
|
||||
}
|
||||
|
||||
// This is another piece of special-cased magic which allows you to
|
||||
// enter a ridiculously long title, or just type a big block of stream
|
||||
// of consciousness text, and have some sort of reasonable result conjured
|
||||
// from it.
|
||||
if (isset($fields['title'])) {
|
||||
$terminal = '...';
|
||||
$title = $fields['title'];
|
||||
$short = phutil_utf8_shorten($title, 250, $terminal);
|
||||
if ($short != $title) {
|
||||
|
||||
// If we shortened the title, split the rest into the summary, so
|
||||
// we end up with a title like:
|
||||
//
|
||||
// Title title tile title title...
|
||||
//
|
||||
// ...and a summary like:
|
||||
//
|
||||
// ...title title title.
|
||||
//
|
||||
// Summary summary summary summary.
|
||||
|
||||
$summary = idx($fields, 'summary', '');
|
||||
$offset = strlen($short) - strlen($terminal);
|
||||
$remainder = ltrim(substr($fields['title'], $offset));
|
||||
$summary = '...'.$remainder."\n\n".$summary;
|
||||
$summary = rtrim($summary, "\n");
|
||||
|
||||
$fields['title'] = $short;
|
||||
$fields['summary'] = $summary;
|
||||
}
|
||||
}
|
||||
|
||||
return $fields;
|
||||
return $result;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,210 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Parses commit messages (containing relaively freeform text with textual
|
||||
* field labels) into a dictionary of fields.
|
||||
*
|
||||
* $parser = id(new DifferentialCommitMessageParser())
|
||||
* ->setLabelMap($label_map)
|
||||
* ->setTitleKey($key_title)
|
||||
* ->setSummaryKey($key_summary);
|
||||
*
|
||||
* $fields = $parser->parseCorpus($corpus);
|
||||
* $errors = $parser->getErrors();
|
||||
*
|
||||
* This is used by Differential to parse messages entered from the command line.
|
||||
*
|
||||
* @task config Configuring the Parser
|
||||
* @task parse Parsing Messages
|
||||
* @task support Support Methods
|
||||
* @task internal Internals
|
||||
*/
|
||||
final class DifferentialCommitMessageParser {
|
||||
|
||||
private $labelMap;
|
||||
private $titleKey;
|
||||
private $summaryKey;
|
||||
private $errors;
|
||||
|
||||
|
||||
/* -( Configuring the Parser )--------------------------------------------- */
|
||||
|
||||
|
||||
/**
|
||||
* @task config
|
||||
*/
|
||||
public function setLabelMap(array $label_map) {
|
||||
$this->labelMap = $label_map;
|
||||
return $this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @task config
|
||||
*/
|
||||
public function setTitleKey($title_key) {
|
||||
$this->titleKey = $title_key;
|
||||
return $this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @task config
|
||||
*/
|
||||
public function setSummaryKey($summary_key) {
|
||||
$this->summaryKey = $summary_key;
|
||||
return $this;
|
||||
}
|
||||
|
||||
|
||||
/* -( Parsing Messages )--------------------------------------------------- */
|
||||
|
||||
|
||||
/**
|
||||
* @task parse
|
||||
*/
|
||||
public function parseCorpus($corpus) {
|
||||
$this->errors = array();
|
||||
|
||||
$label_map = $this->labelMap;
|
||||
$key_title = $this->titleKey;
|
||||
$key_summary = $this->summaryKey;
|
||||
|
||||
if (!$key_title || !$key_summary || ($label_map === null)) {
|
||||
throw new Exception(
|
||||
pht(
|
||||
'Expected labelMap, summaryKey and titleKey to be set before '.
|
||||
'parsing a corpus.'));
|
||||
}
|
||||
|
||||
$label_regexp = $this->buildLabelRegexp($label_map);
|
||||
|
||||
// NOTE: We're special casing things here to make the "Title:" label
|
||||
// optional in the message.
|
||||
$field = $key_title;
|
||||
|
||||
$seen = array();
|
||||
$lines = explode("\n", trim($corpus));
|
||||
$field_map = array();
|
||||
foreach ($lines as $key => $line) {
|
||||
$match = null;
|
||||
if (preg_match($label_regexp, $line, $match)) {
|
||||
$lines[$key] = trim($match['text']);
|
||||
$field = $label_map[self::normalizeFieldLabel($match['field'])];
|
||||
if (!empty($seen[$field])) {
|
||||
$this->errors[] = pht(
|
||||
'Field "%s" occurs twice in commit message!',
|
||||
$field);
|
||||
}
|
||||
$seen[$field] = true;
|
||||
}
|
||||
$field_map[$key] = $field;
|
||||
}
|
||||
|
||||
$fields = array();
|
||||
foreach ($lines as $key => $line) {
|
||||
$fields[$field_map[$key]][] = $line;
|
||||
}
|
||||
|
||||
// This is a piece of special-cased magic which allows you to omit the
|
||||
// field labels for "title" and "summary". If the user enters a large block
|
||||
// of text at the beginning of the commit message with an empty line in it,
|
||||
// treat everything before the blank line as "title" and everything after
|
||||
// as "summary".
|
||||
if (isset($fields[$key_title]) && empty($fields[$key_summary])) {
|
||||
$lines = $fields[$key_title];
|
||||
for ($ii = 0; $ii < count($lines); $ii++) {
|
||||
if (strlen(trim($lines[$ii])) == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ($ii != count($lines)) {
|
||||
$fields[$key_title] = array_slice($lines, 0, $ii);
|
||||
$summary = array_slice($lines, $ii);
|
||||
if (strlen(trim(implode("\n", $summary)))) {
|
||||
$fields[$key_summary] = $summary;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Implode all the lines back into chunks of text.
|
||||
foreach ($fields as $name => $lines) {
|
||||
$data = rtrim(implode("\n", $lines));
|
||||
$data = ltrim($data, "\n");
|
||||
$fields[$name] = $data;
|
||||
}
|
||||
|
||||
// This is another piece of special-cased magic which allows you to
|
||||
// enter a ridiculously long title, or just type a big block of stream
|
||||
// of consciousness text, and have some sort of reasonable result conjured
|
||||
// from it.
|
||||
if (isset($fields[$key_title])) {
|
||||
$terminal = '...';
|
||||
$title = $fields[$key_title];
|
||||
$short = phutil_utf8_shorten($title, 250, $terminal);
|
||||
if ($short != $title) {
|
||||
|
||||
// If we shortened the title, split the rest into the summary, so
|
||||
// we end up with a title like:
|
||||
//
|
||||
// Title title tile title title...
|
||||
//
|
||||
// ...and a summary like:
|
||||
//
|
||||
// ...title title title.
|
||||
//
|
||||
// Summary summary summary summary.
|
||||
|
||||
$summary = idx($fields, $key_summary, '');
|
||||
$offset = strlen($short) - strlen($terminal);
|
||||
$remainder = ltrim(substr($fields[$key_title], $offset));
|
||||
$summary = '...'.$remainder."\n\n".$summary;
|
||||
$summary = rtrim($summary, "\n");
|
||||
|
||||
$fields[$key_title] = $short;
|
||||
$fields[$key_summary] = $summary;
|
||||
}
|
||||
}
|
||||
|
||||
return $fields;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @task parse
|
||||
*/
|
||||
public function getErrors() {
|
||||
return $this->errors;
|
||||
}
|
||||
|
||||
|
||||
/* -( Support Methods )---------------------------------------------------- */
|
||||
|
||||
|
||||
/**
|
||||
* @task support
|
||||
*/
|
||||
public static function normalizeFieldLabel($label) {
|
||||
return phutil_utf8_strtolower($label);
|
||||
}
|
||||
|
||||
|
||||
/* -( Internals )---------------------------------------------------------- */
|
||||
|
||||
|
||||
/**
|
||||
* @task internal
|
||||
*/
|
||||
private function buildLabelRegexp(array $label_map) {
|
||||
$field_labels = array_keys($label_map);
|
||||
foreach ($field_labels as $key => $label) {
|
||||
$field_labels[$key] = preg_quote($label, '/');
|
||||
}
|
||||
$field_labels = implode('|', $field_labels);
|
||||
|
||||
$field_pattern = '/^(?P<field>'.$field_labels.'):(?P<text>.*)$/i';
|
||||
|
||||
return $field_pattern;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
<?php
|
||||
|
||||
final class DifferentialCommitMessageParserTestCase
|
||||
extends PhabricatorTestCase {
|
||||
|
||||
public function testDifferentialCommitMessageParser() {
|
||||
$dir = dirname(__FILE__).'/messages/';
|
||||
$list = Filesystem::listDirectory($dir, $include_hidden = false);
|
||||
foreach ($list as $file) {
|
||||
if (!preg_match('/.txt$/', $file)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$data = Filesystem::readFile($dir.$file);
|
||||
$divider = "~~~~~~~~~~\n";
|
||||
$parts = explode($divider, $data);
|
||||
if (count($parts) !== 4) {
|
||||
throw new Exception(
|
||||
pht(
|
||||
'Expected test file "%s" to contain four parts (message, fields, '.
|
||||
'output, errors) divided by "~~~~~~~~~~".',
|
||||
$file));
|
||||
}
|
||||
|
||||
list($message, $fields, $output, $errors) = $parts;
|
||||
$fields = phutil_json_decode($fields, null);
|
||||
$output = phutil_json_decode($output, null);
|
||||
$errors = phutil_json_decode($errors, null);
|
||||
|
||||
if ($fields === null || $output === null || $errors === null) {
|
||||
throw new Exception(
|
||||
pht(
|
||||
'Expected test file "%s" to contain valid JSON in its sections.',
|
||||
$file));
|
||||
}
|
||||
|
||||
$parser = id(new DifferentialCommitMessageParser())
|
||||
->setLabelMap($fields)
|
||||
->setTitleKey('title')
|
||||
->setSummaryKey('summary');
|
||||
|
||||
$result_output = $parser->parseCorpus($message);
|
||||
$result_errors = $parser->getErrors();
|
||||
|
||||
$this->assertEqual($output, $result_output);
|
||||
$this->assertEqual($errors, $result_errors);
|
||||
}
|
||||
}
|
||||
|
||||
public function testDifferentialCommitMessageParserNormalization() {
|
||||
$map = array(
|
||||
'Test Plan' => 'test plan',
|
||||
'REVIEWERS' => 'reviewers',
|
||||
'sUmmArY' => 'summary',
|
||||
);
|
||||
|
||||
foreach ($map as $input => $expect) {
|
||||
$this->assertEqual(
|
||||
$expect,
|
||||
DifferentialCommitMessageParser::normalizeFieldLabel($input),
|
||||
pht('Field normalization of label "%s".', $input));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
fix bug
|
||||
|
||||
color: red
|
||||
color: blue
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"color": "color"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"title": "fix bug",
|
||||
"color": "red\nblue"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
[
|
||||
"Field \"color\" occurs twice in commit message!"
|
||||
]
|
|
@ -0,0 +1,11 @@
|
|||
123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
|
||||
|
||||
~~~~~~~~~~
|
||||
{}
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"title": "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567...",
|
||||
"summary": "...89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
[]
|
|
@ -0,0 +1,15 @@
|
|||
fix bug
|
||||
|
||||
hue: black
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"color": "color",
|
||||
"hue": "color"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"title": "fix bug",
|
||||
"color": "black"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
[]
|
|
@ -0,0 +1,24 @@
|
|||
Title
|
||||
|
||||
Summary: This is the summary.
|
||||
|
||||
Test Plan: Tested things.
|
||||
|
||||
Reviewers: alincoln
|
||||
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"title": "title",
|
||||
"summary": "summary",
|
||||
"test plan": "test plan",
|
||||
"reviewers": "reviewers"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"title": "Title",
|
||||
"summary": "This is the summary.",
|
||||
"test plan": "Tested things.",
|
||||
"reviewers": "alincoln"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
[]
|
|
@ -0,0 +1,12 @@
|
|||
fix bug
|
||||
|
||||
it had a bug but I fixed it
|
||||
~~~~~~~~~~
|
||||
{}
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"title": "fix bug",
|
||||
"summary": "it had a bug but I fixed it"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
[]
|
|
@ -0,0 +1,9 @@
|
|||
fix bug
|
||||
~~~~~~~~~~
|
||||
{}
|
||||
~~~~~~~~~~
|
||||
{
|
||||
"title": "fix bug"
|
||||
}
|
||||
~~~~~~~~~~
|
||||
[]
|
Loading…
Reference in a new issue