1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-10 14:51:06 +01:00

Simplify hunk parsing

Summary:
Simplify whitespace-ignoring diffing:

  - Currently, we generate two diffs (one ignoring whitespace, one normal) and then copy the //text content// from the normal one to the whitespace-ignoring one.
  - Instead, copy the //change types// from the ignoring one to the normal one.
    - This is cheaper and much simpler.
    - It means that we have the right change types in reparseHunksForSpecialAttributes(), and can simplify some other things.

Simplify whitespace changes, unchanged files, and deleted file detections:

  - Currently, we do this inline with a bunch of other stuff, in the reparse step.
  - Instead, do it separately. This is simpler.

Simplify intraline whitespace handling:

  - Currently, this is a complicated mess that makes roughly zero sense.
  - Instead, do it in reparse in a straightforward way.

Partially fix handling of files changed only by changing whitespace.
Partially fix handling of unchanged files.

Test Plan:
  - Ran unit tests.
  - Created context-less diffs, verified they rendered reasonably.
  - Generated a diff with prefix whitespace, suffix whitespace, intraline whitespace, and non-whitespace changes. Verified changes rendered correctly in "ignore most" and "show all" modes.
  - Verified unchanged files and files with only whitspace changes render with the correct masks.

Reviewers: btrahan

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T2009

Differential Revision: https://secure.phabricator.com/D4398
This commit is contained in:
epriestley 2013-01-11 15:25:15 -08:00
parent dbeb60adbe
commit 039a5a6d86
2 changed files with 205 additions and 234 deletions

View file

@ -40,7 +40,7 @@ final class DifferentialChangesetParser {
private $markupEngine;
private $highlightErrors;
const CACHE_VERSION = 9;
const CACHE_VERSION = 10;
const CACHE_MAX_SIZE = 8e6;
const ATTR_GENERATED = 'attr:generated';
@ -497,68 +497,39 @@ final class DifferentialChangesetParser {
}
}
$old_text = array();
$new_text = array();
$is_unchanged = null;
$whitelines = null;
$hunk_parser = new DifferentialHunkParser();
$hunk_parser->setWhitespaceMode($whitespace_mode);
$hunk_parser->parseHunksForLineData($changeset->getHunks());
// Depending on the whitespace mode, we may need to compute a different
// set of changes than the set of changes in the hunk data (specificaly,
// we might want to consider changed lines which have only whitespace
// changes as unchanged).
if ($ignore_all) {
// Huge mess. Generate a "-bw" (ignore all whitespace changes) diff,
// parse it out, and then play a shell game with the parsed format
// later so we highlight only changed lines but render
// whitespace differences. If we don't do this, we either fail to
// render whitespace changes (which is incredibly confusing,
// especially for python) or often produce a much larger set of
// differences than necessary.
$engine = new PhabricatorDifferenceEngine();
$engine->setIgnoreWhitespace(true);
$no_whitespace_changeset = $engine->generateChangesetFromFileContent(
$old_file,
$new_file);
$hunk_parser = new DifferentialHunkParser();
$hunk_parser->setWhitespaceMode($this->whitespaceMode);
$hunk_parser->parseHunksForLineData($changeset->getHunks());
$hunk_parser->reparseHunksForSpecialAttributes();
$is_unchanged = $hunk_parser->getIsUnchanged();
$whitelines = $hunk_parser->getHasWhiteLines();
$type_parser = new DifferentialHunkParser();
$type_parser->parseHunksForLineData($no_whitespace_changeset->getHunks());
// While we aren't updating $this->changeset (since it has a bunch
// of metadata we need to preserve, so that headers like "this file
// was moved" render correctly), we're overwriting the local
// $changeset so that the block below will choose the synthetic
// hunks we've built instead of the original hunks.
$changeset = $no_whitespace_changeset;
// let the games continue - pull out the proper text so we can
// later accurately display the diff
$old_text = ipull($hunk_parser->getOldLines(), 'text', 'line');
$new_text = ipull($hunk_parser->getNewLines(), 'text', 'line');
$hunk_parser->setOldLineTypeMap($type_parser->getOldLineTypeMap());
$hunk_parser->setNewLineTypeMap($type_parser->getNewLineTypeMap());
}
// This either uses the real hunks, or synthetic hunks we built above.
// $is_unchanged, $whitelines, $old_text and $new_text are populated
// for synthetic hunks, otherwise they are default values.
$hunk_parser = new DifferentialHunkParser();
$hunk_parser->setWhitespaceMode($this->whitespaceMode);
$hunk_parser->parseHunksForLineData($changeset->getHunks());
$hunk_parser->reparseHunksForSpecialAttributes();
$unchanged = false;
// i.e. if we didn't have to play horrendous games above
if ($is_unchanged === null) {
if ($hunk_parser->getIsUnchanged()) {
$filetype = $this->changeset->getFileType();
if ($filetype == DifferentialChangeType::FILE_TEXT ||
$filetype == DifferentialChangeType::FILE_SYMLINK) {
$unchanged = true;
}
if (!$hunk_parser->getHasAnyChanges()) {
$filetype = $this->changeset->getFileType();
if ($filetype == DifferentialChangeType::FILE_TEXT ||
$filetype == DifferentialChangeType::FILE_SYMLINK) {
$unchanged = true;
}
$whitelines = $hunk_parser->getHasWhiteLines();
} else {
$unchanged = $is_unchanged;
}
$changetype = $this->changeset->getChangeType();
if ($changetype == DifferentialChangeType::TYPE_MOVE_AWAY) {
// sometimes we show moved files as unchanged, sometimes deleted,
@ -567,13 +538,13 @@ final class DifferentialChangesetParser {
// omit the 'not changed' notice if this is the source of a move
$unchanged = false;
}
$this->setSpecialAttributes(array(
self::ATTR_UNCHANGED => $unchanged,
self::ATTR_DELETED => $hunk_parser->getIsDeleted(),
self::ATTR_WHITELINES => $whitelines
self::ATTR_WHITELINES => !$hunk_parser->getHasTextChanges(),
));
$hunk_parser->updateParsedHunksText($old_text, $new_text);
$hunk_parser->generateIntraLineDiffs();
$hunk_parser->generateVisibileLinesMask();
@ -722,17 +693,14 @@ final class DifferentialChangesetParser {
'need to be reviewed.'),
true);
} else if ($this->isUnchanged()) {
if ($this->isWhitespaceOnly()) {
$shield = $renderer->renderShield(
pht(
'This file was changed only by adding or removing trailing '.
'whitespace.'),
false);
} else {
$shield = $renderer->renderShield(
pht("The contents of this file were not changed."),
false);
}
} else if ($this->isWhitespaceOnly()) {
$shield = $renderer->renderShield(
pht(
'This file was changed only by adding or removing whitespace.'),
false);
} else if ($this->isDeleted()) {
$shield = $renderer->renderShield(
pht("This file was completely deleted."),

View file

@ -2,15 +2,11 @@
final class DifferentialHunkParser {
private $isUnchanged;
private $hasWhiteLines;
private $isDeleted;
private $oldLines;
private $newLines;
private $skipIntraLines;
private $whitespaceMode;
private $intraLineDiffs;
private $visibleLinesMask;
private $whitespaceMode;
/**
* Get a map of lines on which hunks start, other than line 1. This
@ -59,32 +55,6 @@ final class DifferentialHunkParser {
return $this->intraLineDiffs;
}
public function setWhitespaceMode($white_space_mode) {
$this->whitespaceMode = $white_space_mode;
return $this;
}
private function getWhitespaceMode() {
if ($this->whitespaceMode === null) {
throw new Exception(
'You must setWhitespaceMode before accessing this data.'
);
}
return $this->whitespaceMode;
}
private function setSkipIntraLines($skip_intra_lines) {
$this->skipIntraLines = $skip_intra_lines;
return $this;
}
public function getSkipIntraLines() {
if ($this->skipIntraLines === null) {
throw new Exception(
'You must reparseHunksForSpecialAttributes before accessing this data.'
);
}
return $this->skipIntraLines;
}
private function setNewLines($new_lines) {
$this->newLines = $new_lines;
return $this;
@ -111,30 +81,137 @@ final class DifferentialHunkParser {
return $this->oldLines;
}
private function setIsDeleted($is_deleted) {
$this->isDeleted = $is_deleted;
public function getOldLineTypeMap() {
$map = array();
$old = $this->getOldLines();
foreach ($old as $o) {
if (!$o) {
continue;
}
$map[$o['line']] = $o['type'];
}
return $map;
}
public function setOldLineTypeMap(array $map) {
$lines = $this->getOldLines();
foreach ($lines as $key => $data) {
$lines[$key]['type'] = $map[$data['line']];
}
$this->oldLines = $lines;
return $this;
}
public function getNewLineTypeMap() {
$map = array();
$new = $this->getNewLines();
foreach ($new as $n) {
if (!$n) {
continue;
}
$map[$n['line']] = $n['type'];
}
return $map;
}
public function setNewLineTypeMap(array $map) {
$lines = $this->getNewLines();
foreach ($lines as $key => $data) {
$lines[$key]['type'] = $map[$data['line']];
}
$this->newLines = $lines;
return $this;
}
public function setWhitespaceMode($white_space_mode) {
$this->whitespaceMode = $white_space_mode;
return $this;
}
private function getWhitespaceMode() {
if ($this->whitespaceMode === null) {
throw new Exception(
'You must setWhitespaceMode before accessing this data.'
);
}
return $this->whitespaceMode;
}
public function getIsDeleted() {
return $this->isDeleted;
foreach ($this->getNewLines() as $line) {
if ($line) {
// At least one new line, so the entire file wasn't deleted.
return false;
}
}
foreach ($this->getOldLines() as $line) {
if ($line) {
// No new lines, at least one old line; the entire file was deleted.
return true;
}
}
// This is an empty file.
return false;
}
private function setHasWhiteLines($has_white_lines) {
$this->hasWhiteLines = $has_white_lines;
return $this;
}
public function getHasWhiteLines() {
return $this->hasWhiteLines;
/**
* Returns true if the hunks change any text, not just whitespace.
*/
public function getHasTextChanges() {
return $this->getHasChanges('text');
}
public function setIsUnchanged($is_unchanged) {
$this->isUnchanged = $is_unchanged;
return $this;
/**
* Returns true if the hunks change anything, including whitespace.
*/
public function getHasAnyChanges() {
return $this->getHasChanges('any');
}
public function getIsUnchanged() {
return $this->isUnchanged;
private function getHasChanges($filter) {
if ($filter !== 'any' && $filter !== 'text') {
throw new Exception("Unknown change filter '{$filter}'.");
}
$old = $this->getOldLines();
$new = $this->getNewLines();
$is_any = ($filter === 'any');
foreach ($old as $key => $o) {
$n = $new[$key];
if ($o === null || $n === null) {
// One side is missing, and it's impossible for both sides to be null,
// so the other side must have something, and thus the two sides are
// different and the file has been changed under any type of filter.
return true;
}
if ($o['type'] !== $n['type']) {
// The types are different, so either the underlying text is actually
// different or whatever whitespace rules we're using consider them
// different.
return true;
}
if ($o['text'] !== $n['text']) {
if ($is_any) {
// The text is different, so there's a change.
return true;
} else if (trim($o['text']) !== trim($n['text'])) {
return true;
}
}
}
// No changes anywhere in the file.
return false;
}
/**
* This function takes advantage of the parsing work done in
* @{method:parseHunksForLineData} and continues the struggle to hammer this
@ -142,30 +219,20 @@ final class DifferentialHunkParser {
*
* In particular, this function re-parses the hunks to make them equivalent
* in length for easy rendering, adding `null` as necessary to pad the
* length. Further, this re-parsing stage figures out various special
* properties about the changes such as if the change is a delete, has any
* whitelines, or has any changes whatsoever. Finally, this function
* calculates what lines - if any - should be skipped within a diff display,
* ostensibly because they don't have anything to do with the current set
* of changes with respect to display options.
* length.
*
* Anyhoo, this function is not particularly well-named but I try.
*
* NOTE: this function must be called after
* @{method:parseHunksForLineData}.
* NOTE: you must @{method:setWhitespaceMode} before calling this method.
*/
public function reparseHunksForSpecialAttributes() {
$rebuild_old = array();
$rebuild_new = array();
$skip_intra = array();
$old_lines = array_reverse($this->getOldLines());
$new_lines = array_reverse($this->getNewLines());
$whitelines = false;
$changed = false;
while (count($old_lines) || count($new_lines)) {
$old_line_data = array_pop($old_lines);
$new_line_data = array_pop($new_lines);
@ -188,7 +255,6 @@ final class DifferentialHunkParser {
if ($new_line_data) {
array_push($new_lines, $new_line_data);
}
$changed = true;
continue;
}
@ -198,52 +264,9 @@ final class DifferentialHunkParser {
if ($old_line_data) {
array_push($old_lines, $old_line_data);
}
$changed = true;
continue;
}
if ($this->getWhitespaceMode() !=
DifferentialChangesetParser::WHITESPACE_SHOW_ALL) {
$similar = false;
switch ($this->getWhitespaceMode()) {
case DifferentialChangesetParser::WHITESPACE_IGNORE_TRAILING:
if (rtrim($old_line_data['text']) ==
rtrim($new_line_data['text'])) {
if ($old_line_data['type']) {
// If we're converting this into an unchanged line because of
// a trailing whitespace difference, mark it as a whitespace
// change so we can show "This file was modified only by
// adding or removing trailing whitespace." instead of
// "This file was not modified.".
$whitelines = true;
}
$similar = true;
}
break;
default:
// In this case, the lines are similar if there is no change type
// (that is, just trust the diff algorithm).
if (!$old_line_data['type']) {
$similar = true;
}
break;
}
if ($similar) {
if ($old_line_data['type'] == '\\') {
// These are similar because they're "No newline at end of file"
// comments.
} else {
$old_line_data['type'] = null;
$new_line_data['type'] = null;
$skip_intra[count($rebuild_old)] = true;
}
} else {
$changed = true;
}
} else {
$changed = true;
}
$rebuild_old[] = $old_line_data;
$rebuild_new[] = $new_line_data;
}
@ -251,106 +274,86 @@ final class DifferentialHunkParser {
$this->setOldLines($rebuild_old);
$this->setNewLines($rebuild_new);
$this->setIsUnchanged(!$changed);
$this->setHasWhiteLines($whitelines);
$this->setIsDeleted(array_filter($this->getOldLines()) &&
!array_filter($this->getNewLines()));
$this->setSkipIntraLines($skip_intra);
$this->updateChangeTypesForWhitespaceMode();
return $this;
}
public function updateParsedHunksText($old_text, $new_text) {
if ($old_text || $new_text) {
private function updateChangeTypesForWhitespaceMode() {
$mode = $this->getWhitespaceMode();
// Use this parser's side-by-side line information -- notably, the
// change types -- but replace all the line text.
// This lets us render whitespace-only changes without marking them as
// different.
$mode_show_all = DifferentialChangesetParser::WHITESPACE_SHOW_ALL;
if ($mode === $mode_show_all) {
// If we're showing all whitespace, we don't need to perform any updates.
return;
}
$old = $this->getOldLines();
$new = $this->getNewLines();
$mode_trailing = DifferentialChangesetParser::WHITESPACE_IGNORE_TRAILING;
$is_trailing = ($mode === $mode_trailing);
foreach ($old as $k => $desc) {
if (empty($desc)) {
continue;
}
$old[$k]['text'] = idx($old_text, $desc['line']);
$new = $this->getNewLines();
$old = $this->getOldLines();
foreach ($old as $key => $o) {
$n = $new[$key];
if (!$o || !$n) {
continue;
}
$skip_intra = $this->getSkipIntraLines();
foreach ($new as $k => $desc) {
if (empty($desc)) {
continue;
if ($is_trailing) {
// In "trailing" mode, we need to identify lines which are marked
// changed but differ only by trailing whitespace. We mark these lines
// unchanged.
if ($o['type'] != $n['type']) {
if (rtrim($o['text']) === rtrim($n['text'])) {
$old[$key]['type'] = null;
$new[$key]['type'] = null;
}
}
$new[$k]['text'] = idx($new_text, $desc['line']);
if ($this->whitespaceMode ==
DifferentialChangesetParser::WHITESPACE_IGNORE_FORCE) {
// Under forced ignore mode, ignore even internal whitespace
// changes.
continue;
}
// If there's a corresponding "old" text and the line is marked as
// unchanged, test if there are internal whitespace changes between
// non-whitespace characters, e.g. spaces added to a string or spaces
// added around operators. If we find internal spaces, mark the line
// as changed.
//
// We only need to do this for "new" lines because any line that is
// missing either "old" or "new" text certainly can not have internal
// whitespace changes without also having non-whitespace changes,
// because characters had to be either added or removed to create the
// possibility of internal whitespace.
if (isset($old[$k]['text']) && empty($new[$k]['type'])) {
if (trim($old[$k]['text']) != trim($new[$k]['text'])) {
// The strings aren't the same when trimmed, so there are internal
// whitespace changes. Mark this line changed.
$old[$k]['type'] = '-';
$new[$k]['type'] = '+';
// Re-mark this line for intraline diffing.
unset($skip_intra[$k]);
} else {
// In "ignore most" and "ignore all" modes, we need to identify lines
// which are marked unchanged but have internal whitespace changes.
// We want to ignore leading and trailing whitespace changes only, not
// internal whitespace changes (`diff` doesn't have a mode for this, so
// we have to fix it here). If the text is marked unchanged but the
// old and new text differs by internal space, mark the lines changed.
if ($o['type'] === null && $n['type'] === null) {
if ($o['text'] !== $n['text']) {
if (trim($o['text']) !== trim($n['text'])) {
$old[$key]['type'] = '-';
$new[$key]['type'] = '+';
}
}
}
}
$this->setSkipIntraLines($skip_intra);
$this->setOldLines($old);
$this->setNewLines($new);
}
$this->setOldLines($old);
$this->setNewLines($new);
return $this;
}
public function generateIntraLineDiffs() {
$old = $this->getOldLines();
$new = $this->getNewLines();
$skip_intra = $this->getSkipIntraLines();
$intra_line_diffs = array();
$min_length = min(count($old), count($new));
for ($ii = 0; $ii < $min_length; $ii++) {
if ($old[$ii] || $new[$ii]) {
if (isset($old[$ii]['text'])) {
$otext = $old[$ii]['text'];
} else {
$otext = '';
}
if (isset($new[$ii]['text'])) {
$ntext = $new[$ii]['text'];
} else {
$ntext = '';
}
if ($otext != $ntext && empty($skip_intra[$ii])) {
$intra_line_diffs[$ii] = ArcanistDiffUtils::generateIntralineDiff(
$otext,
$ntext);
}
$diffs = array();
foreach ($old as $key => $o) {
$n = $new[$key];
if (!$o || !$n) {
continue;
}
if ($o['type'] != $n['type']) {
$diffs[$key] = ArcanistDiffUtils::generateIntralineDiff(
$o['text'],
$n['text']);
}
}
$this->setIntraLineDiffs($intra_line_diffs);
$this->setIntraLineDiffs($diffs);
return $this;
}