From 039a5a6d8618262c0fae4df781ba384f8e2dae43 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 11 Jan 2013 15:25:15 -0800 Subject: [PATCH] Simplify hunk parsing Summary: Simplify whitespace-ignoring diffing: - Currently, we generate two diffs (one ignoring whitespace, one normal) and then copy the //text content// from the normal one to the whitespace-ignoring one. - Instead, copy the //change types// from the ignoring one to the normal one. - This is cheaper and much simpler. - It means that we have the right change types in reparseHunksForSpecialAttributes(), and can simplify some other things. Simplify whitespace changes, unchanged files, and deleted file detections: - Currently, we do this inline with a bunch of other stuff, in the reparse step. - Instead, do it separately. This is simpler. Simplify intraline whitespace handling: - Currently, this is a complicated mess that makes roughly zero sense. - Instead, do it in reparse in a straightforward way. Partially fix handling of files changed only by changing whitespace. Partially fix handling of unchanged files. Test Plan: - Ran unit tests. - Created context-less diffs, verified they rendered reasonably. - Generated a diff with prefix whitespace, suffix whitespace, intraline whitespace, and non-whitespace changes. Verified changes rendered correctly in "ignore most" and "show all" modes. - Verified unchanged files and files with only whitspace changes render with the correct masks. Reviewers: btrahan Reviewed By: btrahan CC: aran Maniphest Tasks: T2009 Differential Revision: https://secure.phabricator.com/D4398 --- .../parser/DifferentialChangesetParser.php | 84 ++--- .../parser/DifferentialHunkParser.php | 355 +++++++++--------- 2 files changed, 205 insertions(+), 234 deletions(-) diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php index 1728767227..c8b9ccfe63 100644 --- a/src/applications/differential/parser/DifferentialChangesetParser.php +++ b/src/applications/differential/parser/DifferentialChangesetParser.php @@ -40,7 +40,7 @@ final class DifferentialChangesetParser { private $markupEngine; private $highlightErrors; - const CACHE_VERSION = 9; + const CACHE_VERSION = 10; const CACHE_MAX_SIZE = 8e6; const ATTR_GENERATED = 'attr:generated'; @@ -497,68 +497,39 @@ final class DifferentialChangesetParser { } } - $old_text = array(); - $new_text = array(); - $is_unchanged = null; - $whitelines = null; + $hunk_parser = new DifferentialHunkParser(); + $hunk_parser->setWhitespaceMode($whitespace_mode); + $hunk_parser->parseHunksForLineData($changeset->getHunks()); + + // Depending on the whitespace mode, we may need to compute a different + // set of changes than the set of changes in the hunk data (specificaly, + // we might want to consider changed lines which have only whitespace + // changes as unchanged). if ($ignore_all) { - - // Huge mess. Generate a "-bw" (ignore all whitespace changes) diff, - // parse it out, and then play a shell game with the parsed format - // later so we highlight only changed lines but render - // whitespace differences. If we don't do this, we either fail to - // render whitespace changes (which is incredibly confusing, - // especially for python) or often produce a much larger set of - // differences than necessary. - $engine = new PhabricatorDifferenceEngine(); $engine->setIgnoreWhitespace(true); $no_whitespace_changeset = $engine->generateChangesetFromFileContent( $old_file, $new_file); - $hunk_parser = new DifferentialHunkParser(); - $hunk_parser->setWhitespaceMode($this->whitespaceMode); - $hunk_parser->parseHunksForLineData($changeset->getHunks()); - $hunk_parser->reparseHunksForSpecialAttributes(); - $is_unchanged = $hunk_parser->getIsUnchanged(); - $whitelines = $hunk_parser->getHasWhiteLines(); + $type_parser = new DifferentialHunkParser(); + $type_parser->parseHunksForLineData($no_whitespace_changeset->getHunks()); - // While we aren't updating $this->changeset (since it has a bunch - // of metadata we need to preserve, so that headers like "this file - // was moved" render correctly), we're overwriting the local - // $changeset so that the block below will choose the synthetic - // hunks we've built instead of the original hunks. - $changeset = $no_whitespace_changeset; - - // let the games continue - pull out the proper text so we can - // later accurately display the diff - $old_text = ipull($hunk_parser->getOldLines(), 'text', 'line'); - $new_text = ipull($hunk_parser->getNewLines(), 'text', 'line'); + $hunk_parser->setOldLineTypeMap($type_parser->getOldLineTypeMap()); + $hunk_parser->setNewLineTypeMap($type_parser->getNewLineTypeMap()); } - // This either uses the real hunks, or synthetic hunks we built above. - // $is_unchanged, $whitelines, $old_text and $new_text are populated - // for synthetic hunks, otherwise they are default values. - $hunk_parser = new DifferentialHunkParser(); - $hunk_parser->setWhitespaceMode($this->whitespaceMode); - $hunk_parser->parseHunksForLineData($changeset->getHunks()); $hunk_parser->reparseHunksForSpecialAttributes(); $unchanged = false; - // i.e. if we didn't have to play horrendous games above - if ($is_unchanged === null) { - if ($hunk_parser->getIsUnchanged()) { - $filetype = $this->changeset->getFileType(); - if ($filetype == DifferentialChangeType::FILE_TEXT || - $filetype == DifferentialChangeType::FILE_SYMLINK) { - $unchanged = true; - } + if (!$hunk_parser->getHasAnyChanges()) { + $filetype = $this->changeset->getFileType(); + if ($filetype == DifferentialChangeType::FILE_TEXT || + $filetype == DifferentialChangeType::FILE_SYMLINK) { + $unchanged = true; } - $whitelines = $hunk_parser->getHasWhiteLines(); - } else { - $unchanged = $is_unchanged; } + $changetype = $this->changeset->getChangeType(); if ($changetype == DifferentialChangeType::TYPE_MOVE_AWAY) { // sometimes we show moved files as unchanged, sometimes deleted, @@ -567,13 +538,13 @@ final class DifferentialChangesetParser { // omit the 'not changed' notice if this is the source of a move $unchanged = false; } + $this->setSpecialAttributes(array( self::ATTR_UNCHANGED => $unchanged, self::ATTR_DELETED => $hunk_parser->getIsDeleted(), - self::ATTR_WHITELINES => $whitelines + self::ATTR_WHITELINES => !$hunk_parser->getHasTextChanges(), )); - $hunk_parser->updateParsedHunksText($old_text, $new_text); $hunk_parser->generateIntraLineDiffs(); $hunk_parser->generateVisibileLinesMask(); @@ -722,17 +693,14 @@ final class DifferentialChangesetParser { 'need to be reviewed.'), true); } else if ($this->isUnchanged()) { - if ($this->isWhitespaceOnly()) { - $shield = $renderer->renderShield( - pht( - 'This file was changed only by adding or removing trailing '. - 'whitespace.'), - false); - } else { $shield = $renderer->renderShield( pht("The contents of this file were not changed."), false); - } + } else if ($this->isWhitespaceOnly()) { + $shield = $renderer->renderShield( + pht( + 'This file was changed only by adding or removing whitespace.'), + false); } else if ($this->isDeleted()) { $shield = $renderer->renderShield( pht("This file was completely deleted."), diff --git a/src/applications/differential/parser/DifferentialHunkParser.php b/src/applications/differential/parser/DifferentialHunkParser.php index 0214d0092c..424c6cfbf0 100644 --- a/src/applications/differential/parser/DifferentialHunkParser.php +++ b/src/applications/differential/parser/DifferentialHunkParser.php @@ -2,15 +2,11 @@ final class DifferentialHunkParser { - private $isUnchanged; - private $hasWhiteLines; - private $isDeleted; private $oldLines; private $newLines; - private $skipIntraLines; - private $whitespaceMode; private $intraLineDiffs; private $visibleLinesMask; + private $whitespaceMode; /** * Get a map of lines on which hunks start, other than line 1. This @@ -59,32 +55,6 @@ final class DifferentialHunkParser { return $this->intraLineDiffs; } - public function setWhitespaceMode($white_space_mode) { - $this->whitespaceMode = $white_space_mode; - return $this; - } - private function getWhitespaceMode() { - if ($this->whitespaceMode === null) { - throw new Exception( - 'You must setWhitespaceMode before accessing this data.' - ); - } - return $this->whitespaceMode; - } - - private function setSkipIntraLines($skip_intra_lines) { - $this->skipIntraLines = $skip_intra_lines; - return $this; - } - public function getSkipIntraLines() { - if ($this->skipIntraLines === null) { - throw new Exception( - 'You must reparseHunksForSpecialAttributes before accessing this data.' - ); - } - return $this->skipIntraLines; - } - private function setNewLines($new_lines) { $this->newLines = $new_lines; return $this; @@ -111,30 +81,137 @@ final class DifferentialHunkParser { return $this->oldLines; } - private function setIsDeleted($is_deleted) { - $this->isDeleted = $is_deleted; + public function getOldLineTypeMap() { + $map = array(); + $old = $this->getOldLines(); + foreach ($old as $o) { + if (!$o) { + continue; + } + $map[$o['line']] = $o['type']; + } + return $map; + } + + public function setOldLineTypeMap(array $map) { + $lines = $this->getOldLines(); + foreach ($lines as $key => $data) { + $lines[$key]['type'] = $map[$data['line']]; + } + $this->oldLines = $lines; return $this; } + + public function getNewLineTypeMap() { + $map = array(); + $new = $this->getNewLines(); + foreach ($new as $n) { + if (!$n) { + continue; + } + $map[$n['line']] = $n['type']; + } + return $map; + } + + public function setNewLineTypeMap(array $map) { + $lines = $this->getNewLines(); + foreach ($lines as $key => $data) { + $lines[$key]['type'] = $map[$data['line']]; + } + $this->newLines = $lines; + return $this; + } + + + public function setWhitespaceMode($white_space_mode) { + $this->whitespaceMode = $white_space_mode; + return $this; + } + + private function getWhitespaceMode() { + if ($this->whitespaceMode === null) { + throw new Exception( + 'You must setWhitespaceMode before accessing this data.' + ); + } + return $this->whitespaceMode; + } + public function getIsDeleted() { - return $this->isDeleted; + foreach ($this->getNewLines() as $line) { + if ($line) { + // At least one new line, so the entire file wasn't deleted. + return false; + } + } + + foreach ($this->getOldLines() as $line) { + if ($line) { + // No new lines, at least one old line; the entire file was deleted. + return true; + } + } + + // This is an empty file. + return false; } - private function setHasWhiteLines($has_white_lines) { - $this->hasWhiteLines = $has_white_lines; - return $this; - } - public function getHasWhiteLines() { - return $this->hasWhiteLines; + /** + * Returns true if the hunks change any text, not just whitespace. + */ + public function getHasTextChanges() { + return $this->getHasChanges('text'); } - public function setIsUnchanged($is_unchanged) { - $this->isUnchanged = $is_unchanged; - return $this; + /** + * Returns true if the hunks change anything, including whitespace. + */ + public function getHasAnyChanges() { + return $this->getHasChanges('any'); } - public function getIsUnchanged() { - return $this->isUnchanged; + + private function getHasChanges($filter) { + if ($filter !== 'any' && $filter !== 'text') { + throw new Exception("Unknown change filter '{$filter}'."); + } + + $old = $this->getOldLines(); + $new = $this->getNewLines(); + + $is_any = ($filter === 'any'); + + foreach ($old as $key => $o) { + $n = $new[$key]; + if ($o === null || $n === null) { + // One side is missing, and it's impossible for both sides to be null, + // so the other side must have something, and thus the two sides are + // different and the file has been changed under any type of filter. + return true; + } + + if ($o['type'] !== $n['type']) { + // The types are different, so either the underlying text is actually + // different or whatever whitespace rules we're using consider them + // different. + return true; + } + + if ($o['text'] !== $n['text']) { + if ($is_any) { + // The text is different, so there's a change. + return true; + } else if (trim($o['text']) !== trim($n['text'])) { + return true; + } + } + } + + // No changes anywhere in the file. + return false; } + /** * This function takes advantage of the parsing work done in * @{method:parseHunksForLineData} and continues the struggle to hammer this @@ -142,30 +219,20 @@ final class DifferentialHunkParser { * * In particular, this function re-parses the hunks to make them equivalent * in length for easy rendering, adding `null` as necessary to pad the - * length. Further, this re-parsing stage figures out various special - * properties about the changes such as if the change is a delete, has any - * whitelines, or has any changes whatsoever. Finally, this function - * calculates what lines - if any - should be skipped within a diff display, - * ostensibly because they don't have anything to do with the current set - * of changes with respect to display options. + * length. * * Anyhoo, this function is not particularly well-named but I try. * * NOTE: this function must be called after * @{method:parseHunksForLineData}. - * NOTE: you must @{method:setWhitespaceMode} before calling this method. */ public function reparseHunksForSpecialAttributes() { $rebuild_old = array(); $rebuild_new = array(); - $skip_intra = array(); $old_lines = array_reverse($this->getOldLines()); $new_lines = array_reverse($this->getNewLines()); - $whitelines = false; - $changed = false; - while (count($old_lines) || count($new_lines)) { $old_line_data = array_pop($old_lines); $new_line_data = array_pop($new_lines); @@ -188,7 +255,6 @@ final class DifferentialHunkParser { if ($new_line_data) { array_push($new_lines, $new_line_data); } - $changed = true; continue; } @@ -198,52 +264,9 @@ final class DifferentialHunkParser { if ($old_line_data) { array_push($old_lines, $old_line_data); } - $changed = true; continue; } - if ($this->getWhitespaceMode() != - DifferentialChangesetParser::WHITESPACE_SHOW_ALL) { - $similar = false; - switch ($this->getWhitespaceMode()) { - case DifferentialChangesetParser::WHITESPACE_IGNORE_TRAILING: - if (rtrim($old_line_data['text']) == - rtrim($new_line_data['text'])) { - if ($old_line_data['type']) { - // If we're converting this into an unchanged line because of - // a trailing whitespace difference, mark it as a whitespace - // change so we can show "This file was modified only by - // adding or removing trailing whitespace." instead of - // "This file was not modified.". - $whitelines = true; - } - $similar = true; - } - break; - default: - // In this case, the lines are similar if there is no change type - // (that is, just trust the diff algorithm). - if (!$old_line_data['type']) { - $similar = true; - } - break; - } - if ($similar) { - if ($old_line_data['type'] == '\\') { - // These are similar because they're "No newline at end of file" - // comments. - } else { - $old_line_data['type'] = null; - $new_line_data['type'] = null; - $skip_intra[count($rebuild_old)] = true; - } - } else { - $changed = true; - } - } else { - $changed = true; - } - $rebuild_old[] = $old_line_data; $rebuild_new[] = $new_line_data; } @@ -251,106 +274,86 @@ final class DifferentialHunkParser { $this->setOldLines($rebuild_old); $this->setNewLines($rebuild_new); - $this->setIsUnchanged(!$changed); - $this->setHasWhiteLines($whitelines); - $this->setIsDeleted(array_filter($this->getOldLines()) && - !array_filter($this->getNewLines())); - $this->setSkipIntraLines($skip_intra); + $this->updateChangeTypesForWhitespaceMode(); return $this; } - public function updateParsedHunksText($old_text, $new_text) { - if ($old_text || $new_text) { + private function updateChangeTypesForWhitespaceMode() { + $mode = $this->getWhitespaceMode(); - // Use this parser's side-by-side line information -- notably, the - // change types -- but replace all the line text. - // This lets us render whitespace-only changes without marking them as - // different. + $mode_show_all = DifferentialChangesetParser::WHITESPACE_SHOW_ALL; + if ($mode === $mode_show_all) { + // If we're showing all whitespace, we don't need to perform any updates. + return; + } - $old = $this->getOldLines(); - $new = $this->getNewLines(); + $mode_trailing = DifferentialChangesetParser::WHITESPACE_IGNORE_TRAILING; + $is_trailing = ($mode === $mode_trailing); - foreach ($old as $k => $desc) { - if (empty($desc)) { - continue; - } - $old[$k]['text'] = idx($old_text, $desc['line']); + $new = $this->getNewLines(); + $old = $this->getOldLines(); + foreach ($old as $key => $o) { + $n = $new[$key]; + + if (!$o || !$n) { + continue; } - $skip_intra = $this->getSkipIntraLines(); - foreach ($new as $k => $desc) { - if (empty($desc)) { - continue; + + if ($is_trailing) { + // In "trailing" mode, we need to identify lines which are marked + // changed but differ only by trailing whitespace. We mark these lines + // unchanged. + if ($o['type'] != $n['type']) { + if (rtrim($o['text']) === rtrim($n['text'])) { + $old[$key]['type'] = null; + $new[$key]['type'] = null; + } } - $new[$k]['text'] = idx($new_text, $desc['line']); - - if ($this->whitespaceMode == - DifferentialChangesetParser::WHITESPACE_IGNORE_FORCE) { - // Under forced ignore mode, ignore even internal whitespace - // changes. - continue; - } - - // If there's a corresponding "old" text and the line is marked as - // unchanged, test if there are internal whitespace changes between - // non-whitespace characters, e.g. spaces added to a string or spaces - // added around operators. If we find internal spaces, mark the line - // as changed. - // - // We only need to do this for "new" lines because any line that is - // missing either "old" or "new" text certainly can not have internal - // whitespace changes without also having non-whitespace changes, - // because characters had to be either added or removed to create the - // possibility of internal whitespace. - if (isset($old[$k]['text']) && empty($new[$k]['type'])) { - if (trim($old[$k]['text']) != trim($new[$k]['text'])) { - // The strings aren't the same when trimmed, so there are internal - // whitespace changes. Mark this line changed. - $old[$k]['type'] = '-'; - $new[$k]['type'] = '+'; - - // Re-mark this line for intraline diffing. - unset($skip_intra[$k]); + } else { + // In "ignore most" and "ignore all" modes, we need to identify lines + // which are marked unchanged but have internal whitespace changes. + // We want to ignore leading and trailing whitespace changes only, not + // internal whitespace changes (`diff` doesn't have a mode for this, so + // we have to fix it here). If the text is marked unchanged but the + // old and new text differs by internal space, mark the lines changed. + if ($o['type'] === null && $n['type'] === null) { + if ($o['text'] !== $n['text']) { + if (trim($o['text']) !== trim($n['text'])) { + $old[$key]['type'] = '-'; + $new[$key]['type'] = '+'; + } } } } - - $this->setSkipIntraLines($skip_intra); - $this->setOldLines($old); - $this->setNewLines($new); } + $this->setOldLines($old); + $this->setNewLines($new); + return $this; } public function generateIntraLineDiffs() { $old = $this->getOldLines(); $new = $this->getNewLines(); - $skip_intra = $this->getSkipIntraLines(); - $intra_line_diffs = array(); - $min_length = min(count($old), count($new)); - for ($ii = 0; $ii < $min_length; $ii++) { - if ($old[$ii] || $new[$ii]) { - if (isset($old[$ii]['text'])) { - $otext = $old[$ii]['text']; - } else { - $otext = ''; - } - if (isset($new[$ii]['text'])) { - $ntext = $new[$ii]['text']; - } else { - $ntext = ''; - } - if ($otext != $ntext && empty($skip_intra[$ii])) { - $intra_line_diffs[$ii] = ArcanistDiffUtils::generateIntralineDiff( - $otext, - $ntext); - } + $diffs = array(); + foreach ($old as $key => $o) { + $n = $new[$key]; + + if (!$o || !$n) { + continue; + } + + if ($o['type'] != $n['type']) { + $diffs[$key] = ArcanistDiffUtils::generateIntralineDiff( + $o['text'], + $n['text']); } } - $this->setIntraLineDiffs($intra_line_diffs); + $this->setIntraLineDiffs($diffs); return $this; }