From 92abe3c8fb84bc51b2f845e5bd1fe8da4117e1dd Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 14 Feb 2019 09:54:55 -0800 Subject: [PATCH] Extract scope line selection logic from the diff rendering engine so it can reasonably be iterated on Summary: Ref T13249. Ref T11738. See PHI985. Currently, we have a crude heuristic for guessing what line in a source file provides the best context. We get it wrong in a lot of cases, sometimes selecting very silly lines like "{". Although we can't always pick the same line a human would pick, we //can// pile on heuristics until this is less frequently completely wrong and perhaps eventually get it to work fairly well most of the time. Pull the logic for this into a separate standalone class and make it testable to prepare for adding heuristics. Test Plan: Ran unit tests, browsed various files in the web UI and saw as-good-or-better context selection. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13249, T11738 Differential Revision: https://secure.phabricator.com/D20171 --- src/__phutil_library_map__.php | 4 + .../parser/DifferentialChangesetParser.php | 49 +----- .../render/DifferentialChangesetRenderer.php | 38 ++++- .../DifferentialChangesetTwoUpRenderer.php | 46 ++++-- .../diff/PhabricatorDiffScopeEngine.php | 156 ++++++++++++++++++ .../PhabricatorDiffScopeEngineTestCase.php | 51 ++++++ .../diff/__tests__/data/zebra.c | 5 + 7 files changed, 286 insertions(+), 63 deletions(-) create mode 100644 src/infrastructure/diff/PhabricatorDiffScopeEngine.php create mode 100644 src/infrastructure/diff/__tests__/PhabricatorDiffScopeEngineTestCase.php create mode 100644 src/infrastructure/diff/__tests__/data/zebra.c diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index 37be79ec59..94ba258f53 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -2971,6 +2971,8 @@ phutil_register_library_map(array( 'PhabricatorDeveloperPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorDeveloperPreferencesSettingsPanel.php', 'PhabricatorDiffInlineCommentQuery' => 'infrastructure/diff/query/PhabricatorDiffInlineCommentQuery.php', 'PhabricatorDiffPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorDiffPreferencesSettingsPanel.php', + 'PhabricatorDiffScopeEngine' => 'infrastructure/diff/PhabricatorDiffScopeEngine.php', + 'PhabricatorDiffScopeEngineTestCase' => 'infrastructure/diff/__tests__/PhabricatorDiffScopeEngineTestCase.php', 'PhabricatorDifferenceEngine' => 'infrastructure/diff/PhabricatorDifferenceEngine.php', 'PhabricatorDifferentialApplication' => 'applications/differential/application/PhabricatorDifferentialApplication.php', 'PhabricatorDifferentialAttachCommitWorkflow' => 'applications/differential/management/PhabricatorDifferentialAttachCommitWorkflow.php', @@ -8850,6 +8852,8 @@ phutil_register_library_map(array( 'PhabricatorDeveloperPreferencesSettingsPanel' => 'PhabricatorEditEngineSettingsPanel', 'PhabricatorDiffInlineCommentQuery' => 'PhabricatorApplicationTransactionCommentQuery', 'PhabricatorDiffPreferencesSettingsPanel' => 'PhabricatorEditEngineSettingsPanel', + 'PhabricatorDiffScopeEngine' => 'Phobject', + 'PhabricatorDiffScopeEngineTestCase' => 'PhabricatorTestCase', 'PhabricatorDifferenceEngine' => 'Phobject', 'PhabricatorDifferentialApplication' => 'PhabricatorApplication', 'PhabricatorDifferentialAttachCommitWorkflow' => 'PhabricatorDifferentialManagementWorkflow', diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php index e214aa16a4..27d2a2d845 100644 --- a/src/applications/differential/parser/DifferentialChangesetParser.php +++ b/src/applications/differential/parser/DifferentialChangesetParser.php @@ -1173,7 +1173,7 @@ final class DifferentialChangesetParser extends Phobject { } $range_len = min($range_len, $rows - $range_start); - list($gaps, $mask, $depths) = $this->calculateGapsMaskAndDepths( + list($gaps, $mask) = $this->calculateGapsAndMask( $mask_force, $feedback_mask, $range_start, @@ -1181,8 +1181,7 @@ final class DifferentialChangesetParser extends Phobject { $renderer ->setGaps($gaps) - ->setMask($mask) - ->setDepths($depths); + ->setMask($mask); $html = $renderer->renderTextChange( $range_start, @@ -1208,15 +1207,9 @@ final class DifferentialChangesetParser extends Phobject { * "show more"). The $mask returned is a sparsely populated dictionary * of $visible_line_number => true. * - * Depths - compute how indented any given line is. The $depths returned - * is a sparsely populated dictionary of $visible_line_number => $depth. - * - * This function also has the side effect of modifying member variable - * new such that tabs are normalized to spaces for each line of the diff. - * - * @return array($gaps, $mask, $depths) + * @return array($gaps, $mask) */ - private function calculateGapsMaskAndDepths( + private function calculateGapsAndMask( $mask_force, $feedback_mask, $range_start, @@ -1224,7 +1217,6 @@ final class DifferentialChangesetParser extends Phobject { $lines_context = $this->getLinesOfContext(); - // Calculate gaps and mask first $gaps = array(); $gap_start = 0; $in_gap = false; @@ -1253,38 +1245,7 @@ final class DifferentialChangesetParser extends Phobject { $gaps = array_reverse($gaps); $mask = $base_mask; - // Time to calculate depth. - // We need to go backwards to properly indent whitespace in this code: - // - // 0: class C { - // 1: - // 1: function f() { - // 2: - // 2: return; - // 1: - // 1: } - // 0: - // 0: } - // - $depths = array(); - $last_depth = 0; - $range_end = $range_start + $range_len; - if (!isset($this->new[$range_end])) { - $range_end--; - } - for ($ii = $range_end; $ii >= $range_start; $ii--) { - // We need to expand tabs to process mixed indenting and to round - // correctly later. - $line = str_replace("\t", ' ', $this->new[$ii]['text']); - $trimmed = ltrim($line); - if ($trimmed != '') { - // We round down to flatten "/**" and " *". - $last_depth = floor((strlen($line) - strlen($trimmed)) / 2); - } - $depths[$ii] = $last_depth; - } - - return array($gaps, $mask, $depths); + return array($gaps, $mask); } /** diff --git a/src/applications/differential/render/DifferentialChangesetRenderer.php b/src/applications/differential/render/DifferentialChangesetRenderer.php index 450d160e23..f295695286 100644 --- a/src/applications/differential/render/DifferentialChangesetRenderer.php +++ b/src/applications/differential/render/DifferentialChangesetRenderer.php @@ -28,12 +28,12 @@ abstract class DifferentialChangesetRenderer extends Phobject { private $originalNew; private $gaps; private $mask; - private $depths; private $originalCharacterEncoding; private $showEditAndReplyLinks; private $canMarkDone; private $objectOwnerPHID; private $highlightingDisabled; + private $scopeEngine; private $oldFile = false; private $newFile = false; @@ -76,14 +76,6 @@ abstract class DifferentialChangesetRenderer extends Phobject { return $this->isUndershield; } - public function setDepths($depths) { - $this->depths = $depths; - return $this; - } - protected function getDepths() { - return $this->depths; - } - public function setMask($mask) { $this->mask = $mask; return $this; @@ -678,4 +670,32 @@ abstract class DifferentialChangesetRenderer extends Phobject { return $views; } + + final protected function getScopeEngine() { + if (!$this->scopeEngine) { + $line_map = $this->getNewLineTextMap(); + + $scope_engine = id(new PhabricatorDiffScopeEngine()) + ->setLineTextMap($line_map); + + $this->scopeEngine = $scope_engine; + } + + return $this->scopeEngine; + } + + private function getNewLineTextMap() { + $new = $this->getNewLines(); + + $text_map = array(); + foreach ($new as $new_line) { + if (!isset($new_line['line'])) { + continue; + } + $text_map[$new_line['line']] = $new_line['text']; + } + + return $text_map; + } + } diff --git a/src/applications/differential/render/DifferentialChangesetTwoUpRenderer.php b/src/applications/differential/render/DifferentialChangesetTwoUpRenderer.php index 5d476f5136..f40a7f5e0b 100644 --- a/src/applications/differential/render/DifferentialChangesetTwoUpRenderer.php +++ b/src/applications/differential/render/DifferentialChangesetTwoUpRenderer.php @@ -3,6 +3,8 @@ final class DifferentialChangesetTwoUpRenderer extends DifferentialChangesetHTMLRenderer { + private $newOffsetMap; + public function isOneUpRenderer() { return false; } @@ -66,9 +68,12 @@ final class DifferentialChangesetTwoUpRenderer $new_render = $this->getNewRender(); $original_left = $this->getOriginalOld(); $original_right = $this->getOriginalNew(); - $depths = $this->getDepths(); $mask = $this->getMask(); + $scope_engine = $this->getScopeEngine(); + + $offset_map = null; + for ($ii = $range_start; $ii < $range_start + $range_len; $ii++) { if (empty($mask[$ii])) { // If we aren't going to show this line, we've just entered a gap. @@ -87,16 +92,19 @@ final class DifferentialChangesetTwoUpRenderer $is_last_block = true; } - $context = null; + $context_text = null; $context_line = null; - if (!$is_last_block && $depths[$ii + $len]) { - for ($l = $ii + $len - 1; $l >= $ii; $l--) { - $line = $new_lines[$l]['text']; - if ($depths[$l] < $depths[$ii + $len] && trim($line) != '') { - $context = $new_render[$l]; - $context_line = $new_lines[$l]['line']; - break; + if (!$is_last_block) { + $target_line = $new_lines[$ii + $len]['line']; + $context_line = $scope_engine->getScopeStart($target_line); + if ($context_line !== null) { + // The scope engine returns a line number in the file. We need + // to map that back to a display offset in the diff. + if (!$offset_map) { + $offset_map = $this->getNewLineToOffsetMap(); } + $offset = $offset_map[$context_line]; + $context_text = $new_render[$offset]; } } @@ -126,7 +134,7 @@ final class DifferentialChangesetTwoUpRenderer 'class' => 'show-context', ), // TODO: [HTML] Escaping model here isn't ideal. - phutil_safe_html($context)), + phutil_safe_html($context_text)), )); $html[] = $container; @@ -386,4 +394,22 @@ final class DifferentialChangesetTwoUpRenderer ->addInlineView($view); } + private function getNewLineToOffsetMap() { + if ($this->newOffsetMap === null) { + $new = $this->getNewLines(); + + $map = array(); + foreach ($new as $offset => $new_line) { + if ($new_line['line'] === null) { + continue; + } + $map[$new_line['line']] = $offset; + } + + $this->newOffsetMap = $map; + } + + return $this->newOffsetMap; + } + } diff --git a/src/infrastructure/diff/PhabricatorDiffScopeEngine.php b/src/infrastructure/diff/PhabricatorDiffScopeEngine.php new file mode 100644 index 0000000000..5ea1ec5021 --- /dev/null +++ b/src/infrastructure/diff/PhabricatorDiffScopeEngine.php @@ -0,0 +1,156 @@ + $value) { + if ($key === $expect) { + $expect++; + continue; + } + + throw new Exception( + pht( + 'ScopeEngine text map must be a contiguous map of '. + 'lines, but is not: found key "%s" where key "%s" was expected.', + $key, + $expect)); + } + + $this->lineTextMap = $map; + + return $this; + } + + public function getLineTextMap() { + if ($this->lineTextMap === null) { + throw new PhutilInvalidStateException('setLineTextMap'); + } + return $this->lineTextMap; + } + + public function getScopeStart($line) { + $text_map = $this->getLineTextMap(); + $depth_map = $this->getLineDepthMap(); + $length = count($text_map); + + // Figure out the effective depth of the line we're getting scope for. + // If the line is just whitespace, it may have no depth on its own. In + // this case, we look for the next line. + $line_depth = null; + for ($ii = $line; $ii <= $length; $ii++) { + if ($depth_map[$ii] !== null) { + $line_depth = $depth_map[$ii]; + break; + } + } + + // If we can't find a line depth for the target line, just bail. + if ($line_depth === null) { + return null; + } + + // Limit the maximum number of lines we'll examine. If a user has a + // million-line diff of nonsense, scanning the whole thing is a waste + // of time. + $search_range = 1000; + $search_until = max(0, $ii - $search_range); + + for ($ii = $line - 1; $ii > $search_until; $ii--) { + $line_text = $text_map[$ii]; + + // This line is in missing context: the diff was diffed with partial + // context, and we ran out of context before finding a good scope line. + // Bail out, we don't want to jump across missing context blocks. + if ($line_text === null) { + return null; + } + + $depth = $depth_map[$ii]; + + // This line is all whitespace. This isn't a possible match. + if ($depth === null) { + continue; + } + + // The depth is the same as (or greater than) the depth we started with, + // so this isn't a possible match. + if ($depth >= $line_depth) { + continue; + } + + // Reject lines which begin with "}" or "{". These lines are probably + // never good matches. + if (preg_match('/^\s*[{}]/i', $line_text)) { + continue; + } + + return $ii; + } + + return null; + } + + private function getLineDepthMap() { + if (!$this->lineDepthMap) { + $this->lineDepthMap = $this->newLineDepthMap(); + } + + return $this->lineDepthMap; + } + + private function newLineDepthMap() { + $text_map = $this->getLineTextMap(); + + // TODO: This should be configurable once we handle tab widths better. + $tab_width = 2; + + $depth_map = array(); + foreach ($text_map as $line_number => $line_text) { + if ($line_text === null) { + $depth_map[$line_number] = null; + continue; + } + + $len = strlen($line_text); + + // If the line has no actual text, don't assign it a depth. + if (!$len || !strlen(trim($line_text))) { + $depth_map[$line_number] = null; + continue; + } + + $count = 0; + for ($ii = 0; $ii < $len; $ii++) { + $c = $line_text[$ii]; + if ($c == ' ') { + $count++; + } else if ($c == "\t") { + $count += $tab_width; + } else { + break; + } + } + + // Round down to cheat our way through the " *" parts of docblock + // comments. This is generally a reasonble heuristic because odd tab + // widths are exceptionally rare. + $depth = ($count >> 1); + + $depth_map[$line_number] = $depth; + } + + return $depth_map; + } + +} diff --git a/src/infrastructure/diff/__tests__/PhabricatorDiffScopeEngineTestCase.php b/src/infrastructure/diff/__tests__/PhabricatorDiffScopeEngineTestCase.php new file mode 100644 index 0000000000..50e23ac31c --- /dev/null +++ b/src/infrastructure/diff/__tests__/PhabricatorDiffScopeEngineTestCase.php @@ -0,0 +1,51 @@ +assertScopeStart('zebra.c', 4, 2); + } + + private function assertScopeStart($file, $line, $expect) { + $engine = $this->getScopeTestEngine($file); + + $actual = $engine->getScopeStart($line); + $this->assertEqual( + $expect, + $actual, + pht( + 'Expect scope for line %s to start on line %s (actual: %s) in "%s".', + $line, + $expect, + $actual, + $file)); + } + + private function getScopeTestEngine($file) { + if (!isset($this->engines[$file])) { + $this->engines[$file] = $this->newScopeTestEngine($file); + } + + return $this->engines[$file]; + } + + private function newScopeTestEngine($file) { + $path = dirname(__FILE__).'/data/'.$file; + $data = Filesystem::readFile($path); + + $lines = phutil_split_lines($data); + $map = array(); + foreach ($lines as $key => $line) { + $map[$key + 1] = $line; + } + + $engine = id(new PhabricatorDiffScopeEngine()) + ->setLineTextMap($map); + + return $engine; + } + +} diff --git a/src/infrastructure/diff/__tests__/data/zebra.c b/src/infrastructure/diff/__tests__/data/zebra.c new file mode 100644 index 0000000000..d587b018a9 --- /dev/null +++ b/src/infrastructure/diff/__tests__/data/zebra.c @@ -0,0 +1,5 @@ +void +ZebraTamer::TameAZebra(nsPoint where, const nsRect& zone, nsAtom* material) +{ + zebra.tame = true; +}