1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-19 11:11:10 +01:00

Detect moves and copies with some unchanged lines as moves or copies

Summary:
Ref T1266. Currently, we won't detect a move/copy if fewer than 3 contiguous lines are changed.

However, you may move a block like:

  Complicated Line A
  Trivial Line B
  Complicated Line C

...where "Trivial Line B" is something like a curly brace. If you move this block somewhere that happened to previously have a similar trivial curly brace line, we won't be able to find 3 contiguous added lines in order to detect the copy/move.

Instead, consider both changed and unchanged lines when trying to find contiguous blocks. This allows us to detect across gaps where lines were not actually changed.

This new algorithm may be too liberal (for example, we may end up incorrectly identifying moved/copied code before or after changed lines, not just between changed lines), but we can keep an eye on it and tweak it. The algorithm is now better factored and has better test coverage.

Test Plan:
  - Added a unit test for this case.
  - Spot-checked a handful of diffs and generally saw behavior that made sense and looked better than before.

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: epriestley

Maniphest Tasks: T1266

Differential Revision: https://secure.phabricator.com/D12146
This commit is contained in:
epriestley 2015-03-24 13:12:24 -07:00
parent 373aaa643a
commit aa310230b6
3 changed files with 74 additions and 10 deletions

View file

@ -1347,12 +1347,10 @@ final class DifferentialChangesetParser {
$copies = array();
foreach ($changeset->getHunks() as $hunk) {
$added = $hunk->getStructuredNewFile();
$atype = array();
foreach ($added as $line => $info) {
if ($info['type'] != '+') {
unset($added[$line]);
continue;
}
$atype[$line] = $info['type'];
$added[$line] = trim($info['text']);
}
@ -1365,6 +1363,12 @@ final class DifferentialChangesetParser {
continue;
}
if ($atype[$line] !== '+') {
// This line hasn't been changed in the new file, so don't try
// to figure out where it came from.
continue;
}
if (empty($map[$code])) {
// This line was too short to trigger copy/move detection.
continue;

View file

@ -3,17 +3,36 @@
final class DifferentialDiffTestCase extends ArcanistPhutilTestCase {
/**
 * Runs copy detection on the `lint_engine.diff` fixture and asserts that
 * index 1 of every detected copy entry, keyed 237..252, equals 167..182.
 */
public function testDetectCopiedCode() {
$copies = $this->detectCopiesIn('lint_engine.diff');
$this->assertEqual(
// Expected map: keys 237..252 paired, in order, with values 167..182.
array_combine(range(237, 252), range(167, 182)),
// NOTE(review): ipull() is defined elsewhere; presumably it extracts
// index 1 from each entry while preserving keys -- confirm.
ipull($copies, 1));
}
/**
 * Runs copy detection on the `copy_overlay.diff` fixture and asserts that
 * index 1 of every detected copy entry maps keys 7..12 to values 22..27.
 *
 * NOTE(review): the keys look like line numbers in the new file and the
 * values like the old-file line numbers the block came from -- confirm
 * against the fixture.
 */
public function testDetectCopiedOverlaidCode() {
$copies = $this->detectCopiesIn('copy_overlay.diff');
$this->assertEqual(
// Six consecutive lines are expected to be detected, each offset by 15.
array(
7 => 22,
8 => 23,
9 => 24,
10 => 25,
11 => 26,
12 => 27,
),
ipull($copies, 1));
}
/**
 * Reads a fixture from the `diff/` directory next to this file, parses it
 * with ArcanistDiffParser, builds a DifferentialDiff from the parsed changes,
 * and returns the 'copy:lines' entry of the metadata of
 * head($diff->getChangesets()).
 *
 * NOTE(review): this listing appears to interleave both sides of a diff hunk
 * (the +/- markers are not shown), so it is not valid PHP as displayed --
 * confirm against the actual file before relying on it.
 *
 * @param string $file Fixture file name, appended to the `diff/` directory.
 * @return wild Value stored under 'copy:lines' in the changeset metadata.
 */
private function detectCopiesIn($file) {
$root = dirname(__FILE__).'/diff/';
$parser = new ArcanistDiffParser();
$diff = DifferentialDiff::newFromRawChanges(
PhabricatorUser::getOmnipotentUser(),
// NOTE(review): this line and the four after it hard-code
// 'lint_engine.diff' and duplicate the assertion in testDetectCopiedCode();
// they look like the removed side of the hunk -- confirm.
$parser->parseDiff(Filesystem::readFile($root.'lint_engine.diff')));
$copies = idx(head($diff->getChangesets())->getMetadata(), 'copy:lines');
$this->assertEqual(
array_combine(range(237, 252), range(167, 182)),
ipull($copies, 1));
// NOTE(review): the next two lines use the $file parameter and look like the
// added side of the hunk -- confirm.
$parser->parseDiff(Filesystem::readFile($root.$file)));
return idx(head($diff->getChangesets())->getMetadata(), 'copy:lines');
}
public function testDetectSlowCopiedCode() {

View file

@ -0,0 +1,41 @@
diff --git a/test.c b/test.c
index 441f670..5a0aa05 100644
--- a/test.c
+++ b/test.c
@@ -1,33 +1,28 @@
// NOTE: Lines must be more than 30 characters long to activate copy/move
// detection.
if (100000000000000000000000000000)
{
// A
// B
}
-else if (200000000000000000000000000000)
+else if (500000000000000000000000000000)
{
- // C
- // D
+ // I
+ // J
}
else if (300000000000000000000000000000)
{
// E
// F
}
else if (400000000000000000000000000000)
{
// G
// H
}
-else if (500000000000000000000000000000)
-{
- // I
- // J
-}
else if (600000000000000000000000000000)
{
// K
// L
}