mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-19 21:32:43 +01:00
Detect moves and copies with some unchanged lines as moves or copies
Summary: Ref T1266. We won't detect a move/copy if fewer than 3 lines are changed. However, you may move a block like this: "Complicated Line A", "Trivial Line B", "Complicated Line C" ...where "Trivial Line B" is something like a curly brace. If you move this block somewhere that happened to previously have a similar trivial curly brace line, we won't be able to find 3 contiguous added lines in order to detect the copy/move. Instead, consider both changed and unchanged lines when trying to find contiguous blocks. This allows us to detect moves and copies across gaps where lines were not actually changed. This new algorithm may be too liberal (for example, we may end up incorrectly identifying moved/copied code before or after changed lines, not just between changed lines), but we can keep an eye on it and tweak it. The algorithm is now better factored and better covered by tests. Test Plan: - Added a unit test for this case. - Spot-checked a handful of diffs and generally saw behavior that made sense and looked better than before. Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T1266 Differential Revision: https://secure.phabricator.com/D12146
This commit is contained in:
parent
373aaa643a
commit
aa310230b6
3 changed files with 74 additions and 10 deletions
|
@ -1347,12 +1347,10 @@ final class DifferentialChangesetParser {
|
|||
$copies = array();
|
||||
foreach ($changeset->getHunks() as $hunk) {
|
||||
$added = $hunk->getStructuredNewFile();
|
||||
$atype = array();
|
||||
|
||||
foreach ($added as $line => $info) {
|
||||
if ($info['type'] != '+') {
|
||||
unset($added[$line]);
|
||||
continue;
|
||||
}
|
||||
$atype[$line] = $info['type'];
|
||||
$added[$line] = trim($info['text']);
|
||||
}
|
||||
|
||||
|
@ -1365,6 +1363,12 @@ final class DifferentialChangesetParser {
|
|||
continue;
|
||||
}
|
||||
|
||||
if ($atype[$line] !== '+') {
|
||||
// This line hasn't been changed in the new file, so don't try
|
||||
// to figure out where it came from.
|
||||
continue;
|
||||
}
|
||||
|
||||
if (empty($map[$code])) {
|
||||
// This line was too short to trigger copy/move detection.
|
||||
continue;
|
||||
|
|
|
@ -3,17 +3,36 @@
|
|||
final class DifferentialDiffTestCase extends ArcanistPhutilTestCase {
|
||||
|
||||
public function testDetectCopiedCode() {
|
||||
$copies = $this->detectCopiesIn('lint_engine.diff');
|
||||
|
||||
$this->assertEqual(
|
||||
array_combine(range(237, 252), range(167, 182)),
|
||||
ipull($copies, 1));
|
||||
}
|
||||
|
||||
public function testDetectCopiedOverlaidCode() {
|
||||
$copies = $this->detectCopiesIn('copy_overlay.diff');
|
||||
|
||||
$this->assertEqual(
|
||||
array(
|
||||
7 => 22,
|
||||
8 => 23,
|
||||
9 => 24,
|
||||
10 => 25,
|
||||
11 => 26,
|
||||
12 => 27,
|
||||
),
|
||||
ipull($copies, 1));
|
||||
}
|
||||
|
||||
private function detectCopiesIn($file) {
|
||||
$root = dirname(__FILE__).'/diff/';
|
||||
$parser = new ArcanistDiffParser();
|
||||
|
||||
$diff = DifferentialDiff::newFromRawChanges(
|
||||
PhabricatorUser::getOmnipotentUser(),
|
||||
$parser->parseDiff(Filesystem::readFile($root.'lint_engine.diff')));
|
||||
$copies = idx(head($diff->getChangesets())->getMetadata(), 'copy:lines');
|
||||
|
||||
$this->assertEqual(
|
||||
array_combine(range(237, 252), range(167, 182)),
|
||||
ipull($copies, 1));
|
||||
$parser->parseDiff(Filesystem::readFile($root.$file)));
|
||||
return idx(head($diff->getChangesets())->getMetadata(), 'copy:lines');
|
||||
}
|
||||
|
||||
public function testDetectSlowCopiedCode() {
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
diff --git a/test.c b/test.c
|
||||
index 441f670..5a0aa05 100644
|
||||
--- a/test.c
|
||||
+++ b/test.c
|
||||
@@ -1,33 +1,28 @@
|
||||
// NOTE: Lines must be more than 30 characters long to activate copy/move
|
||||
// detection.
|
||||
|
||||
if (100000000000000000000000000000)
|
||||
{
|
||||
// A
|
||||
// B
|
||||
}
|
||||
-else if (200000000000000000000000000000)
|
||||
+else if (500000000000000000000000000000)
|
||||
{
|
||||
- // C
|
||||
- // D
|
||||
+ // I
|
||||
+ // J
|
||||
}
|
||||
else if (300000000000000000000000000000)
|
||||
{
|
||||
// E
|
||||
// F
|
||||
}
|
||||
else if (400000000000000000000000000000)
|
||||
{
|
||||
// G
|
||||
// H
|
||||
}
|
||||
-else if (500000000000000000000000000000)
|
||||
-{
|
||||
- // I
|
||||
- // J
|
||||
-}
|
||||
else if (600000000000000000000000000000)
|
||||
{
|
||||
// K
|
||||
// L
|
||||
}
|
Loading…
Reference in a new issue