mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-20 13:52:40 +01:00
Fix explosive runtime of detectCopiedCode()
Summary: Fixes T5041. Pretty sure this is the issue: if a diff contains a large number of identical lines longer than 30 characters, we end up paying O(N^2) for each set. Instead, when N > 16, opt to pay 0. Test Plan: Added a test which dropped from ~100s to ~0 after changes (this diff includes a reduced-strength version of the test, since parsing a 4,000 line diff is a little bit pricey). Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T5041 Differential Revision: https://secure.phabricator.com/D9178
This commit is contained in:
parent
ba6a5dae61
commit
b64407d47e
2 changed files with 47 additions and 0 deletions
|
@ -1176,6 +1176,16 @@ final class DifferentialChangesetParser {
|
||||||
$added = array_map('trim', $hunk->getAddedLines());
|
$added = array_map('trim', $hunk->getAddedLines());
|
||||||
for (reset($added); list($line, $code) = each($added); ) {
|
for (reset($added); list($line, $code) = each($added); ) {
|
||||||
if (isset($map[$code])) { // We found a long matching line.
|
if (isset($map[$code])) { // We found a long matching line.
|
||||||
|
|
||||||
|
if (count($map[$code]) > 16) {
|
||||||
|
// If there are a large number of identical lines in this diff,
|
||||||
|
// don't try to figure out where this block came from: the
|
||||||
|
// analysis is O(N^2), since we need to compare every line
|
||||||
|
// against every other line. Even if we arrive at a result, it
|
||||||
|
// is unlikely to be meaningful. See T5041.
|
||||||
|
continue 2;
|
||||||
|
}
|
||||||
|
|
||||||
$best_length = 0;
|
$best_length = 0;
|
||||||
foreach ($map[$code] as $val) { // Explore all candidates.
|
foreach ($map[$code] as $val) { // Explore all candidates.
|
||||||
list($file, $orig_line) = $val;
|
list($file, $orig_line) = $val;
|
||||||
|
|
|
@ -15,4 +15,41 @@ final class DifferentialDiffTestCase extends ArcanistPhutilTestCase {
|
||||||
ipull($copies, 1));
|
ipull($copies, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testDetectSlowCopiedCode() {
|
||||||
|
// This tests that the detector has a reasonable runtime when a diff
|
||||||
|
// contains a very large number of identical lines. See T5041.
|
||||||
|
|
||||||
|
$parser = new ArcanistDiffParser();
|
||||||
|
|
||||||
|
$line = str_repeat('x', 60);
|
||||||
|
$oline = '-'.$line."\n";
|
||||||
|
$nline = '+'.$line."\n";
|
||||||
|
|
||||||
|
$n = 1000;
|
||||||
|
$oblock = str_repeat($oline, $n);
|
||||||
|
$nblock = str_repeat($nline, $n);
|
||||||
|
|
||||||
|
$raw_diff = <<<EODIFF
|
||||||
|
diff --git a/dst b/dst
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..1234567
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/dst
|
||||||
|
@@ -0,0 +1,{$n} @@
|
||||||
|
{$nblock}
|
||||||
|
diff --git a/src b/src
|
||||||
|
deleted file mode 100644
|
||||||
|
index 123457..0000000
|
||||||
|
--- a/src
|
||||||
|
+++ /dev/null
|
||||||
|
@@ -1,{$n} +0,0 @@
|
||||||
|
{$oblock}
|
||||||
|
EODIFF;
|
||||||
|
|
||||||
|
$diff = DifferentialDiff::newFromRawChanges($parser->parseDiff($raw_diff));
|
||||||
|
|
||||||
|
$this->assertTrue(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue