1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-20 05:42:40 +01:00

Fix explosive runtime of detectCopiedCode()

Summary:
Fixes T5041. Pretty sure this is the issue: if a diff contains a large number of identical lines longer than 30 characters, we end up paying O(N^2) for each set.

Instead, when N > 16, opt to pay 0.

Test Plan: Added a test which dropped from ~100s to ~0 after changes (this diff includes a reduced-strength version of the test, since parsing a 4,000 line diff is a little bit pricey).

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: epriestley

Maniphest Tasks: T5041

Differential Revision: https://secure.phabricator.com/D9178
This commit is contained in:
epriestley 2014-05-19 12:39:12 -07:00
parent ba6a5dae61
commit b64407d47e
2 changed files with 47 additions and 0 deletions

View file

@ -1176,6 +1176,16 @@ final class DifferentialChangesetParser {
$added = array_map('trim', $hunk->getAddedLines());
for (reset($added); list($line, $code) = each($added); ) {
if (isset($map[$code])) { // We found a long matching line.
if (count($map[$code]) > 16) {
// If there are a large number of identical lines in this diff,
// don't try to figure out where this block came from: the
// analysis is O(N^2), since we need to compare every line
// against every other line. Even if we arrive at a result, it
// is unlikely to be meaningful. See T5041.
continue 2;
}
$best_length = 0;
foreach ($map[$code] as $val) { // Explore all candidates.
list($file, $orig_line) = $val;

View file

@ -15,4 +15,41 @@ final class DifferentialDiffTestCase extends ArcanistPhutilTestCase {
ipull($copies, 1));
}
public function testDetectSlowCopiedCode() {
  // Runtime regression guard (see T5041): feed the diff machinery a change
  // in which one file adds, and another file removes, a large number of
  // byte-identical 60-character lines. There is no value to check here, so
  // the closing assertion is a placeholder; the test is only useful if
  // building the diff finishes in a reasonable amount of time.
  $diff_parser = new ArcanistDiffParser();

  $repeat_count = 1000;
  $payload = str_repeat('x', 60);

  // Each block is the same line repeated, prefixed as a removal ("-") or an
  // addition ("+") and terminated with a newline.
  $removed_block = str_repeat('-'.$payload."\n", $repeat_count);
  $added_block = str_repeat('+'.$payload."\n", $repeat_count);

  // NOTE: the heredoc payload and its closing marker are deliberately kept
  // at column 0 so the raw diff text is not altered by indentation.
  $patch_text = <<<EODIFF
diff --git a/dst b/dst
new file mode 100644
index 0000000..1234567
--- /dev/null
+++ b/dst
@@ -0,0 +1,{$repeat_count} @@
{$added_block}
diff --git a/src b/src
deleted file mode 100644
index 123457..0000000
--- a/src
+++ /dev/null
@@ -1,{$repeat_count} +0,0 @@
{$removed_block}
EODIFF;

  $parsed_changes = $diff_parser->parseDiff($patch_text);
  $built_diff = DifferentialDiff::newFromRawChanges($parsed_changes);

  $this->assertTrue(true);
}
}