mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-20 05:42:40 +01:00
Fix explosive runtime of detectCopiedCode()
Summary: Fixes T5041. Pretty sure this is the issue: if a diff contains a large number of identical lines longer than 30 characters, we end up paying O(N^2) for each set. Instead, when N > 16, opt to pay 0. Test Plan: Added a test which dropped from ~100s to ~0 after changes (this diff includes a reduced-strength version of the test, since parsing a 4,000 line diff is a little bit pricey). Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T5041 Differential Revision: https://secure.phabricator.com/D9178
This commit is contained in:
parent
ba6a5dae61
commit
b64407d47e
2 changed files with 47 additions and 0 deletions
|
@ -1176,6 +1176,16 @@ final class DifferentialChangesetParser {
|
|||
$added = array_map('trim', $hunk->getAddedLines());
|
||||
for (reset($added); list($line, $code) = each($added); ) {
|
||||
if (isset($map[$code])) { // We found a long matching line.
|
||||
|
||||
if (count($map[$code]) > 16) {
|
||||
// If there are a large number of identical lines in this diff,
|
||||
// don't try to figure out where this block came from: the
|
||||
// analysis is O(N^2), since we need to compare every line
|
||||
// against every other line. Even if we arrive at a result, it
|
||||
// is unlikely to be meaningful. See T5041.
|
||||
continue 2;
|
||||
}
|
||||
|
||||
$best_length = 0;
|
||||
foreach ($map[$code] as $val) { // Explore all candidates.
|
||||
list($file, $orig_line) = $val;
|
||||
|
|
|
@ -15,4 +15,41 @@ final class DifferentialDiffTestCase extends ArcanistPhutilTestCase {
|
|||
ipull($copies, 1));
|
||||
}
|
||||
|
||||
/**
 * Runtime regression test for copied-code detection. See T5041.
 *
 * Builds a raw diff in which one file gains, and another file loses, a large
 * number of identical 60-character lines, then constructs a diff object from
 * it. The only assertion is a placeholder: the test is considered passing
 * once construction of the diff returns.
 */
public function testDetectSlowCopiedCode() {
  $count = 1000;

  // A single long line, repeated as both the added and the removed content.
  $text = str_repeat('x', 60);
  $added_block = str_repeat('+'.$text."\n", $count);
  $removed_block = str_repeat('-'.$text."\n", $count);

  // Heredoc body and closing marker must stay flush-left.
  $raw_diff = <<<EODIFF
diff --git a/dst b/dst
new file mode 100644
index 0000000..1234567
--- /dev/null
+++ b/dst
@@ -0,0 +1,{$count} @@
{$added_block}
diff --git a/src b/src
deleted file mode 100644
index 123457..0000000
--- a/src
+++ /dev/null
@@ -1,{$count} +0,0 @@
{$removed_block}
EODIFF;

  $parser = new ArcanistDiffParser();
  $changes = $parser->parseDiff($raw_diff);
  $diff = DifferentialDiff::newFromRawChanges($changes);

  // Nothing to verify about the result itself; reaching this line is the test.
  $this->assertTrue(true);
}
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue