mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-11-29 10:12:41 +01:00
Use linear distance instead of square distance in levenshtein
Summary: We used square distance that optimized for the wrong thing. Making the same markers spread out instead of being together Also added a very little cost for switching type. That will make diff types stick together a bit more Task ID: #623 Blame Rev: Test Plan: Ran my new unit test. And tested a few diff in phabricator Revert Plan: Tags: Reviewers: epriestley Reviewed By: epriestley CC: aran, epriestley Differential Revision: 1112
This commit is contained in:
parent
16caf63d98
commit
5be54ec283
4 changed files with 145 additions and 28 deletions
|
@ -29,6 +29,7 @@ phutil_register_library_map(array(
|
||||||
'ArcanistDiffParser' => 'parser/diff',
|
'ArcanistDiffParser' => 'parser/diff',
|
||||||
'ArcanistDiffParserTestCase' => 'parser/diff/__tests__',
|
'ArcanistDiffParserTestCase' => 'parser/diff/__tests__',
|
||||||
'ArcanistDiffUtils' => 'difference',
|
'ArcanistDiffUtils' => 'difference',
|
||||||
|
'ArcanistDiffUtilsTestCase' => 'difference/__tests__',
|
||||||
'ArcanistDiffWorkflow' => 'workflow/diff',
|
'ArcanistDiffWorkflow' => 'workflow/diff',
|
||||||
'ArcanistDifferentialCommitMessage' => 'differential/commitmessage',
|
'ArcanistDifferentialCommitMessage' => 'differential/commitmessage',
|
||||||
'ArcanistDifferentialCommitMessageParserException' => 'differential/commitmessage',
|
'ArcanistDifferentialCommitMessageParserException' => 'differential/commitmessage',
|
||||||
|
@ -110,6 +111,7 @@ phutil_register_library_map(array(
|
||||||
'ArcanistCommitWorkflow' => 'ArcanistBaseWorkflow',
|
'ArcanistCommitWorkflow' => 'ArcanistBaseWorkflow',
|
||||||
'ArcanistCoverWorkflow' => 'ArcanistBaseWorkflow',
|
'ArcanistCoverWorkflow' => 'ArcanistBaseWorkflow',
|
||||||
'ArcanistDiffParserTestCase' => 'ArcanistPhutilTestCase',
|
'ArcanistDiffParserTestCase' => 'ArcanistPhutilTestCase',
|
||||||
|
'ArcanistDiffUtilsTestCase' => 'ArcanistPhutilTestCase',
|
||||||
'ArcanistDiffWorkflow' => 'ArcanistBaseWorkflow',
|
'ArcanistDiffWorkflow' => 'ArcanistBaseWorkflow',
|
||||||
'ArcanistDownloadWorkflow' => 'ArcanistBaseWorkflow',
|
'ArcanistDownloadWorkflow' => 'ArcanistBaseWorkflow',
|
||||||
'ArcanistExportWorkflow' => 'ArcanistBaseWorkflow',
|
'ArcanistExportWorkflow' => 'ArcanistBaseWorkflow',
|
||||||
|
|
|
@ -204,7 +204,7 @@ final class ArcanistDiffUtils {
|
||||||
return array_values($runs);
|
return array_values($runs);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static function buildLevenshteinDifferenceString($o, $n) {
|
public static function buildLevenshteinDifferenceString($o, $n) {
|
||||||
$olt = strlen($o);
|
$olt = strlen($o);
|
||||||
$nlt = strlen($n);
|
$nlt = strlen($n);
|
||||||
|
|
||||||
|
@ -256,25 +256,27 @@ final class ArcanistDiffUtils {
|
||||||
$ol = strlen($o);
|
$ol = strlen($o);
|
||||||
$nl = strlen($n);
|
$nl = strlen($n);
|
||||||
|
|
||||||
$m = array_fill(0, strlen($o) + 1, array_fill(0, strlen($n) + 1, array()));
|
$m = array_fill(0, $ol + 1, array_fill(0, $nl + 1, array()));
|
||||||
|
|
||||||
$T_D = 'd';
|
$T_D = 'd';
|
||||||
$T_I = 'i';
|
$T_I = 'i';
|
||||||
$T_S = 's';
|
$T_S = 's';
|
||||||
$T_X = 'x';
|
$T_X = 'x';
|
||||||
|
|
||||||
$path = 0;
|
$m[0][0] = array(
|
||||||
for ($ii = 0; $ii <= $ol; $ii++) {
|
0,
|
||||||
$m[$ii][0][0] = $ii;
|
null);
|
||||||
$m[$ii][0][1] = ($path += $ii);
|
|
||||||
$m[$ii][0][2] = $T_D;
|
for ($ii = 1; $ii <= $ol; $ii++) {
|
||||||
|
$m[$ii][0] = array(
|
||||||
|
$ii * 1000,
|
||||||
|
$T_D);
|
||||||
}
|
}
|
||||||
|
|
||||||
$path = 0;
|
for ($jj = 1; $jj <= $nl; $jj++) {
|
||||||
for ($jj = 0; $jj <= $nl; $jj++) {
|
$m[0][$jj] = array(
|
||||||
$m[0][$jj][0] = $jj;
|
$jj * 1000,
|
||||||
$m[0][$jj][1] = ($path += $jj);
|
$T_I);
|
||||||
$m[0][$jj][2] = $T_I;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$ii = 1;
|
$ii = 1;
|
||||||
|
@ -285,31 +287,35 @@ final class ArcanistDiffUtils {
|
||||||
$sub_t_cost = $m[$ii - 1][$jj - 1][0] + 0;
|
$sub_t_cost = $m[$ii - 1][$jj - 1][0] + 0;
|
||||||
$sub_t = $T_S;
|
$sub_t = $T_S;
|
||||||
} else {
|
} else {
|
||||||
$sub_t_cost = $m[$ii - 1][$jj - 1][0] + 2;
|
$sub_t_cost = $m[$ii - 1][$jj - 1][0] + 2000;
|
||||||
$sub_t = $T_X;
|
$sub_t = $T_X;
|
||||||
}
|
}
|
||||||
$sub_p_cost = $m[$ii - 1][$jj - 1][1] + $sub_t_cost;
|
|
||||||
|
|
||||||
$del_t_cost = $m[$ii - 1][$jj][0] + 1;
|
if ($m[$ii - 1][$jj - 1][1] != $sub_t) {
|
||||||
$del_p_cost = $m[$ii - 1][$jj][1] + $del_t_cost;
|
$sub_t_cost += 1;
|
||||||
|
}
|
||||||
|
|
||||||
$ins_t_cost = $m[$ii][$jj - 1][0] + 1;
|
$del_t_cost = $m[$ii - 1][$jj][0] + 1000;
|
||||||
$ins_p_cost = $m[$ii][$jj - 1][1] + $ins_t_cost;
|
if ($m[$ii - 1][$jj][1] != $T_D) {
|
||||||
|
$del_t_cost += 1;
|
||||||
|
}
|
||||||
|
|
||||||
if ($sub_p_cost <= $del_p_cost && $sub_p_cost <= $ins_p_cost) {
|
$ins_t_cost = $m[$ii][$jj - 1][0] + 1000;
|
||||||
|
if ($m[$ii][$jj - 1][1] != $T_I) {
|
||||||
|
$ins_t_cost += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($sub_t_cost <= $del_t_cost && $sub_t_cost <= $ins_t_cost) {
|
||||||
$m[$ii][$jj] = array(
|
$m[$ii][$jj] = array(
|
||||||
$sub_t_cost,
|
$sub_t_cost,
|
||||||
$sub_p_cost,
|
|
||||||
$sub_t);
|
$sub_t);
|
||||||
} else if ($ins_p_cost <= $del_p_cost) {
|
} else if ($ins_t_cost <= $del_t_cost) {
|
||||||
$m[$ii][$jj] = array(
|
$m[$ii][$jj] = array(
|
||||||
$ins_t_cost,
|
$ins_t_cost,
|
||||||
$ins_p_cost,
|
|
||||||
$T_I);
|
$T_I);
|
||||||
} else {
|
} else {
|
||||||
$m[$ii][$jj] = array(
|
$m[$ii][$jj] = array(
|
||||||
$del_t_cost,
|
$del_t_cost,
|
||||||
$del_p_cost,
|
|
||||||
$T_D);
|
$T_D);
|
||||||
}
|
}
|
||||||
} while ($jj++ < $nl);
|
} while ($jj++ < $nl);
|
||||||
|
@ -319,18 +325,18 @@ final class ArcanistDiffUtils {
|
||||||
$ii = $ol;
|
$ii = $ol;
|
||||||
$jj = $nl;
|
$jj = $nl;
|
||||||
do {
|
do {
|
||||||
$r = $m[$ii][$jj][2];
|
$r = $m[$ii][$jj][1];
|
||||||
$result .= $r;
|
$result .= $r;
|
||||||
switch ($r) {
|
switch ($r) {
|
||||||
case 's':
|
case $T_S:
|
||||||
case 'x':
|
case $T_X:
|
||||||
$ii--;
|
$ii--;
|
||||||
$jj--;
|
$jj--;
|
||||||
break;
|
break;
|
||||||
case 'i':
|
case $T_I:
|
||||||
$jj--;
|
$jj--;
|
||||||
break;
|
break;
|
||||||
case 'd':
|
case $T_D:
|
||||||
$ii--;
|
$ii--;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
96
src/difference/__tests__/ArcanistDiffUtilsTestCase.php
Normal file
96
src/difference/__tests__/ArcanistDiffUtilsTestCase.php
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright 2011 Facebook, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test cases for @{class:ArcanistDiffUtils}.
|
||||||
|
*
|
||||||
|
* @group testcase
|
||||||
|
*/
|
||||||
|
class ArcanistDiffUtilsTestCase extends ArcanistPhutilTestCase {
|
||||||
|
public function testLevenshtein() {
|
||||||
|
$tests = array(
|
||||||
|
array(
|
||||||
|
'a',
|
||||||
|
'b',
|
||||||
|
'x'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'kalrmr(array($b))',
|
||||||
|
'array($b)',
|
||||||
|
'dddddddssssssssds'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'array($b)',
|
||||||
|
'kalrmr(array($b))',
|
||||||
|
'iiiiiiissssssssis'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'zkalrmr(array($b))z',
|
||||||
|
'xarray($b)x',
|
||||||
|
'dddddddxsssssssssdx'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'xarray($b)x',
|
||||||
|
'zkalrmr(array($b))z',
|
||||||
|
'iiiiiiixsssssssssix'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'abcdefghi',
|
||||||
|
'abcdefghi',
|
||||||
|
'sssssssss'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'abcdefghi',
|
||||||
|
'abcdefghijkl',
|
||||||
|
'sssssssssiii'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'abcdefghijkl',
|
||||||
|
'abcdefghi',
|
||||||
|
'sssssssssddd'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'xyzabcdefghi',
|
||||||
|
'abcdefghi',
|
||||||
|
'dddsssssssss'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'abcdefghi',
|
||||||
|
'xyzabcdefghi',
|
||||||
|
'iiisssssssss'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'abcdefg',
|
||||||
|
'abxdxfg',
|
||||||
|
'ssxsxss'
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'private function a($a, $b) {',
|
||||||
|
'public function and($b, $c) {',
|
||||||
|
'siixsdddxsssssssssssiissxsssxsss'
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
foreach ($tests as $test) {
|
||||||
|
$this->assertEqual(
|
||||||
|
$test[2],
|
||||||
|
ArcanistDiffUtils::buildLevenshteinDifferenceString($test[0], $test[1])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
13
src/difference/__tests__/__init__.php
Normal file
13
src/difference/__tests__/__init__.php
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* This file is automatically generated. Lint this module to rebuild it.
|
||||||
|
* @generated
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
phutil_require_module('arcanist', 'difference');
|
||||||
|
phutil_require_module('arcanist', 'unit/engine/phutil/testcase');
|
||||||
|
|
||||||
|
|
||||||
|
phutil_require_source('ArcanistDiffUtilsTestCase.php');
|
Loading…
Reference in a new issue