1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2024-11-10 00:42:40 +01:00

Use linear distance instead of square distance in levenshtein

Summary:
We used square distance that optimized for the wrong thing. Making the same
markers spread out instead of being together

Also added a very little cost for switching type. That will make diff types
stick together a bit more

Task ID: #623

Blame Rev:

Test Plan:
Ran my new unit test. And tested a few diff in phabricator

Revert Plan:

Tags:

Reviewers: epriestley

Reviewed By: epriestley

CC: aran, epriestley

Differential Revision: 1112
This commit is contained in:
Emil Hesslow 2011-11-15 10:43:36 -08:00
parent 16caf63d98
commit 5be54ec283
4 changed files with 145 additions and 28 deletions

View file

@ -29,6 +29,7 @@ phutil_register_library_map(array(
'ArcanistDiffParser' => 'parser/diff',
'ArcanistDiffParserTestCase' => 'parser/diff/__tests__',
'ArcanistDiffUtils' => 'difference',
'ArcanistDiffUtilsTestCase' => 'difference/__tests__',
'ArcanistDiffWorkflow' => 'workflow/diff',
'ArcanistDifferentialCommitMessage' => 'differential/commitmessage',
'ArcanistDifferentialCommitMessageParserException' => 'differential/commitmessage',
@ -110,6 +111,7 @@ phutil_register_library_map(array(
'ArcanistCommitWorkflow' => 'ArcanistBaseWorkflow',
'ArcanistCoverWorkflow' => 'ArcanistBaseWorkflow',
'ArcanistDiffParserTestCase' => 'ArcanistPhutilTestCase',
'ArcanistDiffUtilsTestCase' => 'ArcanistPhutilTestCase',
'ArcanistDiffWorkflow' => 'ArcanistBaseWorkflow',
'ArcanistDownloadWorkflow' => 'ArcanistBaseWorkflow',
'ArcanistExportWorkflow' => 'ArcanistBaseWorkflow',

View file

@ -204,7 +204,7 @@ final class ArcanistDiffUtils {
return array_values($runs);
}
private static function buildLevenshteinDifferenceString($o, $n) {
public static function buildLevenshteinDifferenceString($o, $n) {
$olt = strlen($o);
$nlt = strlen($n);
@ -256,25 +256,27 @@ final class ArcanistDiffUtils {
$ol = strlen($o);
$nl = strlen($n);
$m = array_fill(0, strlen($o) + 1, array_fill(0, strlen($n) + 1, array()));
$m = array_fill(0, $ol + 1, array_fill(0, $nl + 1, array()));
$T_D = 'd';
$T_I = 'i';
$T_S = 's';
$T_X = 'x';
$path = 0;
for ($ii = 0; $ii <= $ol; $ii++) {
$m[$ii][0][0] = $ii;
$m[$ii][0][1] = ($path += $ii);
$m[$ii][0][2] = $T_D;
$m[0][0] = array(
0,
null);
for ($ii = 1; $ii <= $ol; $ii++) {
$m[$ii][0] = array(
$ii * 1000,
$T_D);
}
$path = 0;
for ($jj = 0; $jj <= $nl; $jj++) {
$m[0][$jj][0] = $jj;
$m[0][$jj][1] = ($path += $jj);
$m[0][$jj][2] = $T_I;
for ($jj = 1; $jj <= $nl; $jj++) {
$m[0][$jj] = array(
$jj * 1000,
$T_I);
}
$ii = 1;
@ -285,31 +287,35 @@ final class ArcanistDiffUtils {
$sub_t_cost = $m[$ii - 1][$jj - 1][0] + 0;
$sub_t = $T_S;
} else {
$sub_t_cost = $m[$ii - 1][$jj - 1][0] + 2;
$sub_t_cost = $m[$ii - 1][$jj - 1][0] + 2000;
$sub_t = $T_X;
}
$sub_p_cost = $m[$ii - 1][$jj - 1][1] + $sub_t_cost;
$del_t_cost = $m[$ii - 1][$jj][0] + 1;
$del_p_cost = $m[$ii - 1][$jj][1] + $del_t_cost;
if ($m[$ii - 1][$jj - 1][1] != $sub_t) {
$sub_t_cost += 1;
}
$ins_t_cost = $m[$ii][$jj - 1][0] + 1;
$ins_p_cost = $m[$ii][$jj - 1][1] + $ins_t_cost;
$del_t_cost = $m[$ii - 1][$jj][0] + 1000;
if ($m[$ii - 1][$jj][1] != $T_D) {
$del_t_cost += 1;
}
if ($sub_p_cost <= $del_p_cost && $sub_p_cost <= $ins_p_cost) {
$ins_t_cost = $m[$ii][$jj - 1][0] + 1000;
if ($m[$ii][$jj - 1][1] != $T_I) {
$ins_t_cost += 1;
}
if ($sub_t_cost <= $del_t_cost && $sub_t_cost <= $ins_t_cost) {
$m[$ii][$jj] = array(
$sub_t_cost,
$sub_p_cost,
$sub_t);
} else if ($ins_p_cost <= $del_p_cost) {
} else if ($ins_t_cost <= $del_t_cost) {
$m[$ii][$jj] = array(
$ins_t_cost,
$ins_p_cost,
$T_I);
} else {
$m[$ii][$jj] = array(
$del_t_cost,
$del_p_cost,
$T_D);
}
} while ($jj++ < $nl);
@ -319,18 +325,18 @@ final class ArcanistDiffUtils {
$ii = $ol;
$jj = $nl;
do {
$r = $m[$ii][$jj][2];
$r = $m[$ii][$jj][1];
$result .= $r;
switch ($r) {
case 's':
case 'x':
case $T_S:
case $T_X:
$ii--;
$jj--;
break;
case 'i':
case $T_I:
$jj--;
break;
case 'd':
case $T_D:
$ii--;
break;
}

View file

@ -0,0 +1,96 @@
<?php
/*
* Copyright 2011 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Test cases for @{class:ArcanistDiffUtils}.
*
* @group testcase
*/
class ArcanistDiffUtilsTestCase extends ArcanistPhutilTestCase {
public function testLevenshtein() {
$tests = array(
array(
'a',
'b',
'x'
),
array(
'kalrmr(array($b))',
'array($b)',
'dddddddssssssssds'
),
array(
'array($b)',
'kalrmr(array($b))',
'iiiiiiissssssssis'
),
array(
'zkalrmr(array($b))z',
'xarray($b)x',
'dddddddxsssssssssdx'
),
array(
'xarray($b)x',
'zkalrmr(array($b))z',
'iiiiiiixsssssssssix'
),
array(
'abcdefghi',
'abcdefghi',
'sssssssss'
),
array(
'abcdefghi',
'abcdefghijkl',
'sssssssssiii'
),
array(
'abcdefghijkl',
'abcdefghi',
'sssssssssddd'
),
array(
'xyzabcdefghi',
'abcdefghi',
'dddsssssssss'
),
array(
'abcdefghi',
'xyzabcdefghi',
'iiisssssssss'
),
array(
'abcdefg',
'abxdxfg',
'ssxsxss'
),
array(
'private function a($a, $b) {',
'public function and($b, $c) {',
'siixsdddxsssssssssssiissxsssxsss'
),
);
foreach ($tests as $test) {
$this->assertEqual(
$test[2],
ArcanistDiffUtils::buildLevenshteinDifferenceString($test[0], $test[1])
);
}
}
}

View file

@ -0,0 +1,13 @@
<?php
/**
* This file is automatically generated. Lint this module to rebuild it.
* @generated
*/
phutil_require_module('arcanist', 'difference');
phutil_require_module('arcanist', 'unit/engine/phutil/testcase');
phutil_require_source('ArcanistDiffUtilsTestCase.php');