2011-01-25 01:46:19 +01:00
|
|
|
<?php
|
|
|
|
|
2011-02-19 20:36:08 +01:00
|
|
|
/**
|
|
|
|
* Dumping ground for diff- and diff-algorithm-related miscellany.
|
|
|
|
*
|
|
|
|
* @group diff
|
|
|
|
*/
|
2011-01-25 01:46:19 +01:00
|
|
|
final class ArcanistDiffUtils {
|
|
|
|
|
2011-10-24 19:33:34 +02:00
|
|
|
/**
 * Conservatively guess whether a blob of file content is binary.
 *
 * This mirrors the Git heuristic: content that contains a NUL ("\0") byte is
 * treated as binary. Git only inspects the leading portion of a file as an
 * optimization; this implementation scans the whole string instead.
 *
 * @param string File content to inspect.
 * @return bool True if the content contains a NUL byte and is therefore
 *              almost certainly binary. False means no NUL byte was found;
 *              the content may still be binary in a less obvious way.
 */
public static function isHeuristicBinaryFile($data) {
  $nul_offset = strpos($data, "\0");

  // strpos() reports "not found" as false; offset 0 is a legitimate hit, so
  // the comparison has to be strict.
  if ($nul_offset === false) {
    return false;
  }

  return true;
}
|
|
|
|
|
2011-01-25 01:46:19 +01:00
|
|
|
/**
 * Render a unified diff between two values by running the system `diff`.
 *
 * Both values are cast to strings, newline-terminated, and written to
 * temporary files which are then handed to `diff`. If the two values are
 * identical as strings, an explanatory line is appended to the new value so
 * that the resulting diff is not empty.
 *
 * @param wild   Old value; cast to string.
 * @param wild   New value; cast to string.
 * @param int    Number of context lines, passed to `diff -U`.
 * @param string Additional `diff` options, substituted through the `%C`
 *               conversion (presumably unescaped - confirm against
 *               exec_manual before passing untrusted input).
 * @return string Standard output produced by `diff`. The exit code is not
 *                inspected.
 */
public static function renderDifferences(
  $old,
  $new,
  $context_lines = 3,
  $diff_options = "-L 'Old Value' -L 'New Value'") {

  $old_text = (string)$old;
  $new_text = (string)$new;

  if ($old_text === $new_text) {
    $new_text .= "\n(Old and new values are identical.)";
  }

  $old_file = new TempFile();
  $new_file = new TempFile();

  Filesystem::writeFile($old_file, $old_text."\n");
  Filesystem::writeFile($new_file, $new_text."\n");

  $result = exec_manual(
    'diff %C -U %s %s %s',
    $diff_options,
    $context_lines,
    $old_file,
    $new_file);

  // Index 1 of the result holds stdout; index 0 is the exit code, which is
  // deliberately ignored here.
  return $result[1];
}
|
|
|
|
|
|
|
|
/**
 * Compute intraline change runs for the old and new versions of a line.
 *
 * The result is a pair `array($old_runs, $new_runs)`. Each run is
 * `array($type, $length)`. When the two lines are identical, or either one
 * is empty, no real diff is computed: each side is reported as a single run
 * of type 0 spanning its whole byte length. Otherwise the work is delegated
 * to computeIntralineEdits().
 *
 * @param string Old line.
 * @param string New line.
 * @return pair<list, list> Runs for the old line and runs for the new line.
 */
public static function generateIntralineDiff($o, $n) {
  $old_length = strlen($o);
  $new_length = strlen($n);

  $needs_diff = ($o !== $n) && $old_length && $new_length;
  if ($needs_diff) {
    return self::computeIntralineEdits($o, $n);
  }

  return array(
    array(array(0, $old_length)),
    array(array(0, $new_length)),
  );
}
|
|
|
|
|
|
|
|
/**
 * Wrap the changed portions of a line in highlight markup.
 *
 * Walks the line byte by byte while tracking a "visible" position. Bytes
 * inside a `<...>` tag do not advance the position, and an `&...;` entity
 * advances it exactly once (on the closing `;`). Runs are consumed from the
 * front of the run list; runs whose type is 0 are skipped, and every other
 * run is wrapped in `<span class="bright">`. The span is closed before a tag
 * starts and reopened after the tag ends, so a highlight never straddles
 * existing markup.
 *
 * @param string|PhutilSafeHTML Line to decorate.
 * @param list List of `array($type, $length)` runs.
 * @return string|PhutilSafeHTML Decorated line. Safe HTML is returned if and
 *                               only if safe HTML was passed in.
 */
public static function applyIntralineDiff($str, $intra_stack) {
  $span_open = '<span class="bright">';
  $span_close = '</span>';

  $was_safe_html = ($str instanceof PhutilSafeHTML);
  if ($was_safe_html) {
    $str = $str->getHTMLContent();
  }

  $output = '';

  // Visible position in the line, and the bounds of the current run.
  $visible = 0;
  $run_start = 0;
  $run_end = 0;

  $in_highlight = false;
  $in_tag = false;
  $in_entity = false;

  $length = strlen($str);
  for ($offset = 0; $offset < $length; $offset++) {

    // The current run is used up: pull runs until we reach one which needs
    // highlighting. If none remain, emit the rest of the line untouched.
    if ($visible == $run_end) {
      do {
        if (empty($intra_stack)) {
          $output .= substr($str, $offset);
          break 2;
        }
        $run = array_shift($intra_stack);
        $run_start = $run_end;
        $run_end += $run[1];
      } while ($run[0] == 0);
    }

    $char = $str[$offset];

    if (!$in_highlight && !$in_tag && !$in_entity &&
        $visible == $run_start) {
      $output .= $span_open;
      $in_highlight = true;
    }

    if ($char == '<') {
      $in_tag = true;
      if ($in_highlight) {
        // Suspend the highlight for the duration of the tag.
        $output .= $span_close;
      }
    }

    if (!$in_tag) {
      if ($char == '&') {
        $in_entity = true;
      } else if ($in_entity && $char == ';') {
        $in_entity = false;
      }
      if (!$in_entity) {
        $visible++;
      }
    }

    $output .= $char;

    if ($in_tag && $char == '>') {
      $in_tag = false;
      if ($in_highlight) {
        // Resume the highlight which was suspended when the tag began.
        $output .= $span_open;
      }
    }

    $at_run_end = ($visible == $run_end);
    $at_last_byte = ($offset == $length - 1);
    if ($in_highlight && ($at_run_end || $at_last_byte)) {
      $output .= $span_close;
      $in_highlight = false;
    }
  }

  return $was_safe_html ? phutil_safe_html($output) : $output;
}
|
|
|
|
|
|
|
|
/**
 * Merge adjacent runs which share the same type.
 *
 * Takes a list of `array($type, $length)` runs and folds each run into the
 * following one whenever their types compare equal, summing the lengths.
 *
 * @param list List of `array($type, $length)` runs.
 * @return list Reindexed list in which no two neighbouring runs have equal
 *              types.
 */
private static function collapseIntralineRuns($runs) {
  $collapsed = array();
  $pending = null;

  foreach ($runs as $run) {
    if ($pending !== null) {
      if ($pending[0] == $run[0]) {
        // Same type as the run before it: absorb the earlier length into
        // this run instead of emitting the earlier run.
        $run[1] += $pending[1];
      } else {
        $collapsed[] = $pending;
      }
    }
    $pending = $run;
  }

  if ($pending !== null) {
    $collapsed[] = $pending;
  }

  return $collapsed;
}
|
|
|
|
|
2013-05-23 23:38:09 +02:00
|
|
|
/**
 * Build an edit string describing how to turn one sequence into another.
 *
 * Configures a PhutilEditDistanceMatrix with a replace cost of 2, an alter
 * cost of `1 / (2 * $max)` and a maximum length of `$max`, then returns the
 * edit string it computes for the two sequences.
 *
 * @param list Old sequence (one element per character).
 * @param list New sequence (one element per character).
 * @param int  Maximum length handed to the matrix; also scales the alter
 *             cost.
 * @return string Edit string produced by the matrix.
 */
public static function generateEditString(array $ov, array $nv, $max = 80) {
  $matrix = new PhutilEditDistanceMatrix();

  $matrix = $matrix
    ->setComputeString(true)
    ->setAlterCost(1 / ($max * 2))
    ->setReplaceCost(2)
    ->setMaximumLength($max)
    ->setSequences($ov, $nv);

  return $matrix->getEditString();
}
|
|
|
|
|
2013-05-23 23:38:09 +02:00
|
|
|
/**
 * Compute byte-based intraline edit runs for two versions of a line.
 *
 * The lines are split into characters (combined UTF-8 characters if either
 * line contains a high byte, single bytes otherwise), an edit string is
 * generated for the two character sequences, short runs of unchanged
 * characters sandwiched between changes are smoothed into changes, and the
 * character-based edit string is finally converted into byte-based runs.
 *
 * @param string Old line.
 * @param string New line.
 * @return pair<list, list> `array($old_runs, $new_runs)`, where each run is
 *                          `array($type, $byte_length)`. Type 0 marks an
 *                          unchanged run and type 1 marks a changed run.
 *                          Adjacent runs of the same type are merged.
 */
public static function computeIntralineEdits($o, $n) {
  if (preg_match('/[\x80-\xFF]/', $o.$n)) {
    $ov = phutil_utf8v_combined($o);
    $nv = phutil_utf8v_combined($n);
    $multibyte = true;
  } else {
    $ov = str_split($o);
    $nv = str_split($n);
    $multibyte = false;
  }

  $result = self::generateEditString($ov, $nv);

  // Smooth the string out, by replacing short runs of similar characters
  // with 'x' operations. This makes the result more readable to humans, since
  // there are fewer choppy runs of short added and removed substrings.
  do {
    $original = $result;
    $result = preg_replace(
      '/([xdi])(s{3})([xdi])/',
      '$1xxx$3',
      $result);
    $result = preg_replace(
      '/([xdi])(s{2})([xdi])/',
      '$1xx$3',
      $result);
    $result = preg_replace(
      '/([xdi])(s{1})([xdi])/',
      '$1x$3',
      $result);
  } while ($result != $original);

  // Now we have a character-based description of the edit. We need to
  // convert into a byte-based description. Walk through the edit string and
  // adjust each operation to reflect the number of bytes in the underlying
  // character.

  $o_pos = 0;
  $n_pos = 0;
  $result_len = strlen($result);
  $o_run = array();
  $n_run = array();

  $old_char_len = 1;
  $new_char_len = 1;

  for ($ii = 0; $ii < $result_len; $ii++) {
    $c = $result[$ii];

    if ($multibyte) {
      // One side may already be fully consumed (for example, when the edit
      // string ends in a run of inserts or deletes). Only read a character
      // which actually exists; the length of the exhausted side is never
      // used by the operations which can still occur.
      $old_char_len = isset($ov[$o_pos]) ? strlen($ov[$o_pos]) : 0;
      $new_char_len = isset($nv[$n_pos]) ? strlen($nv[$n_pos]) : 0;
    }

    // Reset per-operation byte counts so an unrecognized operation can not
    // reuse the counts from the previous iteration.
    $byte_o = 0;
    $byte_n = 0;

    switch ($c) {
      case 's':
      case 'x':
        $byte_o = $old_char_len;
        $byte_n = $new_char_len;
        $o_pos++;
        $n_pos++;
        break;
      case 'i':
        $byte_n = $new_char_len;
        $n_pos++;
        break;
      case 'd':
        $byte_o = $old_char_len;
        $o_pos++;
        break;
    }

    // Only 's' (same) is an unchanged run; everything else is a change.
    $type = ($c == 's') ? 0 : 1;

    if ($byte_o) {
      $o_run[] = array($type, $byte_o);
    }

    if ($byte_n) {
      $n_run[] = array($type, $byte_n);
    }
  }

  $o_run = self::collapseIntralineRuns($o_run);
  $n_run = self::collapseIntralineRuns($n_run);

  return array($o_run, $n_run);
}
|
|
|
|
|
2011-01-25 01:46:19 +01:00
|
|
|
}
|