';
    $highlight_c = '';

    // NOTE(review): the start of this method is outside the visible source.
    // $str, $intra_stack, $buf, $p, $s, $e, $highlight, $tag, $ent and
    // $highlight_o are all set up before this point -- confirm their initial
    // values against the full file.

    // Safe-HTML wrappers are unwrapped to a plain string here and re-wrapped
    // just before returning.
    $is_html = false;
    if ($str instanceof PhutilSafeHTML) {
      $is_html = true;
      $str = $str->getHTMLContent();
    }

    // The string is walked one byte at a time (strlen() and $str[$i] are
    // byte-based).
    $n = strlen($str);
    for ($i = 0; $i < $n; $i++) {

      // When the current run is exhausted, pull runs off the stack until a
      // run whose first element is nonzero is found. If the stack runs dry,
      // the remainder of the string is appended verbatim and we are done.
      if ($p == $e) {
        do {
          if (empty($intra_stack)) {
            $buf .= substr($str, $i);
            break 2;
          }
          $stack = array_shift($intra_stack);
          $s = $e;
          $e += $stack[1];
        } while ($stack[0] == 0);
      }

      // Open the highlight at the start of a run, but never inside a tag or
      // an entity.
      if (!$highlight && !$tag && !$ent && $p == $s) {
        $buf .= $highlight_o;
        $highlight = true;
      }

      // Entering a tag: close the highlight so the markers never straddle
      // a tag boundary.
      if ($str[$i] == '<') {
        $tag = true;
        if ($highlight) {
          $buf .= $highlight_c;
        }
      }

      // $p only advances outside of tags, and an entire "&...;" entity
      // advances it by exactly one.
      if (!$tag) {
        if ($str[$i] == '&') {
          $ent = true;
        }
        if ($ent && $str[$i] == ';') {
          $ent = false;
        }
        if (!$ent) {
          $p++;
        }
      }

      $buf .= $str[$i];

      // Leaving a tag: reopen the highlight that was closed on entry.
      if ($tag && $str[$i] == '>') {
        $tag = false;
        if ($highlight) {
          $buf .= $highlight_o;
        }
      }

      // Close the highlight at the end of the run or at the end of input.
      if ($highlight && ($p == $e || $i == $n - 1)) {
        $buf .= $highlight_c;
        $highlight = false;
      }
    }

    if ($is_html) {
      return phutil_safe_html($buf);
    }

    return $buf;
  }

  /**
   * Merge neighbouring runs that share the same type.
   *
   * Each run is a pair whose first element is the run type and whose second
   * element is the run length. When two consecutive runs have equal types,
   * the earlier run's length is folded into the later one and the earlier
   * run is dropped.
   *
   * @param  array List of `array(type, length)` runs.
   * @return array Reindexed list of merged runs.
   */
  private static function collapseIntralineRuns($runs) {
    $count = count($runs);
    for ($ii = 0; $ii < $count - 1; $ii++) {
      if ($runs[$ii][0] == $runs[$ii + 1][0]) {
        // Accumulate forward so a chain of equal-typed runs ends up as a
        // single run at the position of the last one.
        $runs[$ii + 1][1] += $runs[$ii][1];
        unset($runs[$ii]);
      }
    }
    return array_values($runs);
  }

  /**
   * Build an edit string describing how to turn one byte string into another.
   *
   * The result consists of the characters 's' (same), 'x' (substitute),
   * 'i' (insert) and 'd' (delete). Comparison is byte-wise.
   *
   * @param  string Original string.
   * @param  string New string.
   * @return string Edit string made of 's', 'x', 'i' and 'd'.
   */
  public static function buildLevenshteinDifferenceString($o, $n) {
    $olt = strlen($o);
    $nlt = strlen($n);

    // Trivial cases: one side is empty, so everything is inserted or deleted.
    if (!$olt) {
      return str_repeat('i', $nlt);
    }

    if (!$nlt) {
      return str_repeat('d', $olt);
    }

    $min = min($olt, $nlt);

    // Length of the common prefix.
    $pre = 0;
    while ($pre < $min && $o[$pre] == $n[$pre]) {
      $pre++;
    }

    // Length of the common suffix.
    $end = 0;
    while ($end < $min && $o[($olt - 1) - $end] == $n[($nlt - 1) - $end]) {
      $end++;
    }

    // The prefix and suffix together cover the whole shorter string, so the
    // only difference is a single block of insertions or deletions.
    if ($end + $pre >= $min) {
      // Clamp the suffix so it does not overlap the prefix.
      $end = min($end, $min - $pre);
      $prefix = str_repeat('s', $pre);
      $suffix = str_repeat('s', $end);
      $infix = null;
      if ($olt > $nlt) {
        $infix = str_repeat('d', $olt - ($end + $pre));
      } else if ($nlt > $olt) {
        $infix = str_repeat('i', $nlt - ($end + $pre));
      }
      return $prefix.$infix.$suffix;
    }

    // If the differing middle is longer than 80 bytes, skip the quadratic
    // table below and report a blanket substitution plus the length
    // difference.
    if ($min - ($end + $pre) > 80) {
      $max = max($olt, $nlt);
      return str_repeat('x', $min).
        str_repeat($olt < $nlt ? 'i' : 'd', $max - $min);
    }

    $prefix = str_repeat('s', $pre);
    $suffix = str_repeat('s', $end);

    // Only the differing middle goes through the dynamic program.
    $o = substr($o, $pre, $olt - $end - $pre);
    $n = substr($n, $pre, $nlt - $end - $pre);

    $ol = strlen($o);
    $nl = strlen($n);

    // Each cell of $m holds array(cost, edit type used to reach the cell).
    $m = array_fill(0, $ol + 1, array_fill(0, $nl + 1, array()));

    $t_d = 'd';
    $t_i = 'i';
    $t_s = 's';
    $t_x = 'x';

    $m[0][0] = array(0, null);

    for ($ii = 1; $ii <= $ol; $ii++) {
      $m[$ii][0] = array($ii * 1000, $t_d);
    }

    for ($jj = 1; $jj <= $nl; $jj++) {
      $m[0][$jj] = array($jj * 1000, $t_i);
    }

    // Both trimmed strings are nonempty here (the prefix/suffix case above
    // returned otherwise), so the do/while loops run at least once safely.
    $ii = 1;
    do {
      $jj = 1;
      do {
        // A match costs 0, a substitution 2000, and an insert or delete
        // 1000 each.
        if ($o[$ii - 1] == $n[$jj - 1]) {
          $sub_t_cost = $m[$ii - 1][$jj - 1][0] + 0;
          $sub_t = $t_s;
        } else {
          $sub_t_cost = $m[$ii - 1][$jj - 1][0] + 2000;
          $sub_t = $t_x;
        }

        // A penalty of 1 is added whenever the edit type differs from the
        // type of the cell it extends, which favours longer runs of the
        // same edit type among otherwise equal-cost paths.
        if ($m[$ii - 1][$jj - 1][1] != $sub_t) {
          $sub_t_cost += 1;
        }

        $del_t_cost = $m[$ii - 1][$jj][0] + 1000;
        if ($m[$ii - 1][$jj][1] != $t_d) {
          $del_t_cost += 1;
        }

        $ins_t_cost = $m[$ii][$jj - 1][0] + 1000;
        if ($m[$ii][$jj - 1][1] != $t_i) {
          $ins_t_cost += 1;
        }

        // Ties prefer substitution/match, then insertion, then deletion.
        if ($sub_t_cost <= $del_t_cost && $sub_t_cost <= $ins_t_cost) {
          $m[$ii][$jj] = array($sub_t_cost, $sub_t);
        } else if ($ins_t_cost <= $del_t_cost) {
          $m[$ii][$jj] = array($ins_t_cost, $t_i);
        } else {
          $m[$ii][$jj] = array($del_t_cost, $t_d);
        }
      } while ($jj++ < $nl);
    } while ($ii++ < $ol);

    // Walk back from the bottom-right cell to the origin; the edit string
    // comes out reversed.
    $result = '';
    $ii = $ol;
    $jj = $nl;
    do {
      $r = $m[$ii][$jj][1];
      $result .= $r;
      switch ($r) {
        case $t_s:
        case $t_x:
          $ii--;
          $jj--;
          break;
        case $t_i:
          $jj--;
          break;
        case $t_d:
          $ii--;
          break;
      }
    } while ($ii || $jj);

    return $prefix.strrev($result).$suffix;
  }

  /**
   * Compute a coarse intraline diff that respects UTF-8 character boundaries.
   *
   * Both strings are split into characters, and the longest common prefix
   * and suffix are located. Everything in between is reported as a single
   * changed run.
   *
   * The result is a pair `array($old_runs, $new_runs)`. Each run is
   * `array($type, $length)` with type 0 for unchanged and 1 for changed.
   * Lengths are in bytes, consistent with the strlen()-based totals.
   *
   * If the strings share no prefix or no suffix, each whole line is reported
   * as one changed run.
   *
   * @param  string Original line.
   * @param  string New line.
   * @return array  Pair of run lists, old first and new second.
   */
  public static function generateUTF8IntralineDiff($o, $n) {
    $old_length = strlen($o);
    $new_length = strlen($n);

    if (!$old_length || !$new_length) {
      return array(
        array(array(0, $old_length)),
        array(array(0, $new_length)),
      );
    }

    // Break both strings into their component characters.
    $old_characters = phutil_utf8v($o);
    $new_characters = phutil_utf8v($n);

    $old_count = count($old_characters);
    $new_count = count($new_characters);
    $min_count = min($old_count, $new_count);

    // Prefix matching, bounded by the shorter string so neither character
    // list is read past its end.
    $prefix_count = 0;
    while ($prefix_count < $min_count &&
           $old_characters[$prefix_count] ===
           $new_characters[$prefix_count]) {
      $prefix_count++;
    }

    // The strings are identical: report no change.
    if ($old_count == $new_count && $prefix_count == $old_count) {
      return array(
        array(array(0, $old_length)),
        array(array(0, $new_length)),
      );
    }

    // Suffix matching. The suffix may not reach back into the characters
    // already claimed by the prefix; otherwise the changed run in the middle
    // would end up with a negative length.
    $suffix_limit = $min_count - $prefix_count;
    $suffix_count = 0;
    while ($suffix_count < $suffix_limit &&
           $old_characters[$old_count - 1 - $suffix_count] ===
           $new_characters[$new_count - 1 - $suffix_count]) {
      $suffix_count++;
    }

    // Strings that differ at the very beginning, at the very end, or at
    // both are reported as changed in their entirety.
    if (!$prefix_count || !$suffix_count) {
      return array(
        array(array(1, $old_length)),
        array(array(1, $new_length)),
      );
    }

    // Convert the character counts to byte lengths so the run boundaries
    // line up with the byte-based totals. The prefix and suffix characters
    // are identical in both strings, so measuring them on the old side is
    // enough.
    $prefix_bytes = strlen(
      implode('', array_slice($old_characters, 0, $prefix_count)));
    $suffix_bytes = strlen(
      implode('', array_slice($old_characters, $old_count - $suffix_count)));

    $result = array();
    foreach (array($old_length, $new_length) as $length) {
      $changed = $length - $prefix_bytes - $suffix_bytes;
      if ($changed > 0) {
        $result[] = array(
          array(0, $prefix_bytes),
          array(1, $changed),
          array(0, $suffix_bytes),
        );
      } else {
        // A pure insertion or deletion leaves the other side untouched;
        // report that side as one unchanged run rather than emitting a
        // zero-length changed run.
        $result[] = array(array(0, $length));
      }
    }

    return $result;
  }

}