From 18e34d06bcb589127c9db621a41a20a319fb65ba Mon Sep 17 00:00:00 2001
From: epriestley
Date: Fri, 24 Jun 2011 10:11:43 -0700
Subject: [PATCH] Disable intraline diff highlighting algorithm for lines which contain UTF-8 text

Summary: This is sort of cheating, but just have this feature disable itself if the input contains multibyte UTF-8 characters. We can clean it up in the future, maybe when we have better utf8 tools. This means that all the algorithms are safe to pass utf8 to, so we can get rid of all the "" silliness.

Test Plan: Added a UTF8 character to a line, diffed it out, and got the entire line highlighted as changed. See: https://secure.phabricator.com/file/view/PHID-FILE-70fb54eb3f88dc057ab3/

Reviewers: jungejason, aran, tuomaspelkonen

CC:

Differential Revision: 514
---
 src/difference/ArcanistDiffUtils.php | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php
index e1534cfb..9949d464 100644
--- a/src/difference/ArcanistDiffUtils.php
+++ b/src/difference/ArcanistDiffUtils.php
@@ -55,6 +55,16 @@ final class ArcanistDiffUtils {
       );
     }
 
+    // This algorithm is byte-oriented and thus not safe for UTF-8, so just
+    // mark all the text as changed if either string has multibyte characters
+    // in it. TODO: Fix this so that this algorithm is UTF-8 aware.
+    if (preg_match('/[\x80-\xFF]/', $o.$n)) {
+      return array(
+        array(array(1, strlen($o))),
+        array(array(1, strlen($n))),
+      );
+    }
+
     $result = self::buildLevenshteinDifferenceString($o, $n);
 
     do {