1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-23 21:18:19 +01:00

Make Differential linewrap utf-8 aware

Summary:
Differential uses a byte-oriented linewrap algorithm. Instead, use a
character-oriented one which will handle utf-8 properly.

This implies a very slight performance hit, but we only run this code for lines
which need to wrap, and the results get cached. It took about 2.5ms for the
test file on my machine. I'll keep an eye on it but I think it's currently a
manageable cost.

Test Plan:
Diffed this file: https://secure.phabricator.com/P43
...and got it to render like this:
https://secure.phabricator.com/file/info/PHID-FILE-331ac241bede705b193b/

To do so, I had to disable the un-utf8 block, which we can't actually do yet
because of intraline diff, but it shows that once we can get rid of that block,
it works completely correctly. It will "sort of" work in the meantime (nothing
terrible happens).

Reviewers: jungejason, aran, tuomaspelkonen
CC: aran, epriestley
Differential Revision: 513
This commit is contained in:
epriestley 2011-06-24 09:25:32 -07:00
parent 1b55c4bdc9
commit 5cfc14cb43

View file

@ -660,36 +660,54 @@ class DifferentialChangesetParser {
}
}
protected function lineWrap($l) {
/**
* Hard-wrap a piece of UTF-8 text with embedded HTML tags and entities.
*
* @param string An HTML string with tags and entities.
* @return string Hard-wrapped string.
*/
protected function lineWrap($line) {
$c = 0;
$len = strlen($l);
$ins = array();
$break_here = array();
// Convert the UTF-8 string into a list of UTF-8 characters.
$vector = phutil_utf8v($line);
$len = count($vector);
$byte_pos = 0;
for ($ii = 0; $ii < $len; ++$ii) {
if ($l[$ii] == '&') {
// An ampersand indicates an HTML entity; consume the whole thing (until
// ";") but treat it all as one character.
if ($vector[$ii] == '&') {
do {
++$ii;
} while ($l[$ii] != ';');
} while ($vector[$ii] != ';');
++$c;
} else if ($l[$ii] == '<') {
// An "<" indicates an HTML tag, consume the whole thing but don't treat
// it as a character.
} else if ($vector[$ii] == '<') {
do {
++$ii;
} while ($l[$ii] != '>');
} while ($vector[$ii] != '>');
} else {
++$c;
}
// Keep track of where we need to break the string later.
if ($c == $this->lineWidth) {
$ins[] = ($ii + 1);
$break_here[$ii] = true;
$c = 0;
}
}
while (($pos = array_pop($ins))) {
$l = substr_replace(
$l,
"<span class=\"over-the-line\">\xE2\xAC\x85</span><br />",
$pos,
0);
$result = array();
foreach ($vector as $ii => $char) {
$result[] = $char;
if (isset($break_here[$ii])) {
$result[] = "<span class=\"over-the-line\">!</span><br />";
}
}
return $l;
return implode('', $result);
}