mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-13 00:01:03 +01:00
Make Differential linewrap utf-8 aware
Summary: Differential uses a byte-oriented linewrap algorithm. Instead, use a character-oriented one which will handle utf-8 properly. This implies a very slight performance hit, but we only run this code for lines which need to wrap, and the results get cached. It took about 2.5ms for the test file on my machine. I'll keep an eye on it but I think it's currently a manageable cost. Test Plan: Diffed this file: https://secure.phabricator.com/P43 ...and got it to render like this: https://secure.phabricator.com/file/info/PHID-FILE-331ac241bede705b193b/ To do so, I had to disable the un-utf8 block which we can't actually do yet because of intraline diff, but it shows that once we can get rid of that it works completely correctly. It will "sort of" work in the meantime (nothing terrible happens). Reviewers: jungejason, aran, tuomaspelkonen CC: Differential Revision: 513
This commit is contained in:
parent
a632b220a8
commit
e5a036e8c9
1 changed files with 33 additions and 15 deletions
|
@ -660,36 +660,54 @@ class DifferentialChangesetParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function lineWrap($l) {
|
/**
|
||||||
|
* Hard-wrap a piece of UTF-8 text with embedded HTML tags and entities.
|
||||||
|
*
|
||||||
|
* @param string An HTML string with tags and entities.
|
||||||
|
* @return string Hard-wrapped string.
|
||||||
|
*/
|
||||||
|
protected function lineWrap($line) {
|
||||||
$c = 0;
|
$c = 0;
|
||||||
$len = strlen($l);
|
$break_here = array();
|
||||||
$ins = array();
|
|
||||||
|
// Convert the UTF-8 string into a list of UTF-8 characters.
|
||||||
|
$vector = phutil_utf8v($line);
|
||||||
|
$len = count($vector);
|
||||||
|
$byte_pos = 0;
|
||||||
for ($ii = 0; $ii < $len; ++$ii) {
|
for ($ii = 0; $ii < $len; ++$ii) {
|
||||||
if ($l[$ii] == '&') {
|
// An ampersand indicates an HTML entity; consume the whole thing (until
|
||||||
|
// ";") but treat it all as one character.
|
||||||
|
if ($vector[$ii] == '&') {
|
||||||
do {
|
do {
|
||||||
++$ii;
|
++$ii;
|
||||||
} while ($l[$ii] != ';');
|
} while ($vector[$ii] != ';');
|
||||||
++$c;
|
++$c;
|
||||||
} else if ($l[$ii] == '<') {
|
// An "<" indicates an HTML tag, consume the whole thing but don't treat
|
||||||
|
// it as a character.
|
||||||
|
} else if ($vector[$ii] == '<') {
|
||||||
do {
|
do {
|
||||||
++$ii;
|
++$ii;
|
||||||
} while ($l[$ii] != '>');
|
} while ($vector[$ii] != '>');
|
||||||
} else {
|
} else {
|
||||||
++$c;
|
++$c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Keep track of where we need to break the string later.
|
||||||
if ($c == $this->lineWidth) {
|
if ($c == $this->lineWidth) {
|
||||||
$ins[] = ($ii + 1);
|
$break_here[$ii] = true;
|
||||||
$c = 0;
|
$c = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (($pos = array_pop($ins))) {
|
|
||||||
$l = substr_replace(
|
$result = array();
|
||||||
$l,
|
foreach ($vector as $ii => $char) {
|
||||||
"<span class=\"over-the-line\">\xE2\xAC\x85</span><br />",
|
$result[] = $char;
|
||||||
$pos,
|
if (isset($break_here[$ii])) {
|
||||||
0);
|
$result[] = "<span class=\"over-the-line\">!</span><br />";
|
||||||
}
|
}
|
||||||
return $l;
|
}
|
||||||
|
|
||||||
|
return implode('', $result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue