1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-13 00:01:03 +01:00

Make Differential linewrap utf-8 aware

Summary: Differential uses a byte-oriented linewrap algorithm. Instead, use a character-oriented one which will handle utf-8 properly.

This implies a very slight performance hit, but we only run this code for lines which need to wrap, and the results get cached. It took about 2.5ms for the test file on my machine. I'll keep an eye on it but I think it's currently a manageable cost.

Test Plan: Diffed this file: https://secure.phabricator.com/P43
...and got it to render like this: https://secure.phabricator.com/file/info/PHID-FILE-331ac241bede705b193b/

To do so, I had to disable the un-utf8 block, which we can't actually do yet because of intraline diff, but it shows that once we can get rid of that block, wrapping works completely correctly. It will "sort of" work in the meantime (nothing terrible happens).

Reviewers: jungejason, aran, tuomaspelkonen

CC:

Differential Revision: 513
This commit is contained in:
epriestley 2011-06-24 09:25:32 -07:00
parent a632b220a8
commit e5a036e8c9

View file

@ -660,36 +660,54 @@ class DifferentialChangesetParser {
} }
} }
protected function lineWrap($l) { /**
* Hard-wrap a piece of UTF-8 text with embedded HTML tags and entities.
*
* @param string An HTML string with tags and entities.
* @return string Hard-wrapped string.
*/
protected function lineWrap($line) {
$c = 0; $c = 0;
$len = strlen($l); $break_here = array();
$ins = array();
// Convert the UTF-8 string into a list of UTF-8 characters.
$vector = phutil_utf8v($line);
$len = count($vector);
$byte_pos = 0;
for ($ii = 0; $ii < $len; ++$ii) { for ($ii = 0; $ii < $len; ++$ii) {
if ($l[$ii] == '&') { // An ampersand indicates an HTML entity; consume the whole thing (until
// ";") but treat it all as one character.
if ($vector[$ii] == '&') {
do { do {
++$ii; ++$ii;
} while ($l[$ii] != ';'); } while ($vector[$ii] != ';');
++$c; ++$c;
} else if ($l[$ii] == '<') { // An "<" indicates an HTML tag, consume the whole thing but don't treat
// it as a character.
} else if ($vector[$ii] == '<') {
do { do {
++$ii; ++$ii;
} while ($l[$ii] != '>'); } while ($vector[$ii] != '>');
} else { } else {
++$c; ++$c;
} }
// Keep track of where we need to break the string later.
if ($c == $this->lineWidth) { if ($c == $this->lineWidth) {
$ins[] = ($ii + 1); $break_here[$ii] = true;
$c = 0; $c = 0;
} }
} }
while (($pos = array_pop($ins))) {
$l = substr_replace( $result = array();
$l, foreach ($vector as $ii => $char) {
"<span class=\"over-the-line\">\xE2\xAC\x85</span><br />", $result[] = $char;
$pos, if (isset($break_here[$ii])) {
0); $result[] = "<span class=\"over-the-line\">!</span><br />";
} }
return $l; }
return implode('', $result);
} }