diff --git a/src/applications/differential/parser/changeset/DifferentialChangesetParser.php b/src/applications/differential/parser/changeset/DifferentialChangesetParser.php
index de48e8e970..b2e6b12973 100644
--- a/src/applications/differential/parser/changeset/DifferentialChangesetParser.php
+++ b/src/applications/differential/parser/changeset/DifferentialChangesetParser.php
@@ -660,36 +660,54 @@ class DifferentialChangesetParser {
}
}
- protected function lineWrap($l) {
+ /**
+ * Hard-wrap UTF-8 HTML text after every `lineWidth` counted characters.
+ *
+ * @param string An HTML string with tags and entities.
+ * @return string The string with a break marker after each full-width run.
+ */
+ protected function lineWrap($line) {
$c = 0;
- $len = strlen($l);
- $ins = array();
+ $break_here = array();
+
+ // Convert the UTF-8 string into a list of UTF-8 characters.
+ $vector = phutil_utf8v($line);
+ $len = count($vector);
for ($ii = 0; $ii < $len; ++$ii) {
- if ($l[$ii] == '&') {
+ // An ampersand indicates an HTML entity; consume the whole thing (until
+ // ";") but treat it all as one character. The bounds check stops an
+ // unterminated entity from scanning past the end of the line forever.
+ if ($vector[$ii] == '&') {
do {
++$ii;
- } while ($l[$ii] != ';');
+ } while ($ii < $len && $vector[$ii] != ';');
++$c;
- } else if ($l[$ii] == '<') {
+ // An "<" indicates an HTML tag; consume the whole thing (bounded the same
+ // way as entities) but don't treat it as a character.
+ } else if ($vector[$ii] == '<') {
do {
++$ii;
- } while ($l[$ii] != '>');
+ } while ($ii < $len && $vector[$ii] != '>');
} else {
++$c;
}
+
+ // Record the index after which a break marker must be emitted later.
if ($c == $this->lineWidth) {
- $ins[] = ($ii + 1);
+ $break_here[$ii] = true;
$c = 0;
}
}
- while (($pos = array_pop($ins))) {
- $l = substr_replace(
- $l,
- "\xE2\xAC\x85
",
- $pos,
- 0);
+
+ $result = array();
+ foreach ($vector as $ii => $char) {
+ $result[] = $char;
+ if (isset($break_here[$ii])) {
+ $result[] = "!
";
+ }
}
- return $l;
+
+ return implode('', $result);
}