diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
index 8d3e3f1fe8..3e9d57d4c5 100644
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -662,6 +662,7 @@ phutil_register_library_map(array(
'DifferentialSubscribersCommitMessageField' => 'applications/differential/field/DifferentialSubscribersCommitMessageField.php',
'DifferentialSummaryCommitMessageField' => 'applications/differential/field/DifferentialSummaryCommitMessageField.php',
'DifferentialSummaryField' => 'applications/differential/customfield/DifferentialSummaryField.php',
+ 'DifferentialTabReplacementTestCase' => 'applications/differential/parser/__tests__/DifferentialTabReplacementTestCase.php',
'DifferentialTagsCommitMessageField' => 'applications/differential/field/DifferentialTagsCommitMessageField.php',
'DifferentialTasksCommitMessageField' => 'applications/differential/field/DifferentialTasksCommitMessageField.php',
'DifferentialTestPlanCommitMessageField' => 'applications/differential/field/DifferentialTestPlanCommitMessageField.php',
@@ -6332,6 +6333,7 @@ phutil_register_library_map(array(
'DifferentialSubscribersCommitMessageField' => 'DifferentialCommitMessageField',
'DifferentialSummaryCommitMessageField' => 'DifferentialCommitMessageField',
'DifferentialSummaryField' => 'DifferentialCoreCustomField',
+ 'DifferentialTabReplacementTestCase' => 'PhabricatorTestCase',
'DifferentialTagsCommitMessageField' => 'DifferentialCommitMessageField',
'DifferentialTasksCommitMessageField' => 'DifferentialCommitMessageField',
'DifferentialTestPlanCommitMessageField' => 'DifferentialCommitMessageField',
diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php
index 8ed6d80eed..9843ff3d5d 100644
--- a/src/applications/differential/parser/DifferentialChangesetParser.php
+++ b/src/applications/differential/parser/DifferentialChangesetParser.php
@@ -1456,9 +1456,10 @@ final class DifferentialChangesetParser extends Phobject {
$line = phutil_string_cast($line);
- if (strpos($line, "\t") !== false) {
- $line = $this->replaceTabsWithSpaces($line);
- }
+ // TODO: This should be flexible, eventually.
+ $tab_width = 2;
+
+ $line = self::replaceTabsWithSpaces($line, $tab_width);
$line = str_replace($search, $replace, $line);
if ($is_html) {
@@ -1543,13 +1544,9 @@ final class DifferentialChangesetParser extends Phobject {
return $rules;
}
- private function replaceTabsWithSpaces($line) {
- // TODO: This should be flexible, eventually.
- $tab_width = 2;
-
- static $tags;
- if ($tags === null) {
- $tags = array();
+ public static function replaceTabsWithSpaces($line, $tab_width) {
+ static $tags = array();
+ if (empty($tags[$tab_width])) {
for ($ii = 1; $ii <= $tab_width; $ii++) {
$tag = phutil_tag(
'span',
@@ -1562,42 +1559,120 @@ final class DifferentialChangesetParser extends Phobject {
}
}
- // If the line is particularly long, don't try to vectorize it. Use a
- // faster approximation of the correct tabstop expansion instead. This
- // usually still arrives at the right result.
- if (strlen($line) > 256) {
- return str_replace("\t", $tags[$tab_width], $line);
+ // Expand all prefix tabs until we encounter any non-tab character. This
+ // is cheap and often immediately produces the correct result with no
+ // further work (and, particularly, no need to handle any unicode cases).
+
+ $len = strlen($line);
+
+ $head = 0;
+ for ($head = 0; $head < $len; $head++) {
+ $char = $line[$head];
+ if ($char !== "\t") {
+ break;
+ }
+ }
+
+ if ($head) {
+ if (empty($tags[$tab_width * $head])) {
+ $tags[$tab_width * $head] = str_repeat($tags[$tab_width], $head);
+ }
+ $prefix = $tags[$tab_width * $head];
+ $line = substr($line, $head);
+ } else {
+ $prefix = '';
+ }
+
+ // If we have no remaining tabs elsewhere in the string after taking care
+ // of all the prefix tabs, we're done.
+ if (strpos($line, "\t") === false) {
+ return $prefix.$line;
+ }
+
+ $len = strlen($line);
+
+ // If the line is particularly long, don't try to do anything special with
+ // it. Use a faster approximation of the correct tabstop expansion instead.
+ // This usually still arrives at the right result.
+ if ($len > 256) {
+ return $prefix.str_replace("\t", $tags[$tab_width], $line);
}
- $line = phutil_utf8v_combined($line);
$in_tag = false;
$pos = 0;
- foreach ($line as $key => $char) {
- if ($char === '<') {
- $in_tag = true;
- continue;
+
+ // See PHI1210. If the line only has single-byte characters, we don't need
+ // to vectorize it and can avoid an expensive UTF8 call.
+
+ $fast_path = preg_match('/^[\x01-\x7F]*\z/', $line);
+ if ($fast_path) {
+ $replace = array();
+ for ($ii = 0; $ii < $len; $ii++) {
+ $char = $line[$ii];
+ if ($char === '>') {
+ $in_tag = false;
+ continue;
+ }
+
+ if ($in_tag) {
+ continue;
+ }
+
+ if ($char === '<') {
+ $in_tag = true;
+ continue;
+ }
+
+ if ($char === "\t") {
+ $count = $tab_width - ($pos % $tab_width);
+ $pos += $count;
+ $replace[$ii] = $tags[$count];
+ continue;
+ }
+
+ $pos++;
}
- if ($char === '>') {
- $in_tag = false;
- continue;
+ if ($replace) {
+ // Apply replacements starting at the end of the string so they
+ // don't mess up the offsets for following replacements.
+ $replace = array_reverse($replace, true);
+
+ foreach ($replace as $replace_pos => $replacement) {
+ $line = substr_replace($line, $replacement, $replace_pos, 1);
+ }
+ }
+ } else {
+ $line = phutil_utf8v_combined($line);
+ foreach ($line as $key => $char) {
+ if ($char === '>') {
+ $in_tag = false;
+ continue;
+ }
+
+ if ($in_tag) {
+ continue;
+ }
+
+ if ($char === '<') {
+ $in_tag = true;
+ continue;
+ }
+
+ if ($char === "\t") {
+ $count = $tab_width - ($pos % $tab_width);
+ $pos += $count;
+ $line[$key] = $tags[$count];
+ continue;
+ }
+
+ $pos++;
}
- if ($in_tag) {
- continue;
- }
-
- if ($char === "\t") {
- $count = $tab_width - ($pos % $tab_width);
- $pos += $count;
- $line[$key] = $tags[$count];
- continue;
- }
-
- $pos++;
+ $line = implode('', $line);
}
- return implode('', $line);
+ return $prefix.$line;
}
}
diff --git a/src/applications/differential/parser/__tests__/DifferentialTabReplacementTestCase.php b/src/applications/differential/parser/__tests__/DifferentialTabReplacementTestCase.php
new file mode 100644
index 0000000000..d63978cb61
--- /dev/null
+++ b/src/applications/differential/parser/__tests__/DifferentialTabReplacementTestCase.php
@@ -0,0 +1,56 @@
+ ";
+ $tab2 = " ";
+
+ $cat = "\xF0\x9F\x90\xB1";
+
+ $cases = array(
+ '' => '',
+ 'x' => 'x',
+
+ // Tabs inside HTML tags should not be replaced.
+ "<\t>x" => "<\t>x",
+
+ // Normal tabs should be replaced. These are all aligned to the tab
+ // width, so they'll be replaced inline.
+ "\tx" => "{$tab2}x",
+ " \tx" => " {$tab2}x",
+ "\t x" => "{$tab2} x",
+ "aa\tx" => "aa{$tab2}x",
+ "aa \tx" => "aa {$tab2}x",
+ "aa\t x" => "aa{$tab2} x",
+
+ // This tab is not tabstop-aligned, so it is replaced with fewer
+ // spaces to bring us to the next tabstop.
+ " \tx" => " {$tab1}x",
+
+ // Text inside HTML tags should not count when aligning tabs with
+ // tabstops.
+ " \tx" => " {$tab1}x",
+ " \tx" => " {$tab1}x",
+
+ // The code has to take a slow path when inputs contain unicode, but
+ // should produce the right results and align tabs to tabstops while
+ // respecting UTF8 display character widths, not byte widths.
+ "{$cat}\tx" => "{$cat}{$tab1}x",
+ "{$cat}{$cat}\tx" => "{$cat}{$cat}{$tab2}x",
+ );
+
+ foreach ($cases as $input => $expect) {
+ $actual = DifferentialChangesetParser::replaceTabsWithSpaces(
+ $input,
+ 2);
+
+ $this->assertEqual(
+ $expect,
+ $actual,
+ pht('Tabs to Spaces: %s', $input));
+ }
+ }
+
+}