1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-22 14:52:41 +01:00

Display some invisible/nonprintable characters in diffs by default

Summary:
Ref T12822. Ref T2495. This is the good version of D20193.

Currently, we display various nonprintable characters (ZWS, nonbreaking space, various control characters) as themselves, so they're generally invisible.

In T12822, one user reports that all their engineers frequently type ZWS characters into source somehow? I don't really believe this (??), and this should be fixed in lint.

That said, the only real reason not to show these weird characters in a special way was that it would break copy/paste: if we render ZWS as "🐑", and a user copy-pastes the line including the ZWS, they'll get a sheep.

At least, they would have, until D20191. Now that this whole thing is end-to-end Javascript magic, we can copy whatever we want.

In particular, we can render any character `X` as `<span data-copy-text="Y">X</span>`, and then copy "Y" instead of "X" when the user copies the node. Limitations:

  - If users select only "X", they'll get "X" on their clipboard. This seems fine. If you're selecting our ZWS marker *only*, you probably want to copy it?
  - If "X" is more than one character long, users will get the full "Y" if they select any part of "X". At least here, this only matters when "X" is several spaces and "Y" is a tab. This also seems fine.
  - We have to be kind of careful because this approach involves editing an HTML blob directly. However, we already do that elsewhere and this isn't really too hard to get right.

With those tools in hand:

  - Replace "\t" (raw text / what gets copied) with the number of spaces to the next tab stop for display.
  - Replace ZWS and NBSP (raw text) with a special marker for display.
  - Replace control characters 0x00-0x19 and 0x7F, except for "\t", "\r", and "\n", with the special unicode "control character pictures" reserved for this purpose.

Test Plan:
- Generated and viewed a file like this one:

{F6220422}

- Copied text out of it, got authentic raw original source text instead of displayed text.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T12822, T2495

Differential Revision: https://secure.phabricator.com/D20194
This commit is contained in:
epriestley 2019-02-17 13:39:46 -08:00
parent efccd75ae3
commit fe7047d12d
6 changed files with 221 additions and 24 deletions

View file

@ -9,8 +9,8 @@ return array(
'names' => array(
'conpherence.pkg.css' => '3c8a0668',
'conpherence.pkg.js' => '020aebcf',
'core.pkg.css' => '261ee8cf',
'core.pkg.js' => 'e368deda',
'core.pkg.css' => 'e3c1a8f2',
'core.pkg.js' => '2cda17a4',
'differential.pkg.css' => '249b542d',
'differential.pkg.js' => '53f8d00c',
'diffusion.pkg.css' => '42c75c37',
@ -112,7 +112,7 @@ return array(
'rsrc/css/application/uiexample/example.css' => 'b4795059',
'rsrc/css/core/core.css' => '1b29ed61',
'rsrc/css/core/remarkup.css' => '9e627d41',
'rsrc/css/core/syntax.css' => '8a16f91b',
'rsrc/css/core/syntax.css' => '4234f572',
'rsrc/css/core/z-index.css' => '99c0f5eb',
'rsrc/css/diviner/diviner-shared.css' => '4bd263b0',
'rsrc/css/font/font-awesome.css' => '3883938a',
@ -473,7 +473,7 @@ return array(
'rsrc/js/core/behavior-linked-container.js' => '74446546',
'rsrc/js/core/behavior-more.js' => '506aa3f4',
'rsrc/js/core/behavior-object-selector.js' => 'a4af0b4a',
'rsrc/js/core/behavior-oncopy.js' => 'de59bf15',
'rsrc/js/core/behavior-oncopy.js' => 'ff7b3f22',
'rsrc/js/core/behavior-phabricator-nav.js' => 'f166c949',
'rsrc/js/core/behavior-phabricator-remarkup-assist.js' => '2f80333f',
'rsrc/js/core/behavior-read-only-warning.js' => 'b9109f8f',
@ -636,7 +636,7 @@ return array(
'javelin-behavior-phabricator-nav' => 'f166c949',
'javelin-behavior-phabricator-notification-example' => '29819b75',
'javelin-behavior-phabricator-object-selector' => 'a4af0b4a',
'javelin-behavior-phabricator-oncopy' => 'de59bf15',
'javelin-behavior-phabricator-oncopy' => 'ff7b3f22',
'javelin-behavior-phabricator-remarkup-assist' => '2f80333f',
'javelin-behavior-phabricator-reveal-content' => 'b105a3a6',
'javelin-behavior-phabricator-search-typeahead' => '1cb7d027',
@ -878,7 +878,7 @@ return array(
'sprite-login-css' => '18b368a6',
'sprite-tokens-css' => 'f1896dc5',
'syntax-default-css' => '055fc231',
'syntax-highlighting-css' => '8a16f91b',
'syntax-highlighting-css' => '4234f572',
'tokens-css' => 'ce5a50bd',
'typeahead-browse-css' => 'b7ed02d2',
'unhandled-exception-css' => '9ecfc00d',
@ -1222,6 +1222,9 @@ return array(
'javelin-behavior',
'javelin-uri',
),
'4234f572' => array(
'syntax-default-css',
),
'42c7a5a7' => array(
'javelin-install',
'javelin-dom',
@ -1580,9 +1583,6 @@ return array(
'javelin-stratcom',
'javelin-install',
),
'8a16f91b' => array(
'syntax-default-css',
),
'8ac32fd9' => array(
'javelin-behavior',
'javelin-stratcom',
@ -2010,10 +2010,6 @@ return array(
'javelin-uri',
'phabricator-notification',
),
'de59bf15' => array(
'javelin-behavior',
'javelin-dom',
),
'dfa1d313' => array(
'javelin-behavior',
'javelin-dom',
@ -2147,6 +2143,10 @@ return array(
'owners-path-editor',
'javelin-behavior',
),
'ff7b3f22' => array(
'javelin-behavior',
'javelin-dom',
),
),
'packages' => array(
'conpherence.pkg.css' => array(

View file

@ -189,7 +189,7 @@ final class DifferentialChangesetParser extends Phobject {
return $this;
}
const CACHE_VERSION = 13;
const CACHE_VERSION = 14;
const CACHE_MAX_SIZE = 8e6;
const ATTR_GENERATED = 'attr:generated';
@ -568,11 +568,17 @@ final class DifferentialChangesetParser extends Phobject {
private function applyIntraline(&$render, $intra, $corpus) {
foreach ($render as $key => $text) {
$result = $text;
if (isset($intra[$key])) {
$render[$key] = ArcanistDiffUtils::applyIntralineDiff(
$text,
$result = ArcanistDiffUtils::applyIntralineDiff(
$result,
$intra[$key]);
}
$result = $this->adjustRenderedLineForDisplay($result);
$render[$key] = $result;
}
}
@ -1415,5 +1421,183 @@ final class DifferentialChangesetParser extends Phobject {
$hunk_parser->setNewLineTypeMap($type_parser->getNewLineTypeMap());
}
private function adjustRenderedLineForDisplay($line) {
// IMPORTANT: We're using "str_replace()" against raw HTML here, which can
// easily become unsafe. The input HTML has already had syntax highlighting
// and intraline diff highlighting applied, so it's full of "<span />" tags.
static $search;
static $replace;
if ($search === null) {
$rules = $this->newSuspiciousCharacterRules();
$map = array();
foreach ($rules as $key => $spec) {
$tag = phutil_tag(
'span',
array(
'data-copy-text' => $key,
'class' => $spec['class'],
'title' => $spec['title'],
),
$spec['replacement']);
$map[$key] = phutil_string_cast($tag);
}
$search = array_keys($map);
$replace = array_values($map);
}
$is_html = false;
if ($line instanceof PhutilSafeHTML) {
$is_html = true;
$line = hsprintf('%s', $line);
}
$line = phutil_string_cast($line);
if (strpos($line, "\t") !== false) {
$line = $this->replaceTabsWithSpaces($line);
}
$line = str_replace($search, $replace, $line);
if ($is_html) {
$line = phutil_safe_html($line);
}
return $line;
}
private function newSuspiciousCharacterRules() {
// The "title" attributes are cached in the database, so they're
// intentionally not wrapped in "pht(...)".
$rules = array(
"\xE2\x80\x8B" => array(
'title' => 'ZWS',
'class' => 'suspicious-character',
'replacement' => '!',
),
"\xC2\xA0" => array(
'title' => 'NBSP',
'class' => 'suspicious-character',
'replacement' => '!',
),
"\x7F" => array(
'title' => 'DEL (0x7F)',
'class' => 'suspicious-character',
'replacement' => "\xE2\x90\xA1",
),
);
// Unicode defines special pictures for the control characters in the
// range between "0x00" and "0x1F".
$control = array(
'NULL',
'SOH',
'STX',
'ETX',
'EOT',
'ENQ',
'ACK',
'BEL',
'BS',
null, // "\t" Tab
null, // "\n" New Line
'VT',
'FF',
null, // "\r" Carriage Return,
'SO',
'SI',
'DLE',
'DC1',
'DC2',
'DC3',
'DC4',
'NAK',
'SYN',
'ETB',
'CAN',
'EM',
'SUB',
'ESC',
'FS',
'GS',
'RS',
'US',
);
foreach ($control as $idx => $label) {
if ($label === null) {
continue;
}
$rules[chr($idx)] = array(
'title' => sprintf('%s (0x%02X)', $label, $idx),
'class' => 'suspicious-character',
'replacement' => "\xE2\x90".chr(0x80 + $idx),
);
}
return $rules;
}
private function replaceTabsWithSpaces($line) {
// TODO: This should be flexible, eventually.
$tab_width = 2;
static $tags;
if ($tags === null) {
$tags = array();
for ($ii = 1; $ii <= $tab_width; $ii++) {
$tag = phutil_tag(
'span',
array(
'data-copy-text' => "\t",
),
str_repeat(' ', $ii));
$tag = phutil_string_cast($tag);
$tags[$ii] = $tag;
}
}
// If the line is particularly long, don't try to vectorize it. Use a
// faster approximation of the correct tabstop expansion instead. This
// usually still arrives at the right result.
if (strlen($line) > 256) {
return str_replace("\t", $tags[$tab_width], $line);
}
$line = phutil_utf8v_combined($line);
$in_tag = false;
$pos = 0;
foreach ($line as $key => $char) {
if ($char === '<') {
$in_tag = true;
continue;
}
if ($char === '>') {
$in_tag = false;
continue;
}
if ($in_tag) {
continue;
}
if ($char === "\t") {
$count = $tab_width - ($pos % $tab_width);
$pos += $count;
$line[$key] = $tags[$count];
continue;
}
$pos++;
}
return implode('', $line);
}
}

View file

@ -363,14 +363,14 @@ abstract class DifferentialChangesetRenderer extends Phobject {
$undershield = $this->renderUndershieldHeader();
}
$result = $notice.$props.$undershield.$content;
$result = array(
$notice,
$props,
$undershield,
$content,
);
// TODO: Let the user customize their tab width / display style.
// TODO: We should possibly post-process "\r" as well.
// TODO: Both these steps should happen earlier.
$result = str_replace("\t", ' ', $result);
return phutil_safe_html($result);
return hsprintf('%s', $result);
}
abstract public function isOneUpRenderer();

View file

@ -131,7 +131,7 @@ abstract class DifferentialChangesetTestRenderer
}
$out = implode("\n", $out)."\n";
return $out;
return phutil_safe_html($out);
}

View file

@ -29,3 +29,9 @@ span.crossreference-item {
color: #222222;
background: #dddddd;
}
.suspicious-character {
background: #ff7700;
color: #ffffff;
cursor: default;
}

View file

@ -271,6 +271,13 @@ JX.behavior('phabricator-oncopy', function() {
// Otherwise, fall through and extract this node's text normally.
}
if (node.getAttribute) {
var copy_text = node.getAttribute('data-copy-text');
if (copy_text) {
return copy_text;
}
}
if (!node.childNodes || !node.childNodes.length) {
return node.textContent;
}