1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2024-11-25 08:12:40 +01:00

Add very hacky encoding transformation support for arc

Summary:
Adds a secret, undocumented "encoding" key to ".arcconfig" which makes a very
half-hearted effort to convert encodings. This is probably good enough that
Differential can be used for code review, but there will be issues with 'arc
patch', 'arc export', paste, maybe conduit stuff, Diffusion, and whatever else I
haven't thought of.

This also doesn't store the original encoding, so anything converted like this
can't reasonably be made to work with all that stuff in the future.

See T452 for a broader discussion of the issues involved.

Test Plan:
Short-circuited the UTF-8 detection to always fail, and had my files "converted"
from ISO-8859-1 to UTF-8.

@davidreuss: you can test this by applying this patch to arcanist/, adding
'"encoding" : "ISO-8859-1"' to your .arcconfig, touching some non-ASCII file,
and then running "arc diff".

Reviewers: davidreuss, jungejason, tuomaspelkonen, aran

Reviewed By: davidreuss

CC: aran, davidreuss, epriestley, nshamg123

Differential Revision: 812
This commit is contained in:
epriestley 2011-08-15 09:10:22 -07:00
parent 08ce2a2e2c
commit 5150252f91
2 changed files with 42 additions and 1 deletions

View file

@ -749,10 +749,43 @@ EOTEXT
}
}
$try_encoding = null;
$utf8_problems = array();
foreach ($changes as $change) {
foreach ($change->getHunks() as $hunk) {
if (!phutil_is_utf8($hunk->getCorpus())) {
$corpus = $hunk->getCorpus();
if (!phutil_is_utf8($corpus)) {
// If this corpus is heuristically binary, don't try to convert it.
// mb_check_encoding() and mb_convert_encoding() are both very very
// liberal about what they're willing to process.
$is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus);
if (!$is_binary) {
if ($try_encoding === null) {
// Make a call to check if there's an encoding specified for this
// project.
$project_info = $this->getConduit()->callMethodSynchronous(
'arcanist.projectinfo',
array(
'name' => $this->getWorkingCopy()->getProjectID(),
));
$try_encoding = nonempty($project_info['encoding'], false);
}
if ($try_encoding) {
// NOTE: This feature is HIGHLY EXPERIMENTAL and will cause a lot
// of issues. Use it at your own risk.
$corpus = mb_convert_encoding($corpus, 'UTF-8', $try_encoding);
$name = $change->getCurrentPath();
if (phutil_is_utf8($corpus)) {
$this->writeStatusMessage(
"[Experimental] Converted a '{$name}' hunk from ".
"'{$try_encoding}' to UTF-8.\n");
$hunk->setCorpus($corpus);
continue;
}
}
}
$utf8_problems[] = $change;
break;
}
@ -763,11 +796,17 @@ EOTEXT
// and treat them as binary changes. See D327 for discussion of why Arcanist
// has this behavior.
if ($utf8_problems) {
$learn_more =
"You can learn more about how Phabricator handles character encodings ".
"(and how to configure encoding settings and detect and correct ".
"encoding problems) by reading 'User Guide: UTF-8 and Character ".
"Encoding' in the Phabricator documentation.\n\n";
if (count($utf8_problems) == 1) {
$utf8_warning =
"This diff includes a file which is not valid UTF-8 (it has invalid ".
"byte sequences). You can either stop this workflow and fix it, or ".
"continue. If you continue, this file will be marked as binary.\n\n".
$learn_more.
" AFFECTED FILE\n";
$confirm = "Do you want to mark this file as binary and continue?";
@ -777,6 +816,7 @@ EOTEXT
"invalid byte sequences). You can either stop this workflow and fix ".
"these files, or continue. If you continue, these files will be ".
"marked as binary.\n\n".
$learn_more.
" AFFECTED FILES\n";
$confirm = "Do you want to mark these files as binary and continue?";

View file

@ -6,6 +6,7 @@
phutil_require_module('arcanist', 'difference');
phutil_require_module('arcanist', 'differential/commitmessage');
phutil_require_module('arcanist', 'exception/usage');
phutil_require_module('arcanist', 'exception/usage/userabort');