mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-11-29 10:12:41 +01:00
Add very hacky encoding transformation support for arc
Summary: Adds a secret, undocumented "encoding" key to ".arcconfig" which makes a very half-hearted effort to convert encodings. This is probably good enough that Differential can be used for code review, but there will be issues with 'arc patch', 'arc export', paste, maybe conduit stuff, Diffusion, and whatever else I haven't thought of. This also doesn't store the original encoding so anything converted like this won't reasonably be able to be made to work with all that stuff in the future. See T452 for a broader discussion of the issues involved. Test Plan: Short-circuited the UTF-8 detection to always fail, had my files "converted" from ISO-8859-1 to UTF-8. @davidreuss: you can test this by applying this patch to arcanist/, adding '"encoding" : "ISO-8859-1"' to your .arcconfig, touching some non-ASCII file, and then running "arc diff". Reviewers: davidreuss, jungejason, tuomaspelkonen, aran Reviewed By: davidreuss CC: aran, davidreuss, epriestley, nshamg123 Differential Revision: 812
This commit is contained in:
parent
08ce2a2e2c
commit
5150252f91
2 changed files with 42 additions and 1 deletions
|
@ -749,10 +749,43 @@ EOTEXT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$try_encoding = null;
|
||||||
|
|
||||||
$utf8_problems = array();
|
$utf8_problems = array();
|
||||||
foreach ($changes as $change) {
|
foreach ($changes as $change) {
|
||||||
foreach ($change->getHunks() as $hunk) {
|
foreach ($change->getHunks() as $hunk) {
|
||||||
if (!phutil_is_utf8($hunk->getCorpus())) {
|
$corpus = $hunk->getCorpus();
|
||||||
|
if (!phutil_is_utf8($corpus)) {
|
||||||
|
|
||||||
|
// If this corpus is heuristically binary, don't try to convert it.
|
||||||
|
// mb_check_encoding() and mb_convert_encoding() are both very very
|
||||||
|
// liberal about what they're willing to process.
|
||||||
|
$is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus);
|
||||||
|
if (!$is_binary) {
|
||||||
|
if ($try_encoding === null) {
|
||||||
|
// Make a call to check if there's an encoding specified for this
|
||||||
|
// project.
|
||||||
|
$project_info = $this->getConduit()->callMethodSynchronous(
|
||||||
|
'arcanist.projectinfo',
|
||||||
|
array(
|
||||||
|
'name' => $this->getWorkingCopy()->getProjectID(),
|
||||||
|
));
|
||||||
|
$try_encoding = nonempty($project_info['encoding'], false);
|
||||||
|
}
|
||||||
|
if ($try_encoding) {
|
||||||
|
// NOTE: This feature is HIGHLY EXPERIMENTAL and will cause a lot
|
||||||
|
// of issues. Use it at your own risk.
|
||||||
|
$corpus = mb_convert_encoding($corpus, 'UTF-8', $try_encoding);
|
||||||
|
$name = $change->getCurrentPath();
|
||||||
|
if (phutil_is_utf8($corpus)) {
|
||||||
|
$this->writeStatusMessage(
|
||||||
|
"[Experimental] Converted a '{$name}' hunk from ".
|
||||||
|
"'{$try_encoding}' to UTF-8.\n");
|
||||||
|
$hunk->setCorpus($corpus);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
$utf8_problems[] = $change;
|
$utf8_problems[] = $change;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -763,11 +796,17 @@ EOTEXT
|
||||||
// and treat them as binary changes. See D327 for discussion of why Arcanist
|
// and treat them as binary changes. See D327 for discussion of why Arcanist
|
||||||
// has this behavior.
|
// has this behavior.
|
||||||
if ($utf8_problems) {
|
if ($utf8_problems) {
|
||||||
|
$learn_more =
|
||||||
|
"You can learn more about how Phabricator handles character encodings ".
|
||||||
|
"(and how to configure encoding settings and detect and correct ".
|
||||||
|
"encoding problems) by reading 'User Guide: UTF-8 and Character ".
|
||||||
|
"Encoding' in the Phabricator documentation.\n\n";
|
||||||
if (count($utf8_problems) == 1) {
|
if (count($utf8_problems) == 1) {
|
||||||
$utf8_warning =
|
$utf8_warning =
|
||||||
"This diff includes a file which is not valid UTF-8 (it has invalid ".
|
"This diff includes a file which is not valid UTF-8 (it has invalid ".
|
||||||
"byte sequences). You can either stop this workflow and fix it, or ".
|
"byte sequences). You can either stop this workflow and fix it, or ".
|
||||||
"continue. If you continue, this file will be marked as binary.\n\n".
|
"continue. If you continue, this file will be marked as binary.\n\n".
|
||||||
|
$learn_more.
|
||||||
" AFFECTED FILE\n";
|
" AFFECTED FILE\n";
|
||||||
|
|
||||||
$confirm = "Do you want to mark this file as binary and continue?";
|
$confirm = "Do you want to mark this file as binary and continue?";
|
||||||
|
@ -777,6 +816,7 @@ EOTEXT
|
||||||
"invalid byte sequences). You can either stop this workflow and fix ".
|
"invalid byte sequences). You can either stop this workflow and fix ".
|
||||||
"these files, or continue. If you continue, these files will be ".
|
"these files, or continue. If you continue, these files will be ".
|
||||||
"marked as binary.\n\n".
|
"marked as binary.\n\n".
|
||||||
|
$learn_more.
|
||||||
" AFFECTED FILES\n";
|
" AFFECTED FILES\n";
|
||||||
|
|
||||||
$confirm = "Do you want to mark these files as binary and continue?";
|
$confirm = "Do you want to mark these files as binary and continue?";
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
phutil_require_module('arcanist', 'difference');
|
||||||
phutil_require_module('arcanist', 'differential/commitmessage');
|
phutil_require_module('arcanist', 'differential/commitmessage');
|
||||||
phutil_require_module('arcanist', 'exception/usage');
|
phutil_require_module('arcanist', 'exception/usage');
|
||||||
phutil_require_module('arcanist', 'exception/usage/userabort');
|
phutil_require_module('arcanist', 'exception/usage/userabort');
|
||||||
|
|
Loading…
Reference in a new issue