From 5150252f9138e9970de5b9fb1ad9d9e000758532 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 15 Aug 2011 09:10:22 -0700 Subject: [PATCH] Add very hacky encoding transformation support for arc Summary: Adds a secret, undocumented "encoding" key to ".arcconfig" which makes a very half-hearted effort to convert encodings. This is probably good enough that Differential can be used for code review, but there will be issues with 'arc patch', 'arc export', paste, maybe conduit stuff, Diffusion, and whatever else I haven't thought of. This also doesn't store the original encoding so anything converted like this won't reasonably be able to be made to work with all that stuff in the future. See T452 for a broader discussion of the issues involved. Test Plan: Short circuited the UTF-8 detection to always fail, had my files "converted" from ISO-8859-1 to UTF-8. @davidreuss: you can test this by applying this patch to arcanist/, adding '"encoding" : "ISO-8859-1"' to your .arcconfig, touching some non-ASCII file, and then running "arc diff". Reviewers: davidreuss, jungejason, tuomaspelkonen, aran Reviewed By: davidreuss CC: aran, davidreuss, epriestley, nshamg123 Differential Revision: 812 --- src/workflow/diff/ArcanistDiffWorkflow.php | 42 +++++++++++++++++++++- src/workflow/diff/__init__.php | 1 + 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/workflow/diff/ArcanistDiffWorkflow.php b/src/workflow/diff/ArcanistDiffWorkflow.php index b10e0ade..e26286c5 100644 --- a/src/workflow/diff/ArcanistDiffWorkflow.php +++ b/src/workflow/diff/ArcanistDiffWorkflow.php @@ -749,10 +749,43 @@ EOTEXT } } + $try_encoding = null; + $utf8_problems = array(); foreach ($changes as $change) { foreach ($change->getHunks() as $hunk) { - if (!phutil_is_utf8($hunk->getCorpus())) { + $corpus = $hunk->getCorpus(); + if (!phutil_is_utf8($corpus)) { + + // If this corpus is heuristically binary, don't try to convert it. 
+ // mb_check_encoding() and mb_convert_encoding() are both very very + // liberal about what they're willing to process. + $is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus); + if (!$is_binary) { + if ($try_encoding === null) { + // Make a call to check if there's an encoding specified for this + // project. + $project_info = $this->getConduit()->callMethodSynchronous( + 'arcanist.projectinfo', + array( + 'name' => $this->getWorkingCopy()->getProjectID(), + )); + $try_encoding = nonempty($project_info['encoding'], false); + } + if ($try_encoding) { + // NOTE: This feature is HIGHLY EXPERIMENTAL and will cause a lot + // of issues. Use it at your own risk. + $corpus = mb_convert_encoding($corpus, 'UTF-8', $try_encoding); + $name = $change->getCurrentPath(); + if (phutil_is_utf8($corpus)) { + $this->writeStatusMessage( + "[Experimental] Converted a '{$name}' hunk from ". + "'{$try_encoding}' to UTF-8.\n"); + $hunk->setCorpus($corpus); + continue; + } + } + } $utf8_problems[] = $change; break; } @@ -763,11 +796,17 @@ EOTEXT // and treat them as binary changes. See D327 for discussion of why Arcanist // has this behavior. if ($utf8_problems) { + $learn_more = + "You can learn more about how Phabricator handles character encodings ". + "(and how to configure encoding settings and detect and correct ". + "encoding problems) by reading 'User Guide: UTF-8 and Character ". + "Encoding' in the Phabricator documentation.\n\n"; if (count($utf8_problems) == 1) { $utf8_warning = "This diff includes a file which is not valid UTF-8 (it has invalid ". "byte sequences). You can either stop this workflow and fix it, or ". "continue. If you continue, this file will be marked as binary.\n\n". + $learn_more. " AFFECTED FILE\n"; $confirm = "Do you want to mark this file as binary and continue?"; @@ -777,6 +816,7 @@ EOTEXT "invalid byte sequences). You can either stop this workflow and fix ". "these files, or continue. If you continue, these files will be ". 
"marked as binary.\n\n". + $learn_more. " AFFECTED FILES\n"; $confirm = "Do you want to mark these files as binary and continue?"; diff --git a/src/workflow/diff/__init__.php b/src/workflow/diff/__init__.php index 029b9fae..c7dd37ca 100644 --- a/src/workflow/diff/__init__.php +++ b/src/workflow/diff/__init__.php @@ -6,6 +6,7 @@ +phutil_require_module('arcanist', 'difference'); phutil_require_module('arcanist', 'differential/commitmessage'); phutil_require_module('arcanist', 'exception/usage'); phutil_require_module('arcanist', 'exception/usage/userabort');