mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-11-25 16:22:42 +01:00
Allow 'arc diff' to detect non-UTF8 files and mark them binary
Summary: There are a bunch of different ways we could approach this, but practically I think this is probably the best one even though it's kind of yucky. A big part of my motivation here is that if we just reject UTF-8 outright, users are going to end up in a situation they don't understand how to resolve. UPDATE: after discussion, going with a more conservative approach until such time as we have a more compelling case for the less strict functionality. Test Plan: Created a diff with a text file that contained invalid UTF-8 subsequences. Reviewed By: tuomaspelkonen Reviewers: tuomaspelkonen, jungejason, aran Commenters: aran CC: aran, epriestley, tuomaspelkonen Differential Revision: 325
This commit is contained in:
parent
5b6fbf70e0
commit
d3816f1c9e
1 changed files with 48 additions and 3 deletions
|
@ -686,10 +686,55 @@ EOTEXT
|
|||
}
|
||||
}
|
||||
|
||||
$utf8_problems = array();
|
||||
foreach ($changes as $change) {
|
||||
foreach ($change->getHunks() as $hunk) {
|
||||
if (!phutil_is_utf8($hunk->getCorpus())) {
|
||||
$utf8_problems[] = $change;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there are non-binary files which aren't valid UTF-8, warn the user
|
||||
// and treat them as binary changes. See D327 for discussion of why Arcanist
|
||||
// has this behavior.
|
||||
if ($utf8_problems) {
|
||||
if (count($utf8_problems) == 1) {
|
||||
$utf8_warning =
|
||||
"This diff includes a file which is not valid UTF-8 (it has invalid ".
|
||||
"byte sequences). You can either stop this workflow and fix it, or ".
|
||||
"continue. If you continue, this file will be marked as binary.\n\n".
|
||||
" AFFECTED FILE\n";
|
||||
|
||||
$confirm = "Do you want to mark this file as binary and continue?";
|
||||
} else {
|
||||
$utf8_warning =
|
||||
"This diff includes files which are not valid UTF-8 (they contain ".
|
||||
"invalid byte sequences). You can either stop this workflow and fix ".
|
||||
"these files, or continue. If you continue, these files will be ".
|
||||
"marked as binary.\n\n".
|
||||
" AFFECTED FILES\n";
|
||||
|
||||
$confirm = "Do you want to mark these files as binary and continue?";
|
||||
}
|
||||
|
||||
echo phutil_console_format("**Invalid Content Encoding (Non-UTF8)**\n");
|
||||
echo phutil_console_wrap($utf8_warning);
|
||||
|
||||
$file_list = mpull($utf8_problems, 'getCurrentPath');
|
||||
$file_list = ' '.implode("\n ", $file_list);
|
||||
echo $file_list;
|
||||
|
||||
if (!phutil_console_confirm($confirm, $default_no = false)) {
|
||||
throw new ArcanistUsageException("Aborted workflow to fix UTF-8.");
|
||||
} else {
|
||||
foreach ($utf8_problems as $change) {
|
||||
$change->convertToBinaryChange();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Ideally, we should do this later, after validating commit message
|
||||
// fields (i.e., test plan), in case there are large/slow file upload steps
|
||||
// involved.
|
||||
foreach ($changes as $change) {
|
||||
if ($change->getFileType() != ArcanistDiffChangeType::FILE_BINARY) {
|
||||
continue;
|
||||
|
|
Loading…
Reference in a new issue