mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-11-22 14:52:40 +01:00
Allow 'arc diff' to detect non-UTF8 files and mark them binary
Summary: There are a bunch of different ways we could approach this, but practically I think this is probably the best one even though it's kind of yucky. A big part of my motivation here is that if we just reject UTF-8 outright, users are going to end up in a situation they don't understand how to resolve. UPDATE: after discussion, going with a more conservative approach until such time as we have a more compelling case for the less strict functionality. Test Plan: Created a diff with a text file that contained invalid UTF-8 subsequences. Reviewed By: tuomaspelkonen Reviewers: tuomaspelkonen, jungejason, aran Commenters: aran CC: aran, epriestley, tuomaspelkonen Differential Revision: 325
This commit is contained in:
parent
5b6fbf70e0
commit
d3816f1c9e
1 changed files with 48 additions and 3 deletions
|
@ -686,10 +686,55 @@ EOTEXT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$utf8_problems = array();
|
||||||
|
foreach ($changes as $change) {
|
||||||
|
foreach ($change->getHunks() as $hunk) {
|
||||||
|
if (!phutil_is_utf8($hunk->getCorpus())) {
|
||||||
|
$utf8_problems[] = $change;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there are non-binary files which aren't valid UTF-8, warn the user
|
||||||
|
// and treat them as binary changes. See D327 for discussion of why Arcanist
|
||||||
|
// has this behavior.
|
||||||
|
if ($utf8_problems) {
|
||||||
|
if (count($utf8_problems) == 1) {
|
||||||
|
$utf8_warning =
|
||||||
|
"This diff includes a file which is not valid UTF-8 (it has invalid ".
|
||||||
|
"byte sequences). You can either stop this workflow and fix it, or ".
|
||||||
|
"continue. If you continue, this file will be marked as binary.\n\n".
|
||||||
|
" AFFECTED FILE\n";
|
||||||
|
|
||||||
|
$confirm = "Do you want to mark this file as binary and continue?";
|
||||||
|
} else {
|
||||||
|
$utf8_warning =
|
||||||
|
"This diff includes files which are not valid UTF-8 (they contain ".
|
||||||
|
"invalid byte sequences). You can either stop this workflow and fix ".
|
||||||
|
"these files, or continue. If you continue, these files will be ".
|
||||||
|
"marked as binary.\n\n".
|
||||||
|
" AFFECTED FILES\n";
|
||||||
|
|
||||||
|
$confirm = "Do you want to mark these files as binary and continue?";
|
||||||
|
}
|
||||||
|
|
||||||
|
echo phutil_console_format("**Invalid Content Encoding (Non-UTF8)**\n");
|
||||||
|
echo phutil_console_wrap($utf8_warning);
|
||||||
|
|
||||||
|
$file_list = mpull($utf8_problems, 'getCurrentPath');
|
||||||
|
$file_list = ' '.implode("\n ", $file_list);
|
||||||
|
echo $file_list;
|
||||||
|
|
||||||
|
if (!phutil_console_confirm($confirm, $default_no = false)) {
|
||||||
|
throw new ArcanistUsageException("Aborted workflow to fix UTF-8.");
|
||||||
|
} else {
|
||||||
|
foreach ($utf8_problems as $change) {
|
||||||
|
$change->convertToBinaryChange();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Ideally, we should do this later, after validating commit message
|
|
||||||
// fields (i.e., test plan), in case there are large/slow file upload steps
|
|
||||||
// involved.
|
|
||||||
foreach ($changes as $change) {
|
foreach ($changes as $change) {
|
||||||
if ($change->getFileType() != ArcanistDiffChangeType::FILE_BINARY) {
|
if ($change->getFileType() != ArcanistDiffChangeType::FILE_BINARY) {
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Reference in a new issue