mirror of
https://we.phorge.it/source/arcanist.git
synced 2025-01-01 10:20:58 +01:00
8e0e07664a
Summary: Ref T13098. Historically, Phabricator was split into three parts: - Phabricator, the server. - Arcanist, the client. - libphutil, libraries shared between the client and server. One imagined use case for this was that `libphutil` might become a general-purpose library that other projects would use. However, this didn't really happen, and it seems unlikely to at this point: Phabricator has become a relatively more sophisticated application platform; we didn't end up seeing or encouraging much custom development; what custom development there is basically embraces all of Phabricator since there are huge advantages to doing so; and a general "open source is awful" sort of factor here in the sense that open source users often don't have goals well aligned to our goals. Turning "arc" into a client platform and building package management solidify us in this direction of being a standalone platform, not a standalone utility library. Phabricator also depends on `arcanist/`. If it didn't, there would be a small advantage to saying "shared code + client for client, shared code + server for server", but there's no such distinction and it seems unlikely that one will ever exist. Even if it did, I think this has little value. Nowadays, I think this separation has no advantages for us and one significant cost: it makes installing `arcanist` more difficult for end-users. This will need some more finessing (Phabricator will need some changes for compatibility, and a lot of stuff that still says "libphutil" or "phutil" may eventually want to say "arcanist"), and some stuff (like xhpast) is probably straight-up broken right now and needs some tweaking, but I don't anticipate any major issues here. There was never anything particularly magical about libphutil as a separate standalone library. Test Plan: Ran `arc`, it gets about as far as it did before. 
Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13098 Differential Revision: https://secure.phabricator.com/D19688
170 lines
3.7 KiB
PHP
Executable file
170 lines
3.7 KiB
PHP
Executable file
#!/usr/bin/env php
|
|
<?php
|
|
|
|
require_once dirname(dirname(__FILE__)).'/__init_script__.php';
|
|
|
|
// Describe the command-line interface: tagline, synopsis and flag specs.
$args = new PhutilArgumentParser($argv);
$args->setTagline(pht('utf8 charset test script'));
$args->setSynopsis(<<<EOHELP
**utf8.php** [-C n] __file__ ...
Show regions in files which are not valid UTF-8. With "-C n",
show __n__ lines of context instead of the default of 3. Use
"-" to read stdin.

**utf8.php** --test __file__ ...
Test for files which are not valid UTF-8. For example, this
will find all ".php" files under the working directory which
aren't valid UTF-8:

find . -type f -name '*.php' | xargs -n256 ./utf8.php -t

If the script exits with no output, all input files were
valid UTF-8.
EOHELP
);

$args->parseStandardArguments();

$specs = array();

// "--context" / "-C": number of surrounding lines to print around each
// invalid line. Declared as conflicting with "--test".
$specs[] = array(
  'name' => 'context',
  'short' => 'C',
  'param' => 'lines',
  'default' => 3,
  'help' => pht(
    'Show __lines__ lines of context instead of the default 3.'),
  'conflicts' => array(
    'test' => pht('with %s, context is not shown.', '--test'),
  ),
);

// "--test" / "-t": only list the names of inputs which fail validation.
$specs[] = array(
  'name' => 'test',
  'short' => 't',
  'help' => pht('Print file names containing invalid UTF-8 to stdout.'),
);

// Every remaining argument is collected as an input file.
$specs[] = array(
  'name' => 'files',
  'wildcard' => true,
);

$args->parse($specs);

$test_only = $args->getArg('test');
$context_lines = $args->getArg('context');
$inputs = $args->getArg('files');

// With nothing to inspect, show the usage text instead.
if (!$inputs) {
  $args->printHelpAndExit();
}

$exit_code = $test_only
  ? test($inputs)
  : show($inputs, $context_lines);

exit($exit_code);
|
|
|
|
|
|
/**
 * Load the full contents of one input.
 *
 * @param string $file Path to read, or "-" to read standard input instead.
 * @return mixed Whatever the underlying reader returns: the result of
 *   `Filesystem::readFile()` for a path, or of `file_get_contents()` on
 *   `php://stdin` for "-".
 */
function read($file) {
  if ($file !== '-') {
    return Filesystem::readFile($file);
  }

  return file_get_contents('php://stdin');
}
|
|
|
|
/**
 * Produce the label used when reporting on an input.
 *
 * @param string $file Input as given on the command line.
 * @return string "stdin" when the input is "-", otherwise the input unchanged.
 */
function name($file) {
  return ($file === '-') ? 'stdin' : $file;
}
|
|
|
|
/**
 * Print the name of every input whose contents are not valid UTF-8, one
 * name per line. Inputs which pass the check produce no output.
 *
 * @param array $files Inputs to check ("-" means standard input).
 * @return int Always 0.
 */
function test(array $files) {
  foreach ($files as $path) {
    if (phutil_is_utf8(read($path))) {
      continue;
    }

    echo name($path)."\n";
  }

  return 0;
}
|
|
|
|
/**
 * Print an OKAY/FAIL status line for every input and, for each failing
 * input, the lines which are not valid UTF-8 together with surrounding
 * context lines.
 *
 * Displayed lines are prefixed with their 1-based line number. Regions which
 * are not adjacent are separated by a blank line, and invalid lines are
 * passed through show_problems() before printing.
 *
 * @param array $files Inputs to inspect ("-" means standard input).
 * @param int $context Number of lines to show before and after each invalid
 *   line.
 * @return int Always 0.
 */
function show(array $files, $context) {
  foreach ($files as $file) {
    $data = read($file);
    $ok = phutil_is_utf8($data);

    echo ($ok ? pht('OKAY') : pht('FAIL'));
    echo ' '.name($file)."\n";

    if ($ok) {
      continue;
    }

    $lines = explode("\n", $data);
    $len = count($lines);

    // $bad marks the (0-based) indexes of invalid lines; $map marks every
    // index which should be displayed, i.e. invalid lines plus context.
    $map = array();
    $bad = array();
    foreach ($lines as $n => $line) {
      if (phutil_is_utf8($line)) {
        continue;
      }

      $bad[$n] = true;

      // Clamp the context window to the bounds of the file.
      $first = max(0, $n - $context);
      $limit = min($len, $n + 1 + $context);
      for ($jj = $first; $jj < $limit; $jj++) {
        $map[$jj] = true;
      }
    }

    // Size the gutter for the largest line number actually printed. Line
    // numbers are printed 1-based ($idx + 1), so measure that value rather
    // than the 0-based index; otherwise the gutter is one column too narrow
    // whenever the last displayed line is number 10, 100, 1000, and so on.
    // An empty map (possible with a negative context) has nothing to
    // measure, and max() must not be called on an empty array.
    $width = 1;
    if ($map) {
      $width = strlen((string)(max(array_keys($map)) + 1));
    }

    // Set $last such that we print a newline on the first iteration through
    // the loop.
    $last = -2;
    foreach ($map as $idx => $ignored) {
      if ($idx !== $last + 1) {
        // This line does not directly follow the previous one: start a new
        // region.
        echo "\n";
      }
      $last = $idx;

      $line = $lines[$idx];
      if (!empty($bad[$idx])) {
        $line = show_problems($line);
      }

      printf(" % {$width}d %s\n", $idx + 1, $line);
    }
    echo "\n";
  }

  return 0;
}
|
|
|
|
/**
 * Rewrite a line so that every byte which is not part of a well-formed UTF-8
 * sequence is replaced with a visible "<0xNN>" marker.
 *
 * Well-formed sequences are copied through unchanged. NUL bytes are also
 * rendered as markers, because the ASCII range accepted here starts at 0x01.
 *
 * @param string $line Raw bytes of one line.
 * @return string The line with each offending byte replaced by a marker that
 *   has been passed through phutil_console_format() (presumably to highlight
 *   it on the console; confirm against that function's documentation).
 */
function show_problems($line) {
  // Matches one well-formed UTF-8 sequence (or a run of ASCII) at the start
  // of the string. The second-byte ranges follow RFC 3629 so that overlong
  // encodings (E0 80..9F, F0 80..8F), UTF-16 surrogates (ED A0..BF) and code
  // points above U+10FFFF (F4 90..BF) are NOT accepted. With a looser
  // pattern, a strict validator can reject a line while nothing on it gets
  // highlighted here.
  $regex =
    "/^(".
      "[\x01-\x7F]+".
      "|[\xC2-\xDF][\x80-\xBF]".
      "|\xE0[\xA0-\xBF][\x80-\xBF]".
      "|[\xE1-\xEC\xEE\xEF][\x80-\xBF][\x80-\xBF]".
      "|\xED[\x80-\x9F][\x80-\xBF]".
      "|\xF0[\x90-\xBF][\x80-\xBF][\x80-\xBF]".
      "|[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]".
      "|\xF4[\x80-\x8F][\x80-\xBF][\x80-\xBF]".
    ")/";

  $out = '';
  while (strlen($line)) {
    $match = null;
    if (preg_match($regex, $line, $match)) {
      // Copy the valid prefix through and continue after it.
      $out .= $match[1];
      $line = substr($line, strlen($match[1]));
    } else {
      // The leading byte cannot start a valid sequence: emit a marker for
      // this single byte and resynchronize on the next one. Always print two
      // hex digits so a NUL byte renders as "<0x00>".
      $chr = sprintf('<0x%02X>', ord($line[0]));
      $chr = phutil_console_format('##%s##', $chr);
      $out .= $chr;
      $line = substr($line, 1);
    }
  }

  return $out;
}
|