From d4d095dbf6923b8da34a14547353ae179ec5feab Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 2 Apr 2020 12:23:15 -0700 Subject: [PATCH] Make Windows escaping preserve "%" symbols in arguments Summary: Ref T13504. Ref T13209. We currently use "escapeshellarg()" under Windows, which destructively replaces "%" symbols with spaces. This is wrong, breaks general behavior with "git log --format=...", and has led to a lot of weird workarounds in `arc` code where we don't escape arguments we should be escaping to tiptoe around issues with "%". Now that execution occurs via "bypass_shell", we can safely (probably?) escape things, although programs are still apparently free to parse the command string however they want. Test Plan: Added unit tests, ran them under Mac and Windows, got clean results. Ran "arc version" on Mac and Windows. Maniphest Tasks: T13504, T13209 Differential Revision: https://secure.phabricator.com/D21051 --- .../exec/__tests__/ExecFutureTestCase.php | 47 ++++++++++ .../workflow/ArcanistVersionWorkflow.php | 10 +-- src/xsprintf/PhutilCommandString.php | 90 ++++++++++++++++++- 3 files changed, 139 insertions(+), 8 deletions(-) diff --git a/src/future/exec/__tests__/ExecFutureTestCase.php b/src/future/exec/__tests__/ExecFutureTestCase.php index 572c5692..690ccee0 100644 --- a/src/future/exec/__tests__/ExecFutureTestCase.php +++ b/src/future/exec/__tests__/ExecFutureTestCase.php @@ -127,6 +127,53 @@ final class ExecFutureTestCase extends PhutilTestCase { $this->assertEqual(0, $err); } + public function testEscaping() { + $inputs = array( + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', + '!@#$%^&*()-=_+`~,.<>/?[]{};\':"|', + '', + ' ', + 'x y', + '%PATH%', + '"', + '""', + 'a "b" c', + '\\', + '\\a', + 'a\\', + '\\a\\', + 'a\\a', + '\\\\', + '\\"', + ); + + $bin = $this->getSupportExecutable('echo'); + + foreach ($inputs as $input) { + if (!is_array($input)) { + $input = array($input); + } + + list($stdout) = execx( + 'php -f %R -- %Ls', + $bin, + $input); + + $stdout = explode("\n", $stdout); + $output = array(); + foreach ($stdout as $line) { + $output[] = stripcslashes($line); + } + + $this->assertEqual( + $input, + $output, + pht( + 'Arguments are preserved for input: %s', + implode(' ', $input))); + } + } + public function testReadBuffering() { $str_len_8 = 'abcdefgh'; $str_len_4 = 'abcd'; diff --git a/src/toolset/workflow/ArcanistVersionWorkflow.php b/src/toolset/workflow/ArcanistVersionWorkflow.php index eafca523..fae0e607 100644 --- a/src/toolset/workflow/ArcanistVersionWorkflow.php +++ b/src/toolset/workflow/ArcanistVersionWorkflow.php @@ -67,14 +67,12 @@ EOTEXT Filesystem::readablePath($root))); } - // NOTE: Carefully execute these commands in a way that works on Windows - // until T8298 is properly fixed. See PHI52. - - list($commit) = $repository_api->execxLocal('log -1 --format=%%H'); + list($commit) = $repository_api->execxLocal( + 'log -1 --format=%s', + '%ct%x01%H'); $commit = trim($commit); - list($timestamp) = $repository_api->execxLocal('log -1 --format=%%ct'); - $timestamp = trim($timestamp); + list($timestamp, $commit) = explode("\1", $commit); $console->writeOut( "%s %s (%s)\n", diff --git a/src/xsprintf/PhutilCommandString.php b/src/xsprintf/PhutilCommandString.php index 85eb673a..671600ed 100644 --- a/src/xsprintf/PhutilCommandString.php +++ b/src/xsprintf/PhutilCommandString.php @@ -6,6 +6,8 @@ final class PhutilCommandString extends Phobject { private $escapingMode = false; const MODE_DEFAULT = 'default'; + const MODE_LINUX = 'linux'; + const MODE_WINDOWS = 'windows'; const MODE_POWERSHELL = 'powershell'; public function __construct(array $argv) { @@ -46,9 +48,19 @@ final class PhutilCommandString extends Phobject { } public static function escapeArgument($value, $mode) { + if ($mode === self::MODE_DEFAULT) { + if (phutil_is_windows()) { + $mode = self::MODE_WINDOWS; + } else { + $mode = self::MODE_LINUX; + } + } + switch ($mode) { - case self::MODE_DEFAULT: - return escapeshellarg($value); + case self::MODE_LINUX: + return self::escapeLinux($value); + case self::MODE_WINDOWS: + return self::escapeWindows($value); case self::MODE_POWERSHELL: return self::escapePowershell($value); default: @@ -82,4 +94,78 @@ final class PhutilCommandString extends Phobject { return '"'.$value.'"'; } + private static function escapeLinux($value) { + if (strpos($value, "\0") !== false) { + throw new Exception( + pht( + 'Command string argument includes a NULL byte. This byte can not '. + 'be safely escaped in command line arguments in Linux '. + 'environments.')); + } + + // If the argument is nonempty and contains only common printable + // characters, we do not need to escape it. This makes debugging + // workflows a little more user-friendly by making command output + // more readable. + if (preg_match('(^[a-zA-Z0-9:/@._+-]+\z)', $value)) { + return $value; + } + + return escapeshellarg($value); + } + + private static function escapeWindows($value) { + if (strpos($value, "\0") !== false) { + throw new Exception( + pht( + 'Command string argument includes a NULL byte. This byte can not '. + 'be safely escaped in command line arguments in Windows '. + 'environments.')); + } + + if (!phutil_is_utf8($value)) { + throw new Exception( + pht( + 'Command string argument includes text which is not valid UTF-8. '. + 'This library can not safely escape this sequence in command '. + 'line arguments in Windows environments.')); + } + + $has_backslash = (strpos($value, '\\') !== false); + $has_space = (strpos($value, ' ') !== false); + $has_quote = (strpos($value, '"') !== false); + $is_empty = (strlen($value) === 0); + + // If a backslash appears before another backslash, a double quote, or + // the end of the argument, we must escape it. Otherwise, we must leave + // it unescaped. + + if ($has_backslash) { + $value_v = preg_split('//', $value, -1, PREG_SPLIT_NO_EMPTY); + $len = count($value_v); + for ($ii = 0; $ii < $len; $ii++) { + if ($value_v[$ii] === '\\') { + if ($ii + 1 < $len) { + $next = $value_v[$ii + 1]; + } else { + $next = null; + } + + if ($next === '"' || $next === '\\' || $next === null) { + $value_v[$ii] = '\\\\'; + } + } + } + $value = implode('', $value_v); + } + + // Then, escape double quotes by prefixing them with backslashes. + if ($has_quote || $has_space || $has_backslash || $is_empty) { + $value = addcslashes($value, '"'); + $value = '"'.$value.'"'; + } + + return $value; + } + }