From ee6357386d0bfbccd32d6756322743985a9d8fe6 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Wed, 10 Aug 2016 14:32:37 -0700 Subject: [PATCH 1/5] Correctly parse file renames and copies from `git diff --raw` Summary: `parseGitRawDiff` dealt with the `A`, `M`, and `D` status flags from `git diff --raw`, for file additions, modifications, and deletions respectively. However, it failed to cope with `C` and `R` flags, for copies and renames. Git version 2.9 and above default to resolving renames, even in `git diff --raw` output, making this lack of support only salient now (though users with Git's `diff.rename` set encountered it previously). Those two flags differ from the other three in that they offer both the source and destination filename, separated by a tab. As `parseGitRawDiff` was not aware of this property, it returned a "filename" of `"oldfile\tnewfile"`. This is surfaced in several places, including as passed to linters as a filename to check. Needless to say, this file is nearly guaranteed to never exist on disk. Detect both the `C` and `R` flag types, and generate either a file addition, or a pair of addition/deletion entries. Test Plan: Renamed a file, with a linter that printed each file it was called with. Reviewers: #blessed_reviewers, epriestley Reviewed By: #blessed_reviewers, epriestley Subscribers: jboning, Korvin Differential Revision: https://secure.phabricator.com/D16387 --- src/repository/api/ArcanistGitAPI.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/repository/api/ArcanistGitAPI.php b/src/repository/api/ArcanistGitAPI.php index 884d65dc..57078dab 100644 --- a/src/repository/api/ArcanistGitAPI.php +++ b/src/repository/api/ArcanistGitAPI.php @@ -826,6 +826,22 @@ final class ArcanistGitAPI extends ArcanistRepositoryAPI { $mask |= self::FLAG_EXTERNALS; } else if (isset($flags[$flag])) { $mask |= $flags[$flag]; + } else if ($flag[0] == 'R') { + $both = explode("\t", $file); + if ($full) { + $files[$both[0]] = array( + 'mask' => $mask | self::FLAG_DELETED, + 'ref' => str_repeat('0', 40), + ); + } else { + $files[$both[0]] = $mask | self::FLAG_DELETED; + } + $file = $both[1]; + $mask |= self::FLAG_ADDED; + } else if ($flag[0] == 'C') { + $both = explode("\t", $file); + $file = $both[1]; + $mask |= self::FLAG_ADDED; } if ($full) { From d0957c344156356123048be2219fb95a54c89a85 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 16 Aug 2016 13:38:28 -0700 Subject: [PATCH 2/5] Parse git renames with inconsistent quoting or custom prefixes Summary: The previous parser failed when only one of the old/new filenames was quoted, as with a rename of a Unicode filename to a non-Unicode filename, or vice versa. It also incorrectly parsed custom prefixes, even going to far as to encode this logic in the tests: a diff of "src/file dst/file" which is not a rename should not be recorded as changing "src/file", but rather "file". Switch to using the "rename from" / "rename to" as the source of truth for old and current filenames when complex renames are done. This matches the logic that git itself uses to parse patches; the contents of the `diff --git` line are merely viewed as a simplest-case prediction, with renames handled later should it not match. The diff parser already had logic to parse "rename from" / "rename to" lines and extract their information. As such, this diff consists primarily of removing logic from the `splitGitDiffPaths` method, and allowing it to quietly fail. This resolves two ambiguity mentioned in comments and tests: in renaming "old file old" to "file", as well as the renaming of "old file with spaces" to "new file with spaces" when neither are quoted. Test Plan: `arc unit`. Many of the existing test cases no longer applied to the `splitGitDiffPaths` method; they were moved into a new test method which also supplies values for "rename from" and "rename to" lines. As noted in the summary, this alters one of the expected values of an existing test. Specifically, the following diff is a file addition of `file` with custom prefixes, and not the addition of "dst/file": ``` diff --git src/file dst/file new file mode 100644 index 0000000..1269488 --- /dev/null +++ dst/file @@ -0,0 +1 @@ +data ``` Reviewers: epriestley, #blessed_reviewers Reviewed By: epriestley, #blessed_reviewers Subscribers: Korvin, epriestley Differential Revision: https://secure.phabricator.com/D16405 --- src/parser/ArcanistDiffParser.php | 111 +++--------- .../__tests__/ArcanistDiffParserTestCase.php | 171 +++++++++++------- .../diff/custom-prefixes-edit.gitdiff | 11 ++ 3 files changed, 145 insertions(+), 148 deletions(-) create mode 100644 src/parser/__tests__/diff/custom-prefixes-edit.gitdiff diff --git a/src/parser/ArcanistDiffParser.php b/src/parser/ArcanistDiffParser.php index 711edb63..a310fb41 100644 --- a/src/parser/ArcanistDiffParser.php +++ b/src/parser/ArcanistDiffParser.php @@ -288,9 +288,11 @@ final class ArcanistDiffParser extends Phobject { if (isset($match['type'])) { if ($match['type'] == 'diff --git') { - list($old, $new) = self::splitGitDiffPaths($match['oldnew']); - $match['old'] = $old; - $match['cur'] = $new; + $filename = self::extractGitCommonFilename($match['oldnew']); + if ($filename !== null) { + $match['old'] = $filename; + $match['cur'] = $filename; + } } } @@ -1307,46 +1309,6 @@ final class ArcanistDiffParser extends Phobject { } - /** - * Strip prefixes off paths from `git diff`. By default git uses a/ and b/, - * but you can set `diff.mnemonicprefix` to get a different set of prefixes, - * or use `--no-prefix`, `--src-prefix` or `--dst-prefix` to set these to - * other arbitrary values. - * - * We strip the default and mnemonic prefixes, and trust the user knows what - * they're doing in the other cases. - * - * @param string Path to strip. - * @return string Stripped path. - */ - public static function stripGitPathPrefix($path) { - - static $regex; - if ($regex === null) { - $prefixes = array( - // These are the defaults. - 'a/', - 'b/', - - // These show up when you set "diff.mnemonicprefix". - 'i/', - 'c/', - 'w/', - 'o/', - '1/', - '2/', - ); - - foreach ($prefixes as $key => $prefix) { - $prefixes[$key] = preg_quote($prefix, '@'); - } - $regex = '@^('.implode('|', $prefixes).')@S'; - } - - return preg_replace($regex, '', $path); - } - - /** * Split the paths on a "diff --git" line into old and new paths. This * is difficult because they may be ambiguous if the files contain spaces. @@ -1354,53 +1316,40 @@ final class ArcanistDiffParser extends Phobject { * @param string Text from a diff line after "diff --git ". * @return pair Old and new paths. */ - public static function splitGitDiffPaths($paths) { + public static function extractGitCommonFilename($paths) { $matches = null; $paths = rtrim($paths, "\r\n"); - $patterns = array( - // Try quoted paths, used for unicode filenames or filenames with quotes. - '@^(?P"(?:\\\\.|[^"\\\\]+)+") (?P"(?:\\\\.|[^"\\\\]+)+")$@', + // Try the exact same string twice in a row separated by a + // space, with an optional prefix. This can hit a false + // positive for moves from files like "old file old" to "file", + // but such a cases will be caught by the "rename from" / + // "rename to" lines. + $prefix = '(?:[^/]+/)?'; + $pattern = + "@^(?P(?P\"?){$prefix}(?P.+)\\k)" + ." " + ."(?P(?P\"?){$prefix}\\k\\k)$@"; - // Try paths without spaces. - '@^(?P[^ ]+) (?P[^ ]+)$@', - - // Try paths with well-known prefixes. - '@^(?P[abicwo12]/.*) (?P[abicwo12]/.*)$@', - - // Try the exact same string twice in a row separated by a space. - // This can hit a false positive for moves from files like "old file old" - // to "file", but such a case combined with custom diff prefixes is - // incredibly obscure. - '@^(?P.*) (?P\\1)$@', - ); - - foreach ($patterns as $pattern) { - if (preg_match($pattern, $paths, $matches)) { - break; - } + if (!preg_match($pattern, $paths, $matches)) { + // A rename or some form; return null for now, and let the + // "rename from" / "rename to" lines fix it up. + return null; } - if (!$matches) { - throw new Exception( - pht( - "Input diff contains ambiguous line '%s'. This line is ambiguous ". - "because there are spaces in the file names, so the parser can not ". - "determine where the file names begin and end. To resolve this ". - "ambiguity, use standard prefixes ('a/' and 'b/') when ". - "generating diffs.", - "diff --git {$paths}")); - } + // Use the common subpart. There may be ambiguity here: "src/file + // dst/file" may _either_ be a prefix-less move, or a change with + // two custom prefixes. We assume it is the latter; if it is a + // rename, diff parsing will update based on the "rename from" / + // "rename to" lines. - $old = $matches['old']; - $old = self::unescapeFilename($old); - $old = self::stripGitPathPrefix($old); - - $new = $matches['new']; + // This re-assembles with the differing prefixes removed, but the + // quoting from the original. Necessary so we know if we should + // unescape characters from the common string. + $new = $matches['newq'].$matches['common'].$matches['newq']; $new = self::unescapeFilename($new); - $new = self::stripGitPathPrefix($new); - return array($old, $new); + return $new; } diff --git a/src/parser/__tests__/ArcanistDiffParserTestCase.php b/src/parser/__tests__/ArcanistDiffParserTestCase.php index 0976fb9b..c3f23b3f 100644 --- a/src/parser/__tests__/ArcanistDiffParserTestCase.php +++ b/src/parser/__tests__/ArcanistDiffParserTestCase.php @@ -535,7 +535,14 @@ EOTEXT $this->assertEqual(1, count($changes)); $change = head($changes); $this->assertEqual( - 'dst/file', + 'file', + $change->getCurrentPath()); + break; + case 'custom-prefixes-edit.gitdiff': + $this->assertEqual(1, count($changes)); + $change = head($changes); + $this->assertEqual( + 'file', $change->getCurrentPath()); break; case 'more-newlines.svndiff': @@ -607,81 +614,111 @@ EOTEXT } } - public function testGitPrefixStripping() { + public function testGitCommonFilenameExtraction() { static $tests = array( - 'a/file.c' => 'file.c', - 'b/file.c' => 'file.c', - 'i/file.c' => 'file.c', - 'c/file.c' => 'file.c', - 'w/file.c' => 'file.c', - 'o/file.c' => 'file.c', - '1/file.c' => 'file.c', - '2/file.c' => 'file.c', - 'src/file.c' => 'src/file.c', - 'file.c' => 'file.c', + 'a/filename.c b/filename.c' => 'filename.c', + "a/filename.c b/filename.c\n" => 'filename.c', + "a/filename.c b/filename.c\r\n" => 'filename.c', + 'filename.c filename.c' => 'filename.c', + '1/filename.c 2/filename.c' => 'filename.c', + '"a/\\"quotes\\"" "b/\\"quotes\\""' => '"quotes"', + '"a/\\"quotes and spaces\\"" "b/\\"quotes and spaces\\""' => + '"quotes and spaces"', + '"a/\\342\\230\\203" "b/\\342\\230\\203"' => + "\xE2\x98\x83", + 'a/Core Data/filename.c b/Core Data/filename.c' => + 'Core Data/filename.c', + 'some file with spaces.c some file with spaces.c' => + 'some file with spaces.c', + '"foo bar.c" foo bar.c' => 'foo bar.c', + '"a/foo bar.c" b/foo bar.c' => 'foo bar.c', + 'src/file dst/file' => 'file', + + // Renames are handled by the "rename from ..." lines later in + // the diff, for simplicity of parsing; this is also how git + // itself handles it. + 'a/foo.c b/bar.c' => null, + 'a/foo bar.c b/baz troz.c' => null, + '"a/foo bar.c" b/baz troz.c' => null, + 'a/foo bar.c "b/baz troz.c"' => null, + '"a/foo bar.c" "b/baz troz.c"' => null, + 'filename file with spaces.c filename file with spaces.c' => + 'filename file with spaces.c', ); foreach ($tests as $input => $expect) { - $this->assertEqual( - $expect, - ArcanistDiffParser::stripGitPathPrefix($input), - pht("Strip git prefix from '%s'.", $input)); - } - } - - public function testGitPathSplitting() { - static $tests = array( - 'a/old.c b/new.c' => array('old.c', 'new.c'), - "a/old.c b/new.c\n" => array('old.c', 'new.c'), - "a/old.c b/new.c\r\n" => array('old.c', 'new.c'), - 'old.c new.c' => array('old.c', 'new.c'), - '1/old.c 2/new.c' => array('old.c', 'new.c'), - '"a/\\"quotes1\\"" "b/\\"quotes2\\""' => array( - '"quotes1"', - '"quotes2"', - ), - '"a/\\"quotes and spaces1\\"" "b/\\"quotes and spaces2\\""' => array( - '"quotes and spaces1"', - '"quotes and spaces2"', - ), - '"a/\\342\\230\\2031" "b/\\342\\230\\2032"' => array( - "\xE2\x98\x831", - "\xE2\x98\x832", - ), - 'a/Core Data/old.c b/Core Data/new.c' => array( - 'Core Data/old.c', - 'Core Data/new.c', - ), - 'some file with spaces.c some file with spaces.c' => array( - 'some file with spaces.c', - 'some file with spaces.c', - ), - ); - - foreach ($tests as $input => $expect) { - $result = ArcanistDiffParser::splitGitDiffPaths($input); + $result = ArcanistDiffParser::extractGitCommonFilename($input); $this->assertEqual( $expect, $result, pht('Split: %s', $input)); } - - - static $ambiguous = array( - 'old file with spaces.c new file with spaces.c', - ); - - foreach ($ambiguous as $input) { - $caught = null; - try { - ArcanistDiffParser::splitGitDiffPaths($input); - } catch (Exception $ex) { - $caught = $ex; - } - $this->assertTrue( - ($caught instanceof Exception), - pht('Ambiguous: %s', $input)); - } } + + public function runSingleRename($diffline, $from, $to, $old, $new) { + $str = "diff --git $diffline\nsimilarity index 95%\n" + ."rename from $from\nrename to $to\n"; + $parser = new ArcanistDiffParser(); + $changes = $parser->parseDiff($str); + $this->assertTrue( + $changes !== null, + pht("Parsed:\n%s", $str)); + $this->assertEqual( + $old == $new ? 1 : 2, count($changes), + pht("Parsed one change:\n%s", $str)); + $change = reset($changes); + $this->assertEqual( + array($old, $new), + array($change->getOldPath(), $change->getCurrentPath()), + pht('Split: %s', $diffline)); + } + + public function testGitRenames() { + $this->runSingleRename('a/old.c b/new.c', + 'old.c', 'new.c', + 'old.c', 'new.c'); + $this->runSingleRename('old.c new.c', + 'old.c', 'new.c', + 'old.c', 'new.c'); + $this->runSingleRename('1/old.c 2/new.c', + 'old.c', 'new.c', + 'old.c', 'new.c'); + $this->runSingleRename('from/file.c to/file.c', + 'from/file.c', 'to/file.c', + 'from/file.c', 'to/file.c'); + $this->runSingleRename('"a/\\"quotes1\\"" "b/\\"quotes2\\""', + '"\\"quotes1\\""', '"\\"quotes2\\""', + '"quotes1"', '"quotes2"'); + $this->runSingleRename('"a/\\"quotes spaces1\\"" "b/\\"quotes spaces2\\""', + '"\\"quotes spaces1\\""', '"\\"quotes spaces2\\""', + '"quotes spaces1"', '"quotes spaces2"'); + $this->runSingleRename('"a/\\342\\230\\2031" "b/\\342\\230\\2032"', + '"\\342\\230\\2031"', '"\\342\\230\\2032"', + "\xE2\x98\x831", "\xE2\x98\x832"); + $this->runSingleRename('a/Core Data/old.c b/Core Data/new.c', + 'Core Data/old.c', 'Core Data/new.c', + 'Core Data/old.c', 'Core Data/new.c'); + $this->runSingleRename('file with spaces.c file with spaces.c', + 'file with spaces.c', 'file with spaces.c', + 'file with spaces.c', 'file with spaces.c'); + $this->runSingleRename('a/non-quoted filename.c "b/quoted filename.c"', + 'non-quoted filename.c', '"quoted filename.c"', + 'non-quoted filename.c', 'quoted filename.c'); + $this->runSingleRename('non-quoted filename.c "quoted filename.c"', + 'non-quoted filename.c', '"quoted filename.c"', + 'non-quoted filename.c', 'quoted filename.c'); + $this->runSingleRename('"a/quoted filename.c" b/non quoted filename.c', + '"quoted filename.c"', 'non quoted filename.c', + 'quoted filename.c', 'non quoted filename.c'); + $this->runSingleRename('"quoted filename.c" non-quoted filename.c', + '"quoted filename.c"', 'non-quoted filename.c', + 'quoted filename.c', 'non-quoted filename.c'); + $this->runSingleRename('old file with spaces.c new file with spaces.c', + 'old file with spaces.c', 'new file with spaces.c', + 'old file with spaces.c', 'new file with spaces.c'); + $this->runSingleRename('old file old file', + 'old file old', 'file', + 'old file old', 'file'); + } } diff --git a/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff b/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff new file mode 100644 index 00000000..31abe8b9 --- /dev/null +++ b/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff @@ -0,0 +1,11 @@ +diff --git src/file dst/file +index 711edb6..a8cd7be 100644 +--- src/file ++++ dst/file +@@ -5,5 +5,5 @@ + some context + other context +-old value ++new value + some context + other context From 6507be27aeb90fe32ad9d45f97bdd61e173fd378 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 17 Aug 2016 06:55:48 -0700 Subject: [PATCH 3/5] Fix handling of View Policy in CLI upload workflow for small, unique files Summary: Ref T7148. When uploading files from the CLI with a particular view policy, we may not respect it if the file is unique (so the data isn't already known) and small (so it doesn't invoke the chunker). This is rare (and may never have happened outside of testing) because: - production dumps are always larger than the minimum chunk size; - only cluster stuff uses `setViewPolicy()`; - the default policy is "Administrators" anyway, which is safe. However, I caught it in local testing, so fix it up. Test Plan: Used `bin/host upload --file ...` to upload a small, unique file. Verified it uploaded with the correct custom view policy ("No One") rather than the default view policy ("Administrators"). Reviewers: chad Reviewed By: chad Maniphest Tasks: T7148 Differential Revision: https://secure.phabricator.com/D16408 --- src/upload/ArcanistFileUploader.php | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/upload/ArcanistFileUploader.php b/src/upload/ArcanistFileUploader.php index b37b1856..daf4136d 100644 --- a/src/upload/ArcanistFileUploader.php +++ b/src/upload/ArcanistFileUploader.php @@ -116,8 +116,7 @@ final class ArcanistFileUploader extends Phobject { $conduit = $this->conduit; $futures = array(); foreach ($files as $key => $file) { - $params = array( - 'name' => $file->getName(), + $params = $this->getUploadParameters($file) + array( 'contentLength' => $file->getByteSize(), 'contentHash' => $file->getContentHash(), ); @@ -127,11 +126,6 @@ final class ArcanistFileUploader extends Phobject { $params['deleteAfterEpoch'] = $delete_after; } - $view_policy = $file->getViewPolicy(); - if ($view_policy !== null) { - $params['viewPolicy'] = $view_policy; - } - $futures[$key] = $conduit->callMethod('file.allocate', $params); } @@ -294,13 +288,29 @@ final class ArcanistFileUploader extends Phobject { return $conduit->callMethodSynchronous( 'file.upload', - array( - 'name' => $file->getName(), + $this->getUploadParameters($file) + array( 'data_base64' => base64_encode($data), )); } + /** + * Get common parameters for file uploads. + */ + private function getUploadParameters(ArcanistFileDataRef $file) { + $params = array( + 'name' => $file->getName(), + ); + + $view_policy = $file->getViewPolicy(); + if ($view_policy !== null) { + $params['viewPolicy'] = $view_policy; + } + + return $params; + } + + /** * Write a status message. * From 3ae0cc8d41e26218ac5d0871ce6b73d0377288db Mon Sep 17 00:00:00 2001 From: Luke081515 Date: Wed, 17 Aug 2016 10:36:16 -0700 Subject: [PATCH 4/5] Avoid double [y/N] when updating a not owned revision Summary: Since 737f5c0df976fe2b3178aac6ab7feb3d3621d99e arcanist shows [y/N] two times, when arcanist asks you, if you want to proceed, if you want to update a not owned revision. Ths patch fixes that, so that Arcanist shows [y/N] only once, like at other situations, when Arcanist asks you a question. Ref T11489 Test Plan: Updated a not owned revision. Reviewers: #blessed_reviewers, epriestley Reviewed By: #blessed_reviewers, epriestley Subscribers: epriestley Tags: #arcanist Maniphest Tasks: T11489 Differential Revision: https://secure.phabricator.com/D16415 --- src/workflow/ArcanistDiffWorkflow.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/workflow/ArcanistDiffWorkflow.php b/src/workflow/ArcanistDiffWorkflow.php index 64ab5c01..7834f4b8 100644 --- a/src/workflow/ArcanistDiffWorkflow.php +++ b/src/workflow/ArcanistDiffWorkflow.php @@ -2528,7 +2528,7 @@ EOTEXT "You don't own revision %s: \"%s\". Normally, you should ". "only update revisions you own. You can \"Commandeer\" this revision ". "from the web interface if you want to become the owner.\n\n". - "Update this revision anyway? [y/N]", + "Update this revision anyway?", "D{$id}", $title); From 89e8b48523844cc3eff8b775f8fae49e85f8fc22 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Fri, 19 Aug 2016 14:23:15 -0700 Subject: [PATCH 5/5] Update documentation for changed splitGitDiffPaths function Summary: The behavior (and name) of this function was changed in D16405, but the documentation was not updated to reflect the new contract. Test Plan: Untested; pure doc changes. Reviewers: #blessed_reviewers, epriestley Reviewed By: #blessed_reviewers, epriestley Subscribers: Korvin Differential Revision: https://secure.phabricator.com/D16425 --- src/parser/ArcanistDiffParser.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/parser/ArcanistDiffParser.php b/src/parser/ArcanistDiffParser.php index a310fb41..1d213846 100644 --- a/src/parser/ArcanistDiffParser.php +++ b/src/parser/ArcanistDiffParser.php @@ -1310,11 +1310,17 @@ final class ArcanistDiffParser extends Phobject { /** - * Split the paths on a "diff --git" line into old and new paths. This - * is difficult because they may be ambiguous if the files contain spaces. + * Extracts the common filename from two strings with differing path + * prefixes as found after `diff --git`. These strings may be + * quoted; if so, the filename is returned unescaped. The prefixes + * default to "a/" and "b/", but may be any string -- or may be + * entierly absent. This function may return "null" if the hunk + * represents a file move or copy, and with pathological renames may + * return an incorrect value. Such cases are expected to be + * recovered by later rename detection codepaths. * * @param string Text from a diff line after "diff --git ". - * @return pair Old and new paths. + * @return string Filename being altered, or null for a rename. */ public static function extractGitCommonFilename($paths) { $matches = null;