diff --git a/src/parser/ArcanistDiffParser.php b/src/parser/ArcanistDiffParser.php index 711edb63..a310fb41 100644 --- a/src/parser/ArcanistDiffParser.php +++ b/src/parser/ArcanistDiffParser.php @@ -288,9 +288,11 @@ final class ArcanistDiffParser extends Phobject { if (isset($match['type'])) { if ($match['type'] == 'diff --git') { - list($old, $new) = self::splitGitDiffPaths($match['oldnew']); - $match['old'] = $old; - $match['cur'] = $new; + $filename = self::extractGitCommonFilename($match['oldnew']); + if ($filename !== null) { + $match['old'] = $filename; + $match['cur'] = $filename; + } } } @@ -1307,46 +1309,6 @@ final class ArcanistDiffParser extends Phobject { } - /** - * Strip prefixes off paths from `git diff`. By default git uses a/ and b/, - * but you can set `diff.mnemonicprefix` to get a different set of prefixes, - * or use `--no-prefix`, `--src-prefix` or `--dst-prefix` to set these to - * other arbitrary values. - * - * We strip the default and mnemonic prefixes, and trust the user knows what - * they're doing in the other cases. - * - * @param string Path to strip. - * @return string Stripped path. - */ - public static function stripGitPathPrefix($path) { - - static $regex; - if ($regex === null) { - $prefixes = array( - // These are the defaults. - 'a/', - 'b/', - - // These show up when you set "diff.mnemonicprefix". - 'i/', - 'c/', - 'w/', - 'o/', - '1/', - '2/', - ); - - foreach ($prefixes as $key => $prefix) { - $prefixes[$key] = preg_quote($prefix, '@'); - } - $regex = '@^('.implode('|', $prefixes).')@S'; - } - - return preg_replace($regex, '', $path); - } - - /** * Split the paths on a "diff --git" line into old and new paths. This * is difficult because they may be ambiguous if the files contain spaces. @@ -1354,53 +1316,40 @@ final class ArcanistDiffParser extends Phobject { * @param string Text from a diff line after "diff --git ". * @return pair Old and new paths. */ - public static function splitGitDiffPaths($paths) { + public static function extractGitCommonFilename($paths) { $matches = null; $paths = rtrim($paths, "\r\n"); - $patterns = array( - // Try quoted paths, used for unicode filenames or filenames with quotes. - '@^(?P"(?:\\\\.|[^"\\\\]+)+") (?P"(?:\\\\.|[^"\\\\]+)+")$@', + // Try the exact same string twice in a row separated by a + // space, with an optional prefix. This can hit a false + // positive for moves from files like "old file old" to "file", + // but such a cases will be caught by the "rename from" / + // "rename to" lines. + $prefix = '(?:[^/]+/)?'; + $pattern = + "@^(?P(?P\"?){$prefix}(?P.+)\\k)" + ." " + ."(?P(?P\"?){$prefix}\\k\\k)$@"; - // Try paths without spaces. - '@^(?P[^ ]+) (?P[^ ]+)$@', - - // Try paths with well-known prefixes. - '@^(?P[abicwo12]/.*) (?P[abicwo12]/.*)$@', - - // Try the exact same string twice in a row separated by a space. - // This can hit a false positive for moves from files like "old file old" - // to "file", but such a case combined with custom diff prefixes is - // incredibly obscure. - '@^(?P.*) (?P\\1)$@', - ); - - foreach ($patterns as $pattern) { - if (preg_match($pattern, $paths, $matches)) { - break; - } + if (!preg_match($pattern, $paths, $matches)) { + // A rename or some form; return null for now, and let the + // "rename from" / "rename to" lines fix it up. + return null; } - if (!$matches) { - throw new Exception( - pht( - "Input diff contains ambiguous line '%s'. This line is ambiguous ". - "because there are spaces in the file names, so the parser can not ". - "determine where the file names begin and end. To resolve this ". - "ambiguity, use standard prefixes ('a/' and 'b/') when ". - "generating diffs.", - "diff --git {$paths}")); - } + // Use the common subpart. There may be ambiguity here: "src/file + // dst/file" may _either_ be a prefix-less move, or a change with + // two custom prefixes. We assume it is the latter; if it is a + // rename, diff parsing will update based on the "rename from" / + // "rename to" lines. - $old = $matches['old']; - $old = self::unescapeFilename($old); - $old = self::stripGitPathPrefix($old); - - $new = $matches['new']; + // This re-assembles with the differing prefixes removed, but the + // quoting from the original. Necessary so we know if we should + // unescape characters from the common string. + $new = $matches['newq'].$matches['common'].$matches['newq']; $new = self::unescapeFilename($new); - $new = self::stripGitPathPrefix($new); - return array($old, $new); + return $new; } diff --git a/src/parser/__tests__/ArcanistDiffParserTestCase.php b/src/parser/__tests__/ArcanistDiffParserTestCase.php index 0976fb9b..c3f23b3f 100644 --- a/src/parser/__tests__/ArcanistDiffParserTestCase.php +++ b/src/parser/__tests__/ArcanistDiffParserTestCase.php @@ -535,7 +535,14 @@ EOTEXT $this->assertEqual(1, count($changes)); $change = head($changes); $this->assertEqual( - 'dst/file', + 'file', + $change->getCurrentPath()); + break; + case 'custom-prefixes-edit.gitdiff': + $this->assertEqual(1, count($changes)); + $change = head($changes); + $this->assertEqual( + 'file', $change->getCurrentPath()); break; case 'more-newlines.svndiff': @@ -607,81 +614,111 @@ EOTEXT } } - public function testGitPrefixStripping() { + public function testGitCommonFilenameExtraction() { static $tests = array( - 'a/file.c' => 'file.c', - 'b/file.c' => 'file.c', - 'i/file.c' => 'file.c', - 'c/file.c' => 'file.c', - 'w/file.c' => 'file.c', - 'o/file.c' => 'file.c', - '1/file.c' => 'file.c', - '2/file.c' => 'file.c', - 'src/file.c' => 'src/file.c', - 'file.c' => 'file.c', + 'a/filename.c b/filename.c' => 'filename.c', + "a/filename.c b/filename.c\n" => 'filename.c', + "a/filename.c b/filename.c\r\n" => 'filename.c', + 'filename.c filename.c' => 'filename.c', + '1/filename.c 2/filename.c' => 'filename.c', + '"a/\\"quotes\\"" "b/\\"quotes\\""' => '"quotes"', + '"a/\\"quotes and spaces\\"" "b/\\"quotes and spaces\\""' => + '"quotes and spaces"', + '"a/\\342\\230\\203" "b/\\342\\230\\203"' => + "\xE2\x98\x83", + 'a/Core Data/filename.c b/Core Data/filename.c' => + 'Core Data/filename.c', + 'some file with spaces.c some file with spaces.c' => + 'some file with spaces.c', + '"foo bar.c" foo bar.c' => 'foo bar.c', + '"a/foo bar.c" b/foo bar.c' => 'foo bar.c', + 'src/file dst/file' => 'file', + + // Renames are handled by the "rename from ..." lines later in + // the diff, for simplicity of parsing; this is also how git + // itself handles it. + 'a/foo.c b/bar.c' => null, + 'a/foo bar.c b/baz troz.c' => null, + '"a/foo bar.c" b/baz troz.c' => null, + 'a/foo bar.c "b/baz troz.c"' => null, + '"a/foo bar.c" "b/baz troz.c"' => null, + 'filename file with spaces.c filename file with spaces.c' => + 'filename file with spaces.c', ); foreach ($tests as $input => $expect) { - $this->assertEqual( - $expect, - ArcanistDiffParser::stripGitPathPrefix($input), - pht("Strip git prefix from '%s'.", $input)); - } - } - - public function testGitPathSplitting() { - static $tests = array( - 'a/old.c b/new.c' => array('old.c', 'new.c'), - "a/old.c b/new.c\n" => array('old.c', 'new.c'), - "a/old.c b/new.c\r\n" => array('old.c', 'new.c'), - 'old.c new.c' => array('old.c', 'new.c'), - '1/old.c 2/new.c' => array('old.c', 'new.c'), - '"a/\\"quotes1\\"" "b/\\"quotes2\\""' => array( - '"quotes1"', - '"quotes2"', - ), - '"a/\\"quotes and spaces1\\"" "b/\\"quotes and spaces2\\""' => array( - '"quotes and spaces1"', - '"quotes and spaces2"', - ), - '"a/\\342\\230\\2031" "b/\\342\\230\\2032"' => array( - "\xE2\x98\x831", - "\xE2\x98\x832", - ), - 'a/Core Data/old.c b/Core Data/new.c' => array( - 'Core Data/old.c', - 'Core Data/new.c', - ), - 'some file with spaces.c some file with spaces.c' => array( - 'some file with spaces.c', - 'some file with spaces.c', - ), - ); - - foreach ($tests as $input => $expect) { - $result = ArcanistDiffParser::splitGitDiffPaths($input); + $result = ArcanistDiffParser::extractGitCommonFilename($input); $this->assertEqual( $expect, $result, pht('Split: %s', $input)); } - - - static $ambiguous = array( - 'old file with spaces.c new file with spaces.c', - ); - - foreach ($ambiguous as $input) { - $caught = null; - try { - ArcanistDiffParser::splitGitDiffPaths($input); - } catch (Exception $ex) { - $caught = $ex; - } - $this->assertTrue( - ($caught instanceof Exception), - pht('Ambiguous: %s', $input)); - } } + + public function runSingleRename($diffline, $from, $to, $old, $new) { + $str = "diff --git $diffline\nsimilarity index 95%\n" + ."rename from $from\nrename to $to\n"; + $parser = new ArcanistDiffParser(); + $changes = $parser->parseDiff($str); + $this->assertTrue( + $changes !== null, + pht("Parsed:\n%s", $str)); + $this->assertEqual( + $old == $new ? 1 : 2, count($changes), + pht("Parsed one change:\n%s", $str)); + $change = reset($changes); + $this->assertEqual( + array($old, $new), + array($change->getOldPath(), $change->getCurrentPath()), + pht('Split: %s', $diffline)); + } + + public function testGitRenames() { + $this->runSingleRename('a/old.c b/new.c', + 'old.c', 'new.c', + 'old.c', 'new.c'); + $this->runSingleRename('old.c new.c', + 'old.c', 'new.c', + 'old.c', 'new.c'); + $this->runSingleRename('1/old.c 2/new.c', + 'old.c', 'new.c', + 'old.c', 'new.c'); + $this->runSingleRename('from/file.c to/file.c', + 'from/file.c', 'to/file.c', + 'from/file.c', 'to/file.c'); + $this->runSingleRename('"a/\\"quotes1\\"" "b/\\"quotes2\\""', + '"\\"quotes1\\""', '"\\"quotes2\\""', + '"quotes1"', '"quotes2"'); + $this->runSingleRename('"a/\\"quotes spaces1\\"" "b/\\"quotes spaces2\\""', + '"\\"quotes spaces1\\""', '"\\"quotes spaces2\\""', + '"quotes spaces1"', '"quotes spaces2"'); + $this->runSingleRename('"a/\\342\\230\\2031" "b/\\342\\230\\2032"', + '"\\342\\230\\2031"', '"\\342\\230\\2032"', + "\xE2\x98\x831", "\xE2\x98\x832"); + $this->runSingleRename('a/Core Data/old.c b/Core Data/new.c', + 'Core Data/old.c', 'Core Data/new.c', + 'Core Data/old.c', 'Core Data/new.c'); + $this->runSingleRename('file with spaces.c file with spaces.c', + 'file with spaces.c', 'file with spaces.c', + 'file with spaces.c', 'file with spaces.c'); + $this->runSingleRename('a/non-quoted filename.c "b/quoted filename.c"', + 'non-quoted filename.c', '"quoted filename.c"', + 'non-quoted filename.c', 'quoted filename.c'); + $this->runSingleRename('non-quoted filename.c "quoted filename.c"', + 'non-quoted filename.c', '"quoted filename.c"', + 'non-quoted filename.c', 'quoted filename.c'); + $this->runSingleRename('"a/quoted filename.c" b/non quoted filename.c', + '"quoted filename.c"', 'non quoted filename.c', + 'quoted filename.c', 'non quoted filename.c'); + $this->runSingleRename('"quoted filename.c" non-quoted filename.c', + '"quoted filename.c"', 'non-quoted filename.c', + 'quoted filename.c', 'non-quoted filename.c'); + $this->runSingleRename('old file with spaces.c new file with spaces.c', + 'old file with spaces.c', 'new file with spaces.c', + 'old file with spaces.c', 'new file with spaces.c'); + $this->runSingleRename('old file old file', + 'old file old', 'file', + 'old file old', 'file'); + } } diff --git a/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff b/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff new file mode 100644 index 00000000..31abe8b9 --- /dev/null +++ b/src/parser/__tests__/diff/custom-prefixes-edit.gitdiff @@ -0,0 +1,11 @@ +diff --git src/file dst/file +index 711edb6..a8cd7be 100644 +--- src/file ++++ dst/file +@@ -5,5 +5,5 @@ + some context + other context +-old value ++new value + some context + other context