1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2024-11-26 08:42:40 +01:00

Parse git renames with inconsistent quoting or custom prefixes

Summary:
The previous parser failed when only one of the old/new filenames was
quoted, as with a rename of a Unicode filename to a non-Unicode
filename, or vice versa.  It also incorrectly parsed custom prefixes,
even going to far as to encode this logic in the tests: a diff of
"src/file dst/file" which is not a rename should not be recorded as
changing "src/file", but rather "file".

Switch to using the "rename from" / "rename to" as the source of truth
for old and current filenames when complex renames are done.  This
matches the logic that git itself uses to parse patches; the contents
of the `diff --git` line are merely viewed as a simplest-case
prediction, with renames handled later should it not match.

The diff parser already had logic to parse "rename from" / "rename to"
lines and extract their information.  As such, this diff consists
primarily of removing logic from the `splitGitDiffPaths` method, and
allowing it to quietly fail.

This resolves two ambiguity mentioned in comments and tests: in
renaming "old file old" to "file", as well as the renaming of
"old file with spaces" to "new file with spaces" when neither are
quoted.

Test Plan:
`arc unit`.  Many of the existing test cases no longer
applied to the `splitGitDiffPaths` method; they were moved into a new
test method which also supplies values for "rename from" and "rename
to" lines.

As noted in the summary, this alters one of the expected values of an
existing test.  Specifically, the following diff is a file addition of
`file` with custom prefixes, and not the addition of "dst/file":

```
diff --git src/file dst/file
new file mode 100644
index 0000000..1269488
--- /dev/null
+++ dst/file
@@ -0,0 +1 @@
+data
```

Reviewers: epriestley, #blessed_reviewers

Reviewed By: epriestley, #blessed_reviewers

Subscribers: Korvin, epriestley

Differential Revision: https://secure.phabricator.com/D16405
This commit is contained in:
Alex Vandiver 2016-08-16 13:38:28 -07:00
parent ee6357386d
commit d0957c3441
3 changed files with 145 additions and 148 deletions

View file

@ -288,9 +288,11 @@ final class ArcanistDiffParser extends Phobject {
if (isset($match['type'])) { if (isset($match['type'])) {
if ($match['type'] == 'diff --git') { if ($match['type'] == 'diff --git') {
list($old, $new) = self::splitGitDiffPaths($match['oldnew']); $filename = self::extractGitCommonFilename($match['oldnew']);
$match['old'] = $old; if ($filename !== null) {
$match['cur'] = $new; $match['old'] = $filename;
$match['cur'] = $filename;
}
} }
} }
@ -1307,46 +1309,6 @@ final class ArcanistDiffParser extends Phobject {
} }
/**
* Strip prefixes off paths from `git diff`. By default git uses a/ and b/,
* but you can set `diff.mnemonicprefix` to get a different set of prefixes,
* or use `--no-prefix`, `--src-prefix` or `--dst-prefix` to set these to
* other arbitrary values.
*
* We strip the default and mnemonic prefixes, and trust the user knows what
* they're doing in the other cases.
*
* @param string Path to strip.
* @return string Stripped path.
*/
public static function stripGitPathPrefix($path) {
static $regex;
if ($regex === null) {
$prefixes = array(
// These are the defaults.
'a/',
'b/',
// These show up when you set "diff.mnemonicprefix".
'i/',
'c/',
'w/',
'o/',
'1/',
'2/',
);
foreach ($prefixes as $key => $prefix) {
$prefixes[$key] = preg_quote($prefix, '@');
}
$regex = '@^('.implode('|', $prefixes).')@S';
}
return preg_replace($regex, '', $path);
}
/** /**
* Split the paths on a "diff --git" line into old and new paths. This * Split the paths on a "diff --git" line into old and new paths. This
* is difficult because they may be ambiguous if the files contain spaces. * is difficult because they may be ambiguous if the files contain spaces.
@ -1354,53 +1316,40 @@ final class ArcanistDiffParser extends Phobject {
* @param string Text from a diff line after "diff --git ". * @param string Text from a diff line after "diff --git ".
* @return pair<string, string> Old and new paths. * @return pair<string, string> Old and new paths.
*/ */
public static function splitGitDiffPaths($paths) { public static function extractGitCommonFilename($paths) {
$matches = null; $matches = null;
$paths = rtrim($paths, "\r\n"); $paths = rtrim($paths, "\r\n");
$patterns = array( // Try the exact same string twice in a row separated by a
// Try quoted paths, used for unicode filenames or filenames with quotes. // space, with an optional prefix. This can hit a false
'@^(?P<old>"(?:\\\\.|[^"\\\\]+)+") (?P<new>"(?:\\\\.|[^"\\\\]+)+")$@', // positive for moves from files like "old file old" to "file",
// but such a cases will be caught by the "rename from" /
// "rename to" lines.
$prefix = '(?:[^/]+/)?';
$pattern =
"@^(?P<old>(?P<oldq>\"?){$prefix}(?P<common>.+)\\k<oldq>)"
." "
."(?P<new>(?P<newq>\"?){$prefix}\\k<common>\\k<newq>)$@";
// Try paths without spaces. if (!preg_match($pattern, $paths, $matches)) {
'@^(?P<old>[^ ]+) (?P<new>[^ ]+)$@', // A rename or some form; return null for now, and let the
// "rename from" / "rename to" lines fix it up.
// Try paths with well-known prefixes. return null;
'@^(?P<old>[abicwo12]/.*) (?P<new>[abicwo12]/.*)$@',
// Try the exact same string twice in a row separated by a space.
// This can hit a false positive for moves from files like "old file old"
// to "file", but such a case combined with custom diff prefixes is
// incredibly obscure.
'@^(?P<old>.*) (?P<new>\\1)$@',
);
foreach ($patterns as $pattern) {
if (preg_match($pattern, $paths, $matches)) {
break;
}
} }
if (!$matches) { // Use the common subpart. There may be ambiguity here: "src/file
throw new Exception( // dst/file" may _either_ be a prefix-less move, or a change with
pht( // two custom prefixes. We assume it is the latter; if it is a
"Input diff contains ambiguous line '%s'. This line is ambiguous ". // rename, diff parsing will update based on the "rename from" /
"because there are spaces in the file names, so the parser can not ". // "rename to" lines.
"determine where the file names begin and end. To resolve this ".
"ambiguity, use standard prefixes ('a/' and 'b/') when ".
"generating diffs.",
"diff --git {$paths}"));
}
$old = $matches['old']; // This re-assembles with the differing prefixes removed, but the
$old = self::unescapeFilename($old); // quoting from the original. Necessary so we know if we should
$old = self::stripGitPathPrefix($old); // unescape characters from the common string.
$new = $matches['newq'].$matches['common'].$matches['newq'];
$new = $matches['new'];
$new = self::unescapeFilename($new); $new = self::unescapeFilename($new);
$new = self::stripGitPathPrefix($new);
return array($old, $new); return $new;
} }

View file

@ -535,7 +535,14 @@ EOTEXT
$this->assertEqual(1, count($changes)); $this->assertEqual(1, count($changes));
$change = head($changes); $change = head($changes);
$this->assertEqual( $this->assertEqual(
'dst/file', 'file',
$change->getCurrentPath());
break;
case 'custom-prefixes-edit.gitdiff':
$this->assertEqual(1, count($changes));
$change = head($changes);
$this->assertEqual(
'file',
$change->getCurrentPath()); $change->getCurrentPath());
break; break;
case 'more-newlines.svndiff': case 'more-newlines.svndiff':
@ -607,81 +614,111 @@ EOTEXT
} }
} }
public function testGitPrefixStripping() { public function testGitCommonFilenameExtraction() {
static $tests = array( static $tests = array(
'a/file.c' => 'file.c', 'a/filename.c b/filename.c' => 'filename.c',
'b/file.c' => 'file.c', "a/filename.c b/filename.c\n" => 'filename.c',
'i/file.c' => 'file.c', "a/filename.c b/filename.c\r\n" => 'filename.c',
'c/file.c' => 'file.c', 'filename.c filename.c' => 'filename.c',
'w/file.c' => 'file.c', '1/filename.c 2/filename.c' => 'filename.c',
'o/file.c' => 'file.c', '"a/\\"quotes\\"" "b/\\"quotes\\""' => '"quotes"',
'1/file.c' => 'file.c', '"a/\\"quotes and spaces\\"" "b/\\"quotes and spaces\\""' =>
'2/file.c' => 'file.c', '"quotes and spaces"',
'src/file.c' => 'src/file.c', '"a/\\342\\230\\203" "b/\\342\\230\\203"' =>
'file.c' => 'file.c', "\xE2\x98\x83",
'a/Core Data/filename.c b/Core Data/filename.c' =>
'Core Data/filename.c',
'some file with spaces.c some file with spaces.c' =>
'some file with spaces.c',
'"foo bar.c" foo bar.c' => 'foo bar.c',
'"a/foo bar.c" b/foo bar.c' => 'foo bar.c',
'src/file dst/file' => 'file',
// Renames are handled by the "rename from ..." lines later in
// the diff, for simplicity of parsing; this is also how git
// itself handles it.
'a/foo.c b/bar.c' => null,
'a/foo bar.c b/baz troz.c' => null,
'"a/foo bar.c" b/baz troz.c' => null,
'a/foo bar.c "b/baz troz.c"' => null,
'"a/foo bar.c" "b/baz troz.c"' => null,
'filename file with spaces.c filename file with spaces.c' =>
'filename file with spaces.c',
); );
foreach ($tests as $input => $expect) { foreach ($tests as $input => $expect) {
$this->assertEqual( $result = ArcanistDiffParser::extractGitCommonFilename($input);
$expect,
ArcanistDiffParser::stripGitPathPrefix($input),
pht("Strip git prefix from '%s'.", $input));
}
}
public function testGitPathSplitting() {
static $tests = array(
'a/old.c b/new.c' => array('old.c', 'new.c'),
"a/old.c b/new.c\n" => array('old.c', 'new.c'),
"a/old.c b/new.c\r\n" => array('old.c', 'new.c'),
'old.c new.c' => array('old.c', 'new.c'),
'1/old.c 2/new.c' => array('old.c', 'new.c'),
'"a/\\"quotes1\\"" "b/\\"quotes2\\""' => array(
'"quotes1"',
'"quotes2"',
),
'"a/\\"quotes and spaces1\\"" "b/\\"quotes and spaces2\\""' => array(
'"quotes and spaces1"',
'"quotes and spaces2"',
),
'"a/\\342\\230\\2031" "b/\\342\\230\\2032"' => array(
"\xE2\x98\x831",
"\xE2\x98\x832",
),
'a/Core Data/old.c b/Core Data/new.c' => array(
'Core Data/old.c',
'Core Data/new.c',
),
'some file with spaces.c some file with spaces.c' => array(
'some file with spaces.c',
'some file with spaces.c',
),
);
foreach ($tests as $input => $expect) {
$result = ArcanistDiffParser::splitGitDiffPaths($input);
$this->assertEqual( $this->assertEqual(
$expect, $expect,
$result, $result,
pht('Split: %s', $input)); pht('Split: %s', $input));
} }
static $ambiguous = array(
'old file with spaces.c new file with spaces.c',
);
foreach ($ambiguous as $input) {
$caught = null;
try {
ArcanistDiffParser::splitGitDiffPaths($input);
} catch (Exception $ex) {
$caught = $ex;
}
$this->assertTrue(
($caught instanceof Exception),
pht('Ambiguous: %s', $input));
}
} }
public function runSingleRename($diffline, $from, $to, $old, $new) {
$str = "diff --git $diffline\nsimilarity index 95%\n"
."rename from $from\nrename to $to\n";
$parser = new ArcanistDiffParser();
$changes = $parser->parseDiff($str);
$this->assertTrue(
$changes !== null,
pht("Parsed:\n%s", $str));
$this->assertEqual(
$old == $new ? 1 : 2, count($changes),
pht("Parsed one change:\n%s", $str));
$change = reset($changes);
$this->assertEqual(
array($old, $new),
array($change->getOldPath(), $change->getCurrentPath()),
pht('Split: %s', $diffline));
}
public function testGitRenames() {
$this->runSingleRename('a/old.c b/new.c',
'old.c', 'new.c',
'old.c', 'new.c');
$this->runSingleRename('old.c new.c',
'old.c', 'new.c',
'old.c', 'new.c');
$this->runSingleRename('1/old.c 2/new.c',
'old.c', 'new.c',
'old.c', 'new.c');
$this->runSingleRename('from/file.c to/file.c',
'from/file.c', 'to/file.c',
'from/file.c', 'to/file.c');
$this->runSingleRename('"a/\\"quotes1\\"" "b/\\"quotes2\\""',
'"\\"quotes1\\""', '"\\"quotes2\\""',
'"quotes1"', '"quotes2"');
$this->runSingleRename('"a/\\"quotes spaces1\\"" "b/\\"quotes spaces2\\""',
'"\\"quotes spaces1\\""', '"\\"quotes spaces2\\""',
'"quotes spaces1"', '"quotes spaces2"');
$this->runSingleRename('"a/\\342\\230\\2031" "b/\\342\\230\\2032"',
'"\\342\\230\\2031"', '"\\342\\230\\2032"',
"\xE2\x98\x831", "\xE2\x98\x832");
$this->runSingleRename('a/Core Data/old.c b/Core Data/new.c',
'Core Data/old.c', 'Core Data/new.c',
'Core Data/old.c', 'Core Data/new.c');
$this->runSingleRename('file with spaces.c file with spaces.c',
'file with spaces.c', 'file with spaces.c',
'file with spaces.c', 'file with spaces.c');
$this->runSingleRename('a/non-quoted filename.c "b/quoted filename.c"',
'non-quoted filename.c', '"quoted filename.c"',
'non-quoted filename.c', 'quoted filename.c');
$this->runSingleRename('non-quoted filename.c "quoted filename.c"',
'non-quoted filename.c', '"quoted filename.c"',
'non-quoted filename.c', 'quoted filename.c');
$this->runSingleRename('"a/quoted filename.c" b/non quoted filename.c',
'"quoted filename.c"', 'non quoted filename.c',
'quoted filename.c', 'non quoted filename.c');
$this->runSingleRename('"quoted filename.c" non-quoted filename.c',
'"quoted filename.c"', 'non-quoted filename.c',
'quoted filename.c', 'non-quoted filename.c');
$this->runSingleRename('old file with spaces.c new file with spaces.c',
'old file with spaces.c', 'new file with spaces.c',
'old file with spaces.c', 'new file with spaces.c');
$this->runSingleRename('old file old file',
'old file old', 'file',
'old file old', 'file');
}
} }

View file

@ -0,0 +1,11 @@
diff --git src/file dst/file
index 711edb6..a8cd7be 100644
--- src/file
+++ dst/file
@@ -5,5 +5,5 @@
some context
other context
-old value
+new value
some context
other context