1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2025-01-22 20:51:09 +01:00

Parse git diffs of files with unicode characters in the names

Summary: This is probably just the first step down a long road of not handling
exotic filenames correctly, but if you diff a file named "∆.jpg" it currently
chokes while parsing the diff.

Test Plan: Added minimal failing unit test, fixed parser, test passed.

Reviewers: zeeg, btrahan

Reviewed By: btrahan

CC: aran, epriestley

Differential Revision: https://secure.phabricator.com/D1592
This commit is contained in:
epriestley 2012-02-08 10:04:15 -08:00
parent 35837a39d2
commit 8cb5292edf
3 changed files with 44 additions and 1 deletions

View file

@ -209,7 +209,10 @@ final class ArcanistDiffParser {
// This is a git commit message, probably from "git show".
'(?P<type>commit) (?P<hash>[a-f0-9]+)',
// This is a git diff, probably from "git show" or "git diff".
'(?P<type>diff --git) [abicwo12]/(?P<old>.+) [abicwo12]/(?P<cur>.+)',
// Note that the filenames may appear quoted.
'(?P<type>diff --git) '.
'(?P<old>"?[abicwo12]/.+"?) '.
'(?P<cur>"?[abicwo12]/.+"?)',
// This is a unified diff, probably from "diff -u" or synthetic diffing.
'(?P<type>---) (?P<old>.+)\s+\d{4}-\d{2}-\d{2}.*',
'(?P<binary>Binary) files '.
@ -238,6 +241,19 @@ final class ArcanistDiffParser {
"'diff --git' (git diff), or '--- filename' (unified diff).");
}
if (isset($match['type'])) {
if ($match['type'] == 'diff --git') {
if (isset($match['old'])) {
$match['old'] = $this->unescapeFilename($match['old']);
$match['old'] = substr($match['old'], 2);
}
if (isset($match['cur'])) {
$match['cur'] = $this->unescapeFilename($match['cur']);
$match['cur'] = substr($match['cur'], 2);
}
}
}
$change = $this->buildChange(idx($match, 'cur'));
if (isset($match['old'])) {
@ -519,10 +535,12 @@ final class ArcanistDiffParser {
}
if (!empty($match['old'])) {
$match['old'] = $this->unescapeFilename($match['old']);
$change->setOldPath($match['old']);
}
if (!empty($match['cur'])) {
$match['cur'] = $this->unescapeFilename($match['cur']);
$change->setCurrentPath($match['cur']);
}
@ -941,4 +959,15 @@ final class ArcanistDiffParser {
$message = "Parse Exception: {$message}\n\n{$context}\n";
throw new Exception($message);
}
/**
 * Decode a filename that may be in quoted, backslash-escaped form, as
 * emitted by "git diff" for paths with unusual bytes.
 *
 * A name wrapped in double quotes (with at least one character between
 * them) has the surrounding quotes removed and its C-style escape
 * sequences decoded. Any other name is returned unchanged.
 *
 * @param string Possibly-quoted filename.
 * @return string Filename with quoting and escaping removed.
 */
private function unescapeFilename($name) {
  $is_quoted = (bool)preg_match('/^".+"$/', $name);

  // Unquoted names carry no escaping, so pass them through untouched.
  if (!$is_quoted) {
    return $name;
  }

  // Drop the leading and trailing quote, then decode escapes such as
  // "\342\210\206" into their raw bytes.
  $inner = substr($name, 1, -1);
  return stripcslashes($inner);
}
}

View file

@ -477,6 +477,16 @@ EOTEXT
ArcanistDiffChangeType::FILE_BINARY,
$change->getFileType());
break;
case 'git-odd-filename.gitdiff':
$this->assertEqual(2, count($changes));
$change = reset($changes);
$this->assertEqual(
'old/'."\342\210\206".'.jpg',
$change->getOldPath());
$this->assertEqual(
'new/'."\342\210\206".'.jpg',
$change->getCurrentPath());
break;
case 'hg-binary-change.hgdiff':
case 'hg-solo-binary-change.hgdiff':
$this->assertEqual(1, count($changes));

View file

@ -0,0 +1,4 @@
diff --git "a/old/\342\210\206.jpg" "b/new/\342\210\206.jpg"
similarity index 100%
rename from "old/\342\210\206.jpg"
rename to "new/\342\210\206.jpg"