mirror of
https://we.phorge.it/source/arcanist.git
synced 2025-01-22 20:51:09 +01:00
Parse git diffs of files with unicode characters in the names
Summary: This is probably just the first step down a long road of not handling exotic filenames correctly, but if you diff a file named "∆.jpg" it currently chokes while parsing the diff.

Test Plan: Added minimal failing unit test, fixed parser, test passed.

Reviewers: zeeg, btrahan

Reviewed By: btrahan

CC: aran, epriestley

Differential Revision: https://secure.phabricator.com/D1592
This commit is contained in:
parent
35837a39d2
commit
8cb5292edf
3 changed files with 44 additions and 1 deletions
|
@ -209,7 +209,10 @@ final class ArcanistDiffParser {
|
|||
// This is a git commit message, probably from "git show".
|
||||
'(?P<type>commit) (?P<hash>[a-f0-9]+)',
|
||||
// This is a git diff, probably from "git show" or "git diff".
|
||||
'(?P<type>diff --git) [abicwo12]/(?P<old>.+) [abicwo12]/(?P<cur>.+)',
|
||||
// Note that the filenames may appear quoted.
|
||||
'(?P<type>diff --git) '.
|
||||
'(?P<old>"?[abicwo12]/.+"?) '.
|
||||
'(?P<cur>"?[abicwo12]/.+"?)',
|
||||
// This is a unified diff, probably from "diff -u" or synthetic diffing.
|
||||
'(?P<type>---) (?P<old>.+)\s+\d{4}-\d{2}-\d{2}.*',
|
||||
'(?P<binary>Binary) files '.
|
||||
|
@ -238,6 +241,19 @@ final class ArcanistDiffParser {
|
|||
"'diff --git' (git diff), or '--- filename' (unified diff).");
|
||||
}
|
||||
|
||||
if (isset($match['type'])) {
|
||||
if ($match['type'] == 'diff --git') {
|
||||
if (isset($match['old'])) {
|
||||
$match['old'] = $this->unescapeFilename($match['old']);
|
||||
$match['old'] = substr($match['old'], 2);
|
||||
}
|
||||
if (isset($match['cur'])) {
|
||||
$match['cur'] = $this->unescapeFilename($match['cur']);
|
||||
$match['cur'] = substr($match['cur'], 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$change = $this->buildChange(idx($match, 'cur'));
|
||||
|
||||
if (isset($match['old'])) {
|
||||
|
@ -519,10 +535,12 @@ final class ArcanistDiffParser {
|
|||
}
|
||||
|
||||
if (!empty($match['old'])) {
|
||||
$match['old'] = $this->unescapeFilename($match['old']);
|
||||
$change->setOldPath($match['old']);
|
||||
}
|
||||
|
||||
if (!empty($match['cur'])) {
|
||||
$match['cur'] = $this->unescapeFilename($match['cur']);
|
||||
$change->setCurrentPath($match['cur']);
|
||||
}
|
||||
|
||||
|
@ -941,4 +959,15 @@ final class ArcanistDiffParser {
|
|||
$message = "Parse Exception: {$message}\n\n{$context}\n";
|
||||
throw new Exception($message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unescape escaped filenames, e.g. from "git diff".
*
* A name wrapped in double quotes has the surrounding quotes removed and its
* C-style backslash escapes (for example the octal byte sequence
* "\342\210\206") decoded with stripcslashes(). Any other name, including
* one that is not quoted on both ends, is returned unchanged.
*
* @param string $name Filename as it appears in the diff, possibly quoted.
* @return string Filename with the quoting and escapes removed.
|
||||
*/
|
||||
private function unescapeFilename($name) {
|
||||
// Only names that both start and end with a double quote (with at least
// one character in between) are treated as escaped.
if (preg_match('/^".+"$/', $name)) {
|
||||
// Strip the surrounding quotes first, then decode the backslash escapes.
return stripcslashes(substr($name, 1, -1));
|
||||
} else {
|
||||
return $name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -477,6 +477,16 @@ EOTEXT
|
|||
ArcanistDiffChangeType::FILE_BINARY,
|
||||
$change->getFileType());
|
||||
break;
|
||||
case 'git-odd-filename.gitdiff':
|
||||
$this->assertEqual(2, count($changes));
|
||||
$change = reset($changes);
|
||||
$this->assertEqual(
|
||||
'old/'."\342\210\206".'.jpg',
|
||||
$change->getOldPath());
|
||||
$this->assertEqual(
|
||||
'new/'."\342\210\206".'.jpg',
|
||||
$change->getCurrentPath());
|
||||
break;
|
||||
case 'hg-binary-change.hgdiff':
|
||||
case 'hg-solo-binary-change.hgdiff':
|
||||
$this->assertEqual(1, count($changes));
|
||||
|
|
4
src/parser/diff/__tests__/data/git-odd-filename.gitdiff
Normal file
4
src/parser/diff/__tests__/data/git-odd-filename.gitdiff
Normal file
|
@ -0,0 +1,4 @@
|
|||
diff --git "a/old/\342\210\206.jpg" "b/new/\342\210\206.jpg"
|
||||
similarity index 100%
|
||||
rename from "old/\342\210\206.jpg"
|
||||
rename to "new/\342\210\206.jpg"
|
Loading…
Reference in a new issue