mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-22 14:52:41 +01:00
Handle encoding if git tells us it's still not UTF-8
Summary: Even though `--encoding` is passed to the command, git still fails in some cases to correctly convert the output. Attempt the conversion ourselves if git reports an encoding other than UTF-8. Test Plan: Reparsed a commit message in a repository with ISO-8859-1 encoded commit messages. Reviewers: epriestley Reviewed By: epriestley CC: aran, Korvin Maniphest Tasks: T452 Differential Revision: https://secure.phabricator.com/D2888
This commit is contained in:
parent
2bb8150506
commit
aa95ef2463
1 changed file with 16 additions and 3 deletions
|
@ -25,14 +25,27 @@ final class PhabricatorRepositoryGitCommitMessageParserWorker
|
|||
|
||||
// NOTE: %B was introduced somewhat recently in git's history, so pull
|
||||
// commit message information with %s and %b instead.
|
||||
// Even though we pass --encoding here, git doesn't always succeed, so
|
||||
// we try a little harder, since git *does* tell us what the actual encoding
|
||||
// is correctly.
|
||||
list($info) = $repository->execxLocalCommand(
|
||||
"log -n 1 --encoding='UTF-8' " .
|
||||
"--pretty=format:%%cn%%x00%%an%%x00%%s%%n%%n%%b %s",
|
||||
"--pretty=format:%%e%%x00%%cn%%x00%%an%%x00%%s%%n%%n%%b %s",
|
||||
$commit->getCommitIdentifier());
|
||||
|
||||
list($committer, $author, $message) = explode("\0", $info);
|
||||
list($encoding, $committer, $author, $message) = explode("\0", $info);
|
||||
|
||||
// Make sure these are valid UTF-8.
|
||||
// See note above - git doesn't always convert the encoding correctly.
|
||||
if (strtoupper($encoding) != "UTF-8") {
|
||||
if (function_exists('mb_convert_encoding')) {
|
||||
$message = mb_convert_encoding($message, "UTF-8", $encoding);
|
||||
$author = mb_convert_encoding($author, "UTF-8", $encoding);
|
||||
$committer = mb_convert_encoding($committer, "UTF-8", $encoding);
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure these are valid UTF-8, even though we try
|
||||
// pretty hard just above.
|
||||
$committer = phutil_utf8ize($committer);
|
||||
$author = phutil_utf8ize($author);
|
||||
$message = phutil_utf8ize($message);
|
||||
|
|
Loading…
Reference in a new issue