1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-22 14:52:41 +01:00

Handle encoding if git tells us it's still not UTF-8

Summary:
Even though `--encoding` is passed to the command, git still fails
in some cases to correctly convert the output. Attempt the conversion
ourselves if git reports an encoding other than UTF-8.

Test Plan: Reparsed message in a repository with ISO-8859-1 encoded commit messages.

Reviewers: epriestley

Reviewed By: epriestley

CC: aran, Korvin

Maniphest Tasks: T452

Differential Revision: https://secure.phabricator.com/D2888
This commit is contained in:
David Reuss 2012-06-29 12:38:02 -07:00 committed by epriestley
parent 2bb8150506
commit aa95ef2463

View file

@ -25,14 +25,27 @@ final class PhabricatorRepositoryGitCommitMessageParserWorker
// NOTE: %B was introduced somewhat recently in git's history, so pull
// commit message information with %s and %b instead.
// Even though we pass --encoding here, git doesn't always succeed, so
// we try a little harder, since git *does* correctly tell us what the
// actual encoding is.
list($info) = $repository->execxLocalCommand(
"log -n 1 --encoding='UTF-8' " .
"--pretty=format:%%cn%%x00%%an%%x00%%s%%n%%n%%b %s",
"--pretty=format:%%e%%x00%%cn%%x00%%an%%x00%%s%%n%%n%%b %s",
$commit->getCommitIdentifier());
list($committer, $author, $message) = explode("\0", $info);
list($encoding, $committer, $author, $message) = explode("\0", $info);
// Make sure these are valid UTF-8.
// See note above - git doesn't always convert the encoding correctly.
if (strtoupper($encoding) != "UTF-8") {
if (function_exists('mb_convert_encoding')) {
$message = mb_convert_encoding($message, "UTF-8", $encoding);
$author = mb_convert_encoding($author, "UTF-8", $encoding);
$committer = mb_convert_encoding($committer, "UTF-8", $encoding);
}
}
// Make sure these are valid UTF-8, even though we try
// pretty hard just above.
$committer = phutil_utf8ize($committer);
$author = phutil_utf8ize($author);
$message = phutil_utf8ize($message);