mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-11-25 16:22:42 +01:00
Use 'git blame --porcelain' for git blame info
Summary: This guards against instability in the output format of 'git blame' (caused by git config settings, localization (ref T5554), or future changes). For example, `git config blame.blankboundary true` breaks `arc cover` before this patch. Test Plan: * Set `git config blame.blankboundary true` on a test repo. * Ran `arc cover`. It failed with an exception ("Bad blame?"). * Applied this patch. * `arc cover` works. Reviewers: #blessed_reviewers, epriestley Reviewed By: #blessed_reviewers, epriestley Subscribers: Korvin Maniphest Tasks: T5554 Differential Revision: https://secure.phabricator.com/D13993
This commit is contained in:
parent
43f8e7eb71
commit
4c3d75401f
1 changed files with 28 additions and 23 deletions
|
@ -795,37 +795,42 @@ final class ArcanistGitAPI extends ArcanistRepositoryAPI {
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getBlame($path) {
|
public function getBlame($path) {
|
||||||
// TODO: 'git blame' supports --porcelain and we should probably use it.
|
|
||||||
list($stdout) = $this->execxLocal(
|
list($stdout) = $this->execxLocal(
|
||||||
'blame --date=iso -w -M %s -- %s',
|
'blame --porcelain -w -M %s -- %s',
|
||||||
$this->getBaseCommit(),
|
$this->getBaseCommit(),
|
||||||
$path);
|
$path);
|
||||||
|
|
||||||
|
// the --porcelain format prints at least one header line per source line,
|
||||||
|
// then the source line prefixed by a tab character
|
||||||
|
$blame_info = preg_split('/^\t.*\n/m', rtrim($stdout));
|
||||||
|
|
||||||
|
// commit info is not repeated in these headers, so cache it
|
||||||
|
$revision_data = array();
|
||||||
|
|
||||||
$blame = array();
|
$blame = array();
|
||||||
foreach (explode("\n", trim($stdout)) as $line) {
|
foreach ($blame_info as $line_info) {
|
||||||
if (!strlen($line)) {
|
$revision = substr($line_info, 0, 40);
|
||||||
continue;
|
$data = idx($revision_data, $revision, array());
|
||||||
|
|
||||||
|
if (empty($data)) {
|
||||||
|
$matches = array();
|
||||||
|
if (!preg_match('/^author (.*)$/m', $line_info, $matches)) {
|
||||||
|
throw new Exception(
|
||||||
|
pht(
|
||||||
|
'Unexpected output from %s: no author for commit %s',
|
||||||
|
'git blame',
|
||||||
|
$revision));
|
||||||
|
}
|
||||||
|
$data['author'] = $matches[1];
|
||||||
|
$data['from_first_commit'] = preg_match('/^boundary$/m', $line_info);
|
||||||
|
$revision_data[$revision] = $data;
|
||||||
}
|
}
|
||||||
|
|
||||||
// lines predating a git repo's history are blamed to the oldest revision,
|
// Ignore lines predating the git repository (on a boundary commit)
|
||||||
// with the commit hash prepended by a ^. we shouldn't count these lines
|
// rather than blaming them on the oldest diff's unfortunate author
|
||||||
// as blaming to the oldest diff's unfortunate author
|
if (!$data['from_first_commit']) {
|
||||||
if ($line[0] == '^') {
|
$blame[] = array($data['author'], $revision);
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$matches = null;
|
|
||||||
$ok = preg_match(
|
|
||||||
'/^([0-9a-f]+)[^(]+?[(](.*?) +\d\d\d\d-\d\d-\d\d/',
|
|
||||||
$line,
|
|
||||||
$matches);
|
|
||||||
if (!$ok) {
|
|
||||||
throw new Exception(pht("Bad blame? `%s'", $line));
|
|
||||||
}
|
|
||||||
$revision = $matches[1];
|
|
||||||
$author = $matches[2];
|
|
||||||
|
|
||||||
$blame[] = array($author, $revision);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return $blame;
|
return $blame;
|
||||||
|
|
Loading…
Reference in a new issue