1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-25 08:12:40 +01:00

Skip "git for-each-ref" when identifying deleted commits

Summary:
Ref T13647. The ref discovery process prunes commits that no longer exist in the repository before executing "git log <new heads> --not <old heads>" to identify newly published commits.

If we don't do this, the "git log" command will fail if any old head has been pruned from the repository.

Currently, this test for missing commits starts with a call to "git for-each-ref" to attempt to resolve symbols as tag or branch names, but:

  - this is painfully slow in repositories with many refs; and
  - this is incorrect (not consistent with "git" behavior) for 40-character hex strings, which Git will never resolve as symbolic names.

Instead, when a symbol is a 40-character hex string, skip "git for-each-ref" and jump directly to "git cat-file --batch-check".

Test Plan:
  - Ran `bin/repository update` in a repository with 65K refs and extra debugging info.
    - Before: took ~30s, three calls to `git for-each-ref`.
    - After: took ~20s, two calls to `git for-each-ref`. Same resolution result on queries.

Maniphest Tasks: T13647

Differential Revision: https://secure.phabricator.com/D21658
This commit is contained in:
epriestley 2021-03-28 10:52:29 -07:00
parent 5b8b5f2141
commit aa70b008f3

View file

@ -63,48 +63,66 @@ final class DiffusionLowLevelResolveRefsQuery
$unresolved = array_fuse($this->refs);
$results = array();
// First, resolve branches and tags.
$ref_map = id(new DiffusionLowLevelGitRefQuery())
->setRepository($repository)
->withRefTypes(
array(
PhabricatorRepositoryRefCursor::TYPE_BRANCH,
PhabricatorRepositoryRefCursor::TYPE_TAG,
))
->execute();
$ref_map = mgroup($ref_map, 'getShortName');
$tag_prefix = 'refs/tags/';
$possible_symbols = array();
foreach ($unresolved as $ref) {
if (empty($ref_map[$ref])) {
// See T13647. If this symbol is exactly 40 hex characters long, it may
// never resolve as a branch or tag name. Filter these symbols out for
// consistency with Git behavior -- and to avoid an expensive
// "git for-each-ref" when resolving only commit hashes, which happens
// during repository updates.
if (preg_match('(^[a-f0-9]{40}\z)', $ref)) {
continue;
}
foreach ($ref_map[$ref] as $result) {
$fields = $result->getRawFields();
$objectname = idx($fields, 'refname');
if (!strncmp($objectname, $tag_prefix, strlen($tag_prefix))) {
$type = 'tag';
} else {
$type = 'branch';
$possible_symbols[$ref] = $ref;
}
// First, resolve branches and tags.
if ($possible_symbols) {
$ref_map = id(new DiffusionLowLevelGitRefQuery())
->setRepository($repository)
->withRefTypes(
array(
PhabricatorRepositoryRefCursor::TYPE_BRANCH,
PhabricatorRepositoryRefCursor::TYPE_TAG,
))
->execute();
$ref_map = mgroup($ref_map, 'getShortName');
$tag_prefix = 'refs/tags/';
foreach ($possible_symbols as $ref) {
if (empty($ref_map[$ref])) {
continue;
}
$info = array(
'type' => $type,
'identifier' => $result->getCommitIdentifier(),
);
if ($type == 'tag') {
$alternate = idx($fields, 'objectname');
if ($alternate) {
$info['alternate'] = $alternate;
foreach ($ref_map[$ref] as $result) {
$fields = $result->getRawFields();
$objectname = idx($fields, 'refname');
if (!strncmp($objectname, $tag_prefix, strlen($tag_prefix))) {
$type = 'tag';
} else {
$type = 'branch';
}
$info = array(
'type' => $type,
'identifier' => $result->getCommitIdentifier(),
);
if ($type == 'tag') {
$alternate = idx($fields, 'objectname');
if ($alternate) {
$info['alternate'] = $alternate;
}
}
$results[$ref][] = $info;
}
$results[$ref][] = $info;
unset($unresolved[$ref]);
}
unset($unresolved[$ref]);
}
// If we resolved everything, we're done.