From 65f13b156f5ecdbf689cce16f2bc7b9680818789 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 20 Oct 2017 10:15:07 -0700 Subject: [PATCH] Improve "refengine" performance for testing large numbers of Mercurial branches Summary: See PHI158. In the RefEngine, we test if any old branch positions have been removed from the repository. This is uncommon (but not impossible) in Mercurial, and corresponds to users deleting branches in Git. Currently, we end up running `hg log` for each position, in parallel. Because of Python's large startup overhead, this can be resource intensive for repositories with a large number of branches. We have to do this in the general case because the caller may be asking us to resolve `tip`, `newfeature`, `tip~3`, `9`, etc. However, in the specific case where the refs are 40-digit hashes, we can bulk resolve them if they exist, like this: ``` hg log ... --rev (abcd or def0 or ab12 or ...) ``` In the general case, we could probably do less of this than we currently do (instead of testing all old heads, we could prune the list by removing commits which we know are still pointed to by current heads) but that's a slightly more involved change and the effect here is already dramatic. Test Plan: Verified that CPU usage drops from ~110s -> ~0.9s: Before: ``` epriestley@orbital ~/dev/phabricator $ time ./bin/repository refs nss Updating refs in "nss"... Done. real 0m14.676s user 1m24.714s sys 0m21.645s ``` After: ``` epriestley@orbital ~/dev/phabricator $ time ./bin/repository refs nss Updating refs in "nss"... Done. real 0m0.861s user 0m0.882s sys 0m0.213s ``` - Manually resolved `blue`, `tip`, `9`, etc., got expected results. - Tried to resolve invalid hashes, got expected result (no resolution). Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D18717 --- .../DiffusionLowLevelResolveRefsQuery.php | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/applications/diffusion/query/lowlevel/DiffusionLowLevelResolveRefsQuery.php b/src/applications/diffusion/query/lowlevel/DiffusionLowLevelResolveRefsQuery.php index b649ca65a8..f9e9f74774 100644 --- a/src/applications/diffusion/query/lowlevel/DiffusionLowLevelResolveRefsQuery.php +++ b/src/applications/diffusion/query/lowlevel/DiffusionLowLevelResolveRefsQuery.php @@ -256,6 +256,66 @@ final class DiffusionLowLevelResolveRefsQuery return $results; } + // If some of the refs look like hashes, try to bulk resolve them. This + // workflow happens via RefEngine and bulk resolution is dramatically + // faster than individual resolution. See PHI158. + + $hashlike = array(); + foreach ($unresolved as $key => $ref) { + if (preg_match('/^[a-f0-9]{40}\z/', $ref)) { + $hashlike[$key] = $ref; + } + } + + if (count($hashlike) > 1) { + $hashlike_map = array(); + + $hashlike_groups = array_chunk($hashlike, 64, true); + foreach ($hashlike_groups as $hashlike_group) { + $hashlike_arg = array(); + foreach ($hashlike_group as $hashlike_ref) { + $hashlike_arg[] = hgsprintf('%s', $hashlike_ref); + } + $hashlike_arg = '('.implode(' or ', $hashlike_arg).')'; + + list($err, $refs) = $repository->execLocalCommand( + 'log --template=%s --rev %s', + '{node}\n', + $hashlike_arg); + if ($err) { + // NOTE: If any ref fails to resolve, Mercurial will exit with an + // error. We just give up on the whole group and resolve it + // individually below. In theory, we could split it into subgroups + // but the pathway where this bulk resolution matters rarely tries + // to resolve missing refs (see PHI158). + continue; + } + + $refs = phutil_split_lines($refs, false); + + foreach ($refs as $ref) { + $hashlike_map[$ref] = true; + } + } + + foreach ($unresolved as $key => $ref) { + if (!isset($hashlike_map[$ref])) { + continue; + } + + $results[$ref][] = array( + 'type' => 'commit', + 'identifier' => $ref, + ); + + unset($unresolved[$key]); + } + } + + if (!$unresolved) { + return $results; + } + // If we still have unresolved refs (which might be things like "tip"), // try to resolve them individually.