1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-11 07:11:04 +01:00

Use "git ls-remote" to guess if "git fetch" is a no-op

Summary:
Ref T12296. Ref T12392. Currently, when we're observing a remote repository, we periodically run `git fetch ...`.

Instead, periodically run `git ls-remote` (to list refs in the remote) and `git for-each-ref` (to list local refs) and only continue if the two lists are different.

The motivations for this are:

  - In T12296, it appears that doing this is //faster// than doing a no-op `git fetch`. This effect seems to reproduce locally in a clean environment (900ms for `ls-remote` + 100ms for `for-each-ref` vs about 1.4s for `fetch`). I don't have any explanation for why this is, but there it is. This isn't a huge change, although the time we're saving does appear to mostly be local CPU time, which is good for us.
  - Because we control all writes, we could cache `git for-each-ref` in the future and do fewer disk operations. This doesn't necessarily seem too valuable, though.
  - This allows us to tell if a fetch will do anything or not, and make better decisions around clustering (in particular, simplify how observed repository versioning works). With `git fetch`, we can't easily distinguish between "fetch, but nothing changed" and "legitimate fetch".

If a repository updates very regularly we end up doing slightly more work this way (that is, if `ls-remote` always comes back with changes, we do a little extra work), but this is normally very rare.

This might not get non-bare repositories quite right in some cases (i.e., incorrectly detect them as changed when they are unchanged) but we haven't created non-bare repositories for many years.

Test Plan: Ran `bin/repository update --trace --verbose PHABX`, saw sensible construction of local and remote maps and accurate detection of whether a fetch would do anything or not.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T12392, T12296

Differential Revision: https://secure.phabricator.com/D17497
This commit is contained in:
epriestley 2017-03-14 14:56:21 -07:00
parent aef2a39a81
commit 2b0ad243d1

View file

@ -120,6 +120,7 @@ final class PhabricatorRepositoryPullEngine
pht(
'Updating the working copy for repository "%s".',
$repository->getDisplayName()));
if ($is_git) {
$this->verifyGitOrigin($repository);
$this->executeGitUpdate();
@ -157,7 +158,7 @@ final class PhabricatorRepositoryPullEngine
}
private function skipPull($message) {
$this->log('%s', $message);
$this->log($message);
$this->donePull();
}
@ -172,7 +173,7 @@ final class PhabricatorRepositoryPullEngine
}
private function logPull($message) {
$this->log('%s', $message);
$this->log($message);
}
private function donePull() {
@ -190,7 +191,7 @@ final class PhabricatorRepositoryPullEngine
}
private function installHook($path, array $hook_argv = array()) {
$this->log('%s', pht('Installing commit hook to "%s"...', $path));
$this->log(pht('Installing commit hook to "%s"...', $path));
$repository = $this->getRepository();
$identifier = $this->getHookContextIdentifier($repository);
@ -339,6 +340,18 @@ final class PhabricatorRepositoryPullEngine
throw new Exception($message);
}
$remote_refs = $this->loadGitRemoteRefs($repository);
$local_refs = $this->loadGitLocalRefs($repository);
if ($remote_refs === $local_refs) {
$this->log(
pht(
'Skipping fetch because local and remote refs are already '.
'identical.'));
return false;
}
$this->logRefDifferences($remote_refs, $local_refs);
$retry = false;
do {
// This is a local command, but needs credentials.
@ -396,6 +409,75 @@ final class PhabricatorRepositoryPullEngine
$this->installHook($root.$path);
}
private function loadGitRemoteRefs(PhabricatorRepository $repository) {
$remote_envelope = $repository->getRemoteURIEnvelope();
list($stdout) = $repository->execxRemoteCommand(
'ls-remote -- %P',
$remote_envelope);
$map = array();
$lines = phutil_split_lines($stdout, false);
foreach ($lines as $line) {
list($hash, $name) = preg_split('/\s+/', $line, 2);
// If the remote has a HEAD, just ignore it.
if ($name == 'HEAD') {
continue;
}
// If the remote ref is itself a remote ref, ignore it.
if (preg_match('(^refs/remotes/)', $name)) {
continue;
}
$map[$name] = $hash;
}
ksort($map);
return $map;
}
private function loadGitLocalRefs(PhabricatorRepository $repository) {
$refs = id(new DiffusionLowLevelGitRefQuery())
->setRepository($repository)
->execute();
$map = array();
foreach ($refs as $ref) {
$fields = $ref->getRawFields();
$map[idx($fields, 'refname')] = $ref->getCommitIdentifier();
}
ksort($map);
return $map;
}
private function logRefDifferences(array $remote, array $local) {
$all = $local + $remote;
$differences = array();
foreach ($all as $key => $ignored) {
$remote_ref = idx($remote, $key, pht('<null>'));
$local_ref = idx($local, $key, pht('<null>'));
if ($remote_ref !== $local_ref) {
$differences[] = pht(
'%s (remote: "%s", local: "%s")',
$key,
$remote_ref,
$local_ref);
}
}
$this->log(
pht(
"Updating repository after detecting ref differences:\n%s",
implode("\n", $differences)));
}
/* -( Pulling Mercurial Working Copies )----------------------------------- */