1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-26 08:42:41 +01:00

Instead of retrying safe reads 3 times, retry each eligible service once

Summary: Ref T13286. When retrying a read request, keep retrying as long as we have canididate services. Since we consume a service with each attempt, there's no real reason to abort early, and trying every service allows reads to always succeed even if (for example) 8 nodes of a 16-node cluster are dead because of a severed network link between datacenters.

Test Plan: Ran `git pull` in a clustered repository with an up node and a down node; saw retry count dynamically adjust to available node count.

Maniphest Tasks: T13286

Differential Revision: https://secure.phabricator.com/D20777
This commit is contained in:
epriestley 2019-09-03 10:38:41 -07:00
parent 95fb237ab3
commit ff3d1769b4

View file

@ -126,8 +126,6 @@ final class DiffusionGitUploadPackSSHWorkflow
->setCommandChannelFromExecFuture($future) ->setCommandChannelFromExecFuture($future)
->execute(); ->execute();
$err = 1;
// TODO: Currently, when proxying, we do not write an event log on the // TODO: Currently, when proxying, we do not write an event log on the
// proxy. Perhaps we should write a "proxy log". This is not very useful // proxy. Perhaps we should write a "proxy log". This is not very useful
// for statistics or auditing, but could be useful for diagnostics. // for statistics or auditing, but could be useful for diagnostics.
@ -144,14 +142,7 @@ final class DiffusionGitUploadPackSSHWorkflow
// the same host. // the same host.
array_shift($refs); array_shift($refs);
// Check if we have more services we can try. If we do, we'll make an $should_retry = $this->shouldRetryRequest($refs);
// effort to fall back to them below. If not, we can't do anything to
// recover so just bail out.
if (!$refs) {
return $err;
}
$should_retry = $this->shouldRetryRequest();
if (!$should_retry) { if (!$should_retry) {
return $err; return $err;
} }
@ -168,7 +159,7 @@ final class DiffusionGitUploadPackSSHWorkflow
return $this; return $this;
} }
private function shouldRetryRequest() { private function shouldRetryRequest(array $remaining_refs) {
$this->requestFailures++; $this->requestFailures++;
if ($this->requestFailures > $this->requestAttempts) { if ($this->requestFailures > $this->requestAttempts) {
@ -178,11 +169,11 @@ final class DiffusionGitUploadPackSSHWorkflow
"missing call to \"didBeginRequest()\".\n")); "missing call to \"didBeginRequest()\".\n"));
} }
$max_failures = 3; if (!$remaining_refs) {
if ($this->requestFailures >= $max_failures) {
$this->writeClusterEngineLogMessage( $this->writeClusterEngineLogMessage(
pht( pht(
"# Reached maximum number of retry attempts, giving up.\n")); "# All available services failed to serve the request, ".
"giving up.\n"));
return false; return false;
} }
@ -208,7 +199,7 @@ final class DiffusionGitUploadPackSSHWorkflow
pht( pht(
"# Service request failed, retrying (making attempt %s of %s).\n", "# Service request failed, retrying (making attempt %s of %s).\n",
new PhutilNumber($this->requestAttempts + 1), new PhutilNumber($this->requestAttempts + 1),
new PhutilNumber($max_failures))); new PhutilNumber($this->requestAttempts + count($remaining_refs))));
return true; return true;
} }