mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-26 08:42:41 +01:00
Instead of retrying safe reads 3 times, retry each eligible service once
Summary: Ref T13286. When retrying a read request, keep retrying as long as we have canididate services. Since we consume a service with each attempt, there's no real reason to abort early, and trying every service allows reads to always succeed even if (for example) 8 nodes of a 16-node cluster are dead because of a severed network link between datacenters. Test Plan: Ran `git pull` in a clustered repository with an up node and a down node; saw retry count dynamically adjust to available node count. Maniphest Tasks: T13286 Differential Revision: https://secure.phabricator.com/D20777
This commit is contained in:
parent
95fb237ab3
commit
ff3d1769b4
1 changed files with 6 additions and 15 deletions
|
@ -126,8 +126,6 @@ final class DiffusionGitUploadPackSSHWorkflow
|
||||||
->setCommandChannelFromExecFuture($future)
|
->setCommandChannelFromExecFuture($future)
|
||||||
->execute();
|
->execute();
|
||||||
|
|
||||||
$err = 1;
|
|
||||||
|
|
||||||
// TODO: Currently, when proxying, we do not write an event log on the
|
// TODO: Currently, when proxying, we do not write an event log on the
|
||||||
// proxy. Perhaps we should write a "proxy log". This is not very useful
|
// proxy. Perhaps we should write a "proxy log". This is not very useful
|
||||||
// for statistics or auditing, but could be useful for diagnostics.
|
// for statistics or auditing, but could be useful for diagnostics.
|
||||||
|
@ -144,14 +142,7 @@ final class DiffusionGitUploadPackSSHWorkflow
|
||||||
// the same host.
|
// the same host.
|
||||||
array_shift($refs);
|
array_shift($refs);
|
||||||
|
|
||||||
// Check if we have more services we can try. If we do, we'll make an
|
$should_retry = $this->shouldRetryRequest($refs);
|
||||||
// effort to fall back to them below. If not, we can't do anything to
|
|
||||||
// recover so just bail out.
|
|
||||||
if (!$refs) {
|
|
||||||
return $err;
|
|
||||||
}
|
|
||||||
|
|
||||||
$should_retry = $this->shouldRetryRequest();
|
|
||||||
if (!$should_retry) {
|
if (!$should_retry) {
|
||||||
return $err;
|
return $err;
|
||||||
}
|
}
|
||||||
|
@ -168,7 +159,7 @@ final class DiffusionGitUploadPackSSHWorkflow
|
||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function shouldRetryRequest() {
|
private function shouldRetryRequest(array $remaining_refs) {
|
||||||
$this->requestFailures++;
|
$this->requestFailures++;
|
||||||
|
|
||||||
if ($this->requestFailures > $this->requestAttempts) {
|
if ($this->requestFailures > $this->requestAttempts) {
|
||||||
|
@ -178,11 +169,11 @@ final class DiffusionGitUploadPackSSHWorkflow
|
||||||
"missing call to \"didBeginRequest()\".\n"));
|
"missing call to \"didBeginRequest()\".\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
$max_failures = 3;
|
if (!$remaining_refs) {
|
||||||
if ($this->requestFailures >= $max_failures) {
|
|
||||||
$this->writeClusterEngineLogMessage(
|
$this->writeClusterEngineLogMessage(
|
||||||
pht(
|
pht(
|
||||||
"# Reached maximum number of retry attempts, giving up.\n"));
|
"# All available services failed to serve the request, ".
|
||||||
|
"giving up.\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,7 +199,7 @@ final class DiffusionGitUploadPackSSHWorkflow
|
||||||
pht(
|
pht(
|
||||||
"# Service request failed, retrying (making attempt %s of %s).\n",
|
"# Service request failed, retrying (making attempt %s of %s).\n",
|
||||||
new PhutilNumber($this->requestAttempts + 1),
|
new PhutilNumber($this->requestAttempts + 1),
|
||||||
new PhutilNumber($max_failures)));
|
new PhutilNumber($this->requestAttempts + count($remaining_refs))));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue