diff --git a/src/applications/almanac/servicetype/AlmanacClusterRepositoryServiceType.php b/src/applications/almanac/servicetype/AlmanacClusterRepositoryServiceType.php index ae3dff9995..98947f56c1 100644 --- a/src/applications/almanac/servicetype/AlmanacClusterRepositoryServiceType.php +++ b/src/applications/almanac/servicetype/AlmanacClusterRepositoryServiceType.php @@ -57,6 +57,11 @@ final class AlmanacClusterRepositoryServiceType 'protocol' => id(new PhabricatorSelectEditField()) ->setOptions(ipull($protocols, 'value', 'value')) ->setValue($default_value), + 'writable' => id(new PhabricatorBoolEditField()) + ->setOptions( + pht('Prevent Writes'), + pht('Allow Writes')) + ->setValue(true), ); } diff --git a/src/applications/diffusion/controller/DiffusionServeController.php b/src/applications/diffusion/controller/DiffusionServeController.php index 6b9c5c6802..5a5c446a29 100644 --- a/src/applications/diffusion/controller/DiffusionServeController.php +++ b/src/applications/diffusion/controller/DiffusionServeController.php @@ -437,6 +437,7 @@ final class DiffusionServeController extends DiffusionController { 'http', 'https', ), + 'writable' => !$this->isReadOnlyRequest($repository), )); if ($uri) { $future = $this->getRequest()->newClusterProxyFuture($uri); diff --git a/src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php index 400340abeb..d48abc159e 100644 --- a/src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php @@ -29,7 +29,7 @@ final class DiffusionGitReceivePackSSHWorkflow extends DiffusionGitSSHWorkflow { ->setLog($this); if ($this->shouldProxy()) { - $command = $this->getProxyCommand(); + $command = $this->getProxyCommand(true); $did_synchronize = false; if ($device) { diff --git a/src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php index 5ed42ba79d..e96030f577 100644 --- a/src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php @@ -22,7 +22,7 @@ final class DiffusionGitUploadPackSSHWorkflow extends DiffusionGitSSHWorkflow { $is_proxy = $this->shouldProxy(); if ($is_proxy) { - $command = $this->getProxyCommand(); + $command = $this->getProxyCommand(false); if ($device) { $this->writeClusterEngineLogMessage( diff --git a/src/applications/diffusion/ssh/DiffusionMercurialServeSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionMercurialServeSSHWorkflow.php index 5701a4bac1..221d1696b2 100644 --- a/src/applications/diffusion/ssh/DiffusionMercurialServeSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionMercurialServeSSHWorkflow.php @@ -45,7 +45,10 @@ final class DiffusionMercurialServeSSHWorkflow } if ($this->shouldProxy()) { - $command = $this->getProxyCommand(); + // NOTE: For now, we're always requesting a writable node. The request + // may not actually need one, but we can't currently determine whether + // it is read-only or not at this phase of evaluation. + $command = $this->getProxyCommand(true); } else { $command = csprintf( 'hg -R %s serve --stdio', diff --git a/src/applications/diffusion/ssh/DiffusionSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionSSHWorkflow.php index 2a3841892f..02cacbfe94 100644 --- a/src/applications/diffusion/ssh/DiffusionSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionSSHWorkflow.php @@ -5,7 +5,7 @@ abstract class DiffusionSSHWorkflow extends PhabricatorSSHWorkflow { private $args; private $repository; private $hasWriteAccess; - private $proxyURI; + private $shouldProxy; private $baseRequestPath; public function getRepository() { @@ -69,18 +69,34 @@ abstract class DiffusionSSHWorkflow extends PhabricatorSSHWorkflow { return php_uname('n'); } - protected function getTargetDeviceName() { - // TODO: This should use the correct device identity. - $uri = new PhutilURI($this->proxyURI); - return $uri->getDomain(); - } - protected function shouldProxy() { - return (bool)$this->proxyURI; + return $this->shouldProxy; } - protected function getProxyCommand() { - $uri = new PhutilURI($this->proxyURI); + protected function getProxyCommand($for_write) { + $viewer = $this->getSSHUser(); + $repository = $this->getRepository(); + + $is_cluster_request = $this->getIsClusterRequest(); + + $uri = $repository->getAlmanacServiceURI( + $viewer, + array( + 'neverProxy' => $is_cluster_request, + 'protocols' => array( + 'ssh', + ), + 'writable' => $for_write, + )); + + if (!$uri) { + throw new Exception( + pht( + 'Failed to generate an intracluster proxy URI even though this '. + 'request was routed as a proxy request.')); + } + + $uri = new PhutilURI($uri); $username = AlmanacKeys::getClusterSSHUser(); if ($username === null) { @@ -148,6 +164,15 @@ abstract class DiffusionSSHWorkflow extends PhabricatorSSHWorkflow { $repository = $this->identifyRepository(); $this->setRepository($repository); + // NOTE: Here, we're just figuring out if this is a proxyable request to + // a clusterized repository or not. We don't (and can't) use the URI we get + // back directly. + + // For example, we may get a read-only URI here but be handling a write + // request. We only care if we get back `null` (which means we should + // handle the request locally) or anything else (which means we should + // proxy it to an appropriate device). + $is_cluster_request = $this->getIsClusterRequest(); $uri = $repository->getAlmanacServiceURI( $viewer, @@ -157,10 +182,7 @@ abstract class DiffusionSSHWorkflow extends PhabricatorSSHWorkflow { 'ssh', ), )); - - if ($uri) { - $this->proxyURI = $uri; - } + $this->shouldProxy = (bool)$uri; try { return $this->executeRepositoryOperations(); diff --git a/src/applications/diffusion/ssh/DiffusionSubversionServeSSHWorkflow.php b/src/applications/diffusion/ssh/DiffusionSubversionServeSSHWorkflow.php index 0df4aa17fe..76dae05971 100644 --- a/src/applications/diffusion/ssh/DiffusionSubversionServeSSHWorkflow.php +++ b/src/applications/diffusion/ssh/DiffusionSubversionServeSSHWorkflow.php @@ -148,7 +148,10 @@ final class DiffusionSubversionServeSSHWorkflow } if ($this->shouldProxy()) { - $command = $this->getProxyCommand(); + // NOTE: We're always requesting a writable device here. The request + // might be read-only, but we can't currently tell, and SVN requests + // can mix reads and writes. + $command = $this->getProxyCommand(true); $this->isProxying = true; $cwd = null; } else { diff --git a/src/applications/repository/storage/PhabricatorRepository.php b/src/applications/repository/storage/PhabricatorRepository.php index 37dd5f1ec5..0a09503ee9 100644 --- a/src/applications/repository/storage/PhabricatorRepository.php +++ b/src/applications/repository/storage/PhabricatorRepository.php @@ -1909,10 +1909,12 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO array( 'neverProxy' => 'bool', 'protocols' => 'list', + 'writable' => 'optional bool', )); $never_proxy = $options['neverProxy']; $protocols = $options['protocols']; + $writable = idx($options, 'writable', false); $cache_key = $this->getAlmanacServiceCacheKey(); if (!$cache_key) { @@ -1958,7 +1960,7 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO } if (isset($protocol_map[$uri['protocol']])) { - $results[] = new PhutilURI($uri['uri']); + $results[] = $uri; } } @@ -1989,8 +1991,29 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO } } + // If we require a writable device, remove URIs which aren't writable. + if ($writable) { + foreach ($results as $key => $uri) { + if (!$uri['writable']) { + unset($results[$key]); + } + } + + if (!$results) { + throw new Exception( + pht( + 'This repository ("%s") is not writable with the given '. + 'protocols (%s). The Almanac service for this repository has no '. + 'writable bindings that support these protocols.', + $this->getDisplayName(), + implode(', ', $protocols))); + } + } + shuffle($results); - return head($results); + + $result = head($results); + return $result['uri']; } public function supportsSynchronization() { @@ -2009,7 +2032,14 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO } $repository_phid = $this->getPHID(); - return "diffusion.repository({$repository_phid}).service({$service_phid})"; + + $parts = array( + "repo({$repository_phid})", + "serv({$service_phid})", + 'v2', + ); + + return implode('.', $parts); } private function buildAlmanacServiceURIs() { @@ -2038,6 +2068,7 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO 'protocol' => $protocol, 'uri' => (string)$uri, 'device' => $device_name, + 'writable' => (bool)$binding->getAlmanacPropertyValue('writable'), ); } @@ -2091,6 +2122,10 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO 'http', 'https', ), + + // At least today, no Conduit call can ever write to a repository, + // so it's fine to send anything to a read-only node. + 'writable' => false, )); if ($uri === null) { return null; diff --git a/src/docs/user/cluster/cluster_repositories.diviner b/src/docs/user/cluster/cluster_repositories.diviner index 44003679af..8d764c2e3e 100644 --- a/src/docs/user/cluster/cluster_repositories.diviner +++ b/src/docs/user/cluster/cluster_repositories.diviner @@ -221,13 +221,33 @@ synchronized. Contracting a Cluster ===================== -To reduce the size of an existing cluster, follow these general steps: +If you want to remove working devices from a cluster (for example, to take +hosts down for maintenance), first do this for each device: - - Disable the bindings from the service to the dead device in Almanac. + - Change the `writable` property on the bindings to "Prevent Writes". + - Wait a few moments until the cluster synchronizes (see + "Monitoring Services" below). + +This will ensure that the device you're about to remove is not the only cluster +leader, even if the cluster is receiving a high write volume. You can skip this +step if the device isn't working property to start with. + +Once you've stopped writes and waited for synchronization (or if the hosts are +not working in the first place) do this for each device: + + - Disable the bindings from the service to the device in Almanac. If you are removing a device because it failed abruptly (or removing several -devices at once) it is possible that some repositories will have lost all their -leaders. See "Loss of Leaders" below to understand and resolve this. +devices at once; or you skip the "Prevent Writes" step), it is possible that +some repositories will have lost all their leaders. See "Loss of Leaders" below +to understand and resolve this. + +If you want to put the hosts back in service later: + + - Enable the bindings again. + - Change `writable` back to "Allow Writes". + +This will restore the cluster to the original state. Monitoring Services