mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-22 23:02:42 +01:00
Allow "bin/repository thaw --demote" to demote an entire service, not just a single device
Summary: Ref T13222. See PHI992. If you lose an entire cluster, you may want to aggressively demote it out of existence. You currently need to `xargs` your way through this. Allow `--demote <service>`, which demotes all devices in a service.
Test Plan: Demoted with `--demote <device>` and `--demote <service>`. Hit the `--promote <service>` error.
Reviewers: amckinley
Reviewed By: amckinley
Maniphest Tasks: T13222
Differential Revision: https://secure.phabricator.com/D19850
This commit is contained in:
parent
bba4186005
commit
1a6a0181a8
2 changed files with 164 additions and 111 deletions
|
@ -15,10 +15,11 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
||||||
array(
|
array(
|
||||||
array(
|
array(
|
||||||
'name' => 'demote',
|
'name' => 'demote',
|
||||||
'param' => 'device',
|
'param' => 'device/service',
|
||||||
'help' => pht(
|
'help' => pht(
|
||||||
'Demote a device, discarding local changes. Clears stuck '.
|
'Demote a device (or all devices in a service) discarding '.
|
||||||
'write locks and recovers from lost leaders.'),
|
'local changes. Clears stuck write locks and recovers from '.
|
||||||
|
'lost leaders.'),
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
'name' => 'promote',
|
'name' => 'promote',
|
||||||
|
@ -61,15 +62,53 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
||||||
pht('Specify either --promote or --demote, but not both.'));
|
pht('Specify either --promote or --demote, but not both.'));
|
||||||
}
|
}
|
||||||
|
|
||||||
$device_name = nonempty($promote, $demote);
|
$target_name = nonempty($promote, $demote);
|
||||||
|
|
||||||
$device = id(new AlmanacDeviceQuery())
|
$devices = id(new AlmanacDeviceQuery())
|
||||||
->setViewer($viewer)
|
->setViewer($viewer)
|
||||||
->withNames(array($device_name))
|
->withNames(array($target_name))
|
||||||
->executeOne();
|
->execute();
|
||||||
if (!$device) {
|
if (!$devices) {
|
||||||
throw new PhutilArgumentUsageException(
|
$service = id(new AlmanacServiceQuery())
|
||||||
pht('No device "%s" exists.', $device_name));
|
->setViewer($viewer)
|
||||||
|
->withNames(array($target_name))
|
||||||
|
->executeOne();
|
||||||
|
|
||||||
|
if (!$service) {
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht('No device or service named "%s" exists.', $target_name));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($promote) {
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht(
|
||||||
|
'You can not "--promote" an entire service ("%s"). Only a single '.
|
||||||
|
'device may be promoted.',
|
||||||
|
$target_name));
|
||||||
|
}
|
||||||
|
|
||||||
|
$bindings = id(new AlmanacBindingQuery())
|
||||||
|
->setViewer($viewer)
|
||||||
|
->withServicePHIDs(array($service->getPHID()))
|
||||||
|
->execute();
|
||||||
|
if (!$bindings) {
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht(
|
||||||
|
'Service "%s" is not bound to any devices.',
|
||||||
|
$target_name));
|
||||||
|
}
|
||||||
|
|
||||||
|
$interfaces = id(new AlmanacInterfaceQuery())
|
||||||
|
->setViewer($viewer)
|
||||||
|
->withPHIDs(mpull($bindings, 'getInterfacePHID'))
|
||||||
|
->execute();
|
||||||
|
|
||||||
|
$device_phids = mpull($interfaces, 'getDevicePHID');
|
||||||
|
|
||||||
|
$devices = id(new AlmanacDeviceQuery())
|
||||||
|
->setViewer($viewer)
|
||||||
|
->withPHIDs($device_phids)
|
||||||
|
->execute();
|
||||||
}
|
}
|
||||||
|
|
||||||
$repository_names = $args->getArg('repositories');
|
$repository_names = $args->getArg('repositories');
|
||||||
|
@ -97,7 +136,7 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
||||||
|
|
||||||
$services = id(new AlmanacServiceQuery())
|
$services = id(new AlmanacServiceQuery())
|
||||||
->setViewer($viewer)
|
->setViewer($viewer)
|
||||||
->withDevicePHIDs(array($device->getPHID()))
|
->withDevicePHIDs(mpull($devices, 'getPHID'))
|
||||||
->execute();
|
->execute();
|
||||||
if ($services) {
|
if ($services) {
|
||||||
$repositories = id(new PhabricatorRepositoryQuery())
|
$repositories = id(new PhabricatorRepositoryQuery())
|
||||||
|
@ -108,7 +147,7 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
||||||
|
|
||||||
if (!$repositories) {
|
if (!$repositories) {
|
||||||
throw new PhutilArgumentUsageException(
|
throw new PhutilArgumentUsageException(
|
||||||
pht('There are no repositories on the selected device.'));
|
pht('There are no repositories on the selected device or service.'));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -150,126 +189,128 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
||||||
pht('User aborted the workflow.'));
|
pht('User aborted the workflow.'));
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($repositories as $repository) {
|
foreach ($devices as $device) {
|
||||||
$repository_phid = $repository->getPHID();
|
foreach ($repositories as $repository) {
|
||||||
|
$repository_phid = $repository->getPHID();
|
||||||
|
|
||||||
$write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock(
|
$write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock(
|
||||||
$repository_phid);
|
$repository_phid);
|
||||||
|
|
||||||
echo tsprintf(
|
echo tsprintf(
|
||||||
"%s\n",
|
"%s\n",
|
||||||
pht(
|
pht(
|
||||||
'Waiting to acquire write lock for "%s"...',
|
'Waiting to acquire write lock for "%s"...',
|
||||||
$repository->getDisplayName()));
|
$repository->getDisplayName()));
|
||||||
|
|
||||||
$write_lock->lock(phutil_units('5 minutes in seconds'));
|
$write_lock->lock(phutil_units('5 minutes in seconds'));
|
||||||
try {
|
try {
|
||||||
|
|
||||||
$service = $repository->loadAlmanacService();
|
$service = $repository->loadAlmanacService();
|
||||||
if (!$service) {
|
if (!$service) {
|
||||||
throw new PhutilArgumentUsageException(
|
|
||||||
pht(
|
|
||||||
'Repository "%s" is not a cluster repository: it is not '.
|
|
||||||
'bound to an Almanac service.',
|
|
||||||
$repository->getDisplayName()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($promote) {
|
|
||||||
// You can only promote active devices. (You may demote active or
|
|
||||||
// inactive devices.)
|
|
||||||
$bindings = $service->getActiveBindings();
|
|
||||||
$bindings = mpull($bindings, null, 'getDevicePHID');
|
|
||||||
if (empty($bindings[$device->getPHID()])) {
|
|
||||||
throw new PhutilArgumentUsageException(
|
throw new PhutilArgumentUsageException(
|
||||||
pht(
|
pht(
|
||||||
'Repository "%s" has no active binding to device "%s". Only '.
|
'Repository "%s" is not a cluster repository: it is not '.
|
||||||
'actively bound devices can be promoted.',
|
'bound to an Almanac service.',
|
||||||
$repository->getDisplayName(),
|
$repository->getDisplayName()));
|
||||||
$device->getName()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions(
|
if ($promote) {
|
||||||
$repository->getPHID());
|
// You can only promote active devices. (You may demote active or
|
||||||
$versions = mpull($versions, null, 'getDevicePHID');
|
// inactive devices.)
|
||||||
|
$bindings = $service->getActiveBindings();
|
||||||
// Before we promote, make sure there are no outstanding versions on
|
$bindings = mpull($bindings, null, 'getDevicePHID');
|
||||||
// devices with inactive bindings. If there are, you need to demote
|
if (empty($bindings[$device->getPHID()])) {
|
||||||
// these first.
|
throw new PhutilArgumentUsageException(
|
||||||
$inactive = array();
|
pht(
|
||||||
foreach ($versions as $device_phid => $version) {
|
'Repository "%s" has no active binding to device "%s". '.
|
||||||
if (isset($bindings[$device_phid])) {
|
'Only actively bound devices can be promoted.',
|
||||||
continue;
|
$repository->getDisplayName(),
|
||||||
|
$device->getName()));
|
||||||
}
|
}
|
||||||
$inactive[$device_phid] = $version;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($inactive) {
|
$versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions(
|
||||||
$handles = $viewer->loadHandles(array_keys($inactive));
|
$repository->getPHID());
|
||||||
|
$versions = mpull($versions, null, 'getDevicePHID');
|
||||||
|
|
||||||
$handle_list = iterator_to_array($handles);
|
// Before we promote, make sure there are no outstanding versions
|
||||||
$handle_list = mpull($handle_list, 'getName');
|
// on devices with inactive bindings. If there are, you need to
|
||||||
$handle_list = implode(', ', $handle_list);
|
// demote these first.
|
||||||
|
$inactive = array();
|
||||||
|
foreach ($versions as $device_phid => $version) {
|
||||||
|
if (isset($bindings[$device_phid])) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$inactive[$device_phid] = $version;
|
||||||
|
}
|
||||||
|
|
||||||
throw new PhutilArgumentUsageException(
|
if ($inactive) {
|
||||||
|
$handles = $viewer->loadHandles(array_keys($inactive));
|
||||||
|
|
||||||
|
$handle_list = iterator_to_array($handles);
|
||||||
|
$handle_list = mpull($handle_list, 'getName');
|
||||||
|
$handle_list = implode(', ', $handle_list);
|
||||||
|
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht(
|
||||||
|
'Repository "%s" has versions on inactive devices. Demote '.
|
||||||
|
'(or reactivate) these devices before promoting a new '.
|
||||||
|
'leader: %s.',
|
||||||
|
$repository->getDisplayName(),
|
||||||
|
$handle_list));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now, make sure there are no outstanding versions on devices with
|
||||||
|
// active bindings. These also need to be demoted (or promoting is
|
||||||
|
// a mistake or already happened).
|
||||||
|
$active = array_select_keys($versions, array_keys($bindings));
|
||||||
|
if ($active) {
|
||||||
|
$handles = $viewer->loadHandles(array_keys($active));
|
||||||
|
|
||||||
|
$handle_list = iterator_to_array($handles);
|
||||||
|
$handle_list = mpull($handle_list, 'getName');
|
||||||
|
$handle_list = implode(', ', $handle_list);
|
||||||
|
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht(
|
||||||
|
'Unable to promote "%s" for repository "%s" because this '.
|
||||||
|
'cluster already has one or more unambiguous leaders: %s.',
|
||||||
|
$device->getName(),
|
||||||
|
$repository->getDisplayName(),
|
||||||
|
$handle_list));
|
||||||
|
}
|
||||||
|
|
||||||
|
PhabricatorRepositoryWorkingCopyVersion::updateVersion(
|
||||||
|
$repository->getPHID(),
|
||||||
|
$device->getPHID(),
|
||||||
|
0);
|
||||||
|
|
||||||
|
echo tsprintf(
|
||||||
|
"%s\n",
|
||||||
pht(
|
pht(
|
||||||
'Repository "%s" has versions on inactive devices. Demote '.
|
'Promoted "%s" to become a leader for "%s".',
|
||||||
'(or reactivate) these devices before promoting a new '.
|
|
||||||
'leader: %s.',
|
|
||||||
$repository->getDisplayName(),
|
|
||||||
$handle_list));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now, make sure there are no outstanding versions on devices with
|
|
||||||
// active bindings. These also need to be demoted (or promoting is a
|
|
||||||
// mistake or already happened).
|
|
||||||
$active = array_select_keys($versions, array_keys($bindings));
|
|
||||||
if ($active) {
|
|
||||||
$handles = $viewer->loadHandles(array_keys($active));
|
|
||||||
|
|
||||||
$handle_list = iterator_to_array($handles);
|
|
||||||
$handle_list = mpull($handle_list, 'getName');
|
|
||||||
$handle_list = implode(', ', $handle_list);
|
|
||||||
|
|
||||||
throw new PhutilArgumentUsageException(
|
|
||||||
pht(
|
|
||||||
'Unable to promote "%s" for repository "%s" because this '.
|
|
||||||
'cluster already has one or more unambiguous leaders: %s.',
|
|
||||||
$device->getName(),
|
$device->getName(),
|
||||||
$repository->getDisplayName(),
|
$repository->getDisplayName()));
|
||||||
$handle_list));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PhabricatorRepositoryWorkingCopyVersion::updateVersion(
|
if ($demote) {
|
||||||
$repository->getPHID(),
|
PhabricatorRepositoryWorkingCopyVersion::demoteDevice(
|
||||||
$device->getPHID(),
|
$repository->getPHID(),
|
||||||
0);
|
$device->getPHID());
|
||||||
|
|
||||||
echo tsprintf(
|
echo tsprintf(
|
||||||
"%s\n",
|
"%s\n",
|
||||||
pht(
|
pht(
|
||||||
'Promoted "%s" to become a leader for "%s".',
|
'Demoted "%s" from leadership of repository "%s".',
|
||||||
$device->getName(),
|
$device->getName(),
|
||||||
$repository->getDisplayName()));
|
$repository->getDisplayName()));
|
||||||
|
}
|
||||||
|
} catch (Exception $ex) {
|
||||||
|
$write_lock->unlock();
|
||||||
|
throw $ex;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($demote) {
|
|
||||||
PhabricatorRepositoryWorkingCopyVersion::demoteDevice(
|
|
||||||
$repository->getPHID(),
|
|
||||||
$device->getPHID());
|
|
||||||
|
|
||||||
echo tsprintf(
|
|
||||||
"%s\n",
|
|
||||||
pht(
|
|
||||||
'Demoted "%s" from leadership of repository "%s".',
|
|
||||||
$device->getName(),
|
|
||||||
$repository->getDisplayName()));
|
|
||||||
}
|
|
||||||
} catch (Exception $ex) {
|
|
||||||
$write_lock->unlock();
|
$write_lock->unlock();
|
||||||
throw $ex;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$write_lock->unlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -433,6 +433,18 @@ If you do this, **you will lose unreplicated data**. You will discard any
|
||||||
changes on the affected leaders which have not replicated to other devices
|
changes on the affected leaders which have not replicated to other devices
|
||||||
in the cluster.
|
in the cluster.
|
||||||
|
|
||||||
|
If you have lost an entire cluster and replaced it with new devices that you
|
||||||
|
have restored from backups, you can aggressively wipe all memory of the old
|
||||||
|
devices by using `--demote <service>` and `--all-repositories`. **This is
|
||||||
|
dangerous and discards all unreplicated data in any repository on any device.**
|
||||||
|
|
||||||
|
```
|
||||||
|
phabricator/ $ ./bin/repository thaw --demote repo.corp.net --all-repositories
|
||||||
|
```
|
||||||
|
|
||||||
|
After you do this, continue below to promote a leader and restore the cluster
|
||||||
|
to service.
|
||||||
|
|
||||||
|
|
||||||
Ambiguous Leaders
|
Ambiguous Leaders
|
||||||
=================
|
=================
|
||||||
|
|
Loading…
Reference in a new issue