mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-23 22:10:55 +01:00
Allow "bin/repository thaw --demote" to demote an entire service, not just a single device
Summary: Ref T13222. See PHI992. If you lose an entire cluster, you may want to aggressively demote it out of existence. You currently need to `xargs` your way through this. Allow `--demote <service>`, which demotes all devices in a service. Test Plan: Demoted with `--demote <device>` and `--demote <service>`. Hit the `--promote service` error. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13222 Differential Revision: https://secure.phabricator.com/D19850
This commit is contained in:
parent
bba4186005
commit
1a6a0181a8
2 changed files with 164 additions and 111 deletions
|
@ -15,10 +15,11 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
|||
array(
|
||||
array(
|
||||
'name' => 'demote',
|
||||
'param' => 'device',
|
||||
'param' => 'device/service',
|
||||
'help' => pht(
|
||||
'Demote a device, discarding local changes. Clears stuck '.
|
||||
'write locks and recovers from lost leaders.'),
|
||||
'Demote a device (or all devices in a service) discarding '.
|
||||
'local changes. Clears stuck write locks and recovers from '.
|
||||
'lost leaders.'),
|
||||
),
|
||||
array(
|
||||
'name' => 'promote',
|
||||
|
@ -61,15 +62,53 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
|||
pht('Specify either --promote or --demote, but not both.'));
|
||||
}
|
||||
|
||||
$device_name = nonempty($promote, $demote);
|
||||
$target_name = nonempty($promote, $demote);
|
||||
|
||||
$device = id(new AlmanacDeviceQuery())
|
||||
$devices = id(new AlmanacDeviceQuery())
|
||||
->setViewer($viewer)
|
||||
->withNames(array($device_name))
|
||||
->executeOne();
|
||||
if (!$device) {
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht('No device "%s" exists.', $device_name));
|
||||
->withNames(array($target_name))
|
||||
->execute();
|
||||
if (!$devices) {
|
||||
$service = id(new AlmanacServiceQuery())
|
||||
->setViewer($viewer)
|
||||
->withNames(array($target_name))
|
||||
->executeOne();
|
||||
|
||||
if (!$service) {
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht('No device or service named "%s" exists.', $target_name));
|
||||
}
|
||||
|
||||
if ($promote) {
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht(
|
||||
'You can not "--promote" an entire service ("%s"). Only a single '.
|
||||
'device may be promoted.',
|
||||
$target_name));
|
||||
}
|
||||
|
||||
$bindings = id(new AlmanacBindingQuery())
|
||||
->setViewer($viewer)
|
||||
->withServicePHIDs(array($service->getPHID()))
|
||||
->execute();
|
||||
if (!$bindings) {
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht(
|
||||
'Service "%s" is not bound to any devices.',
|
||||
$target_name));
|
||||
}
|
||||
|
||||
$interfaces = id(new AlmanacInterfaceQuery())
|
||||
->setViewer($viewer)
|
||||
->withPHIDs(mpull($bindings, 'getInterfacePHID'))
|
||||
->execute();
|
||||
|
||||
$device_phids = mpull($interfaces, 'getDevicePHID');
|
||||
|
||||
$devices = id(new AlmanacDeviceQuery())
|
||||
->setViewer($viewer)
|
||||
->withPHIDs($device_phids)
|
||||
->execute();
|
||||
}
|
||||
|
||||
$repository_names = $args->getArg('repositories');
|
||||
|
@ -97,7 +136,7 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
|||
|
||||
$services = id(new AlmanacServiceQuery())
|
||||
->setViewer($viewer)
|
||||
->withDevicePHIDs(array($device->getPHID()))
|
||||
->withDevicePHIDs(mpull($devices, 'getPHID'))
|
||||
->execute();
|
||||
if ($services) {
|
||||
$repositories = id(new PhabricatorRepositoryQuery())
|
||||
|
@ -108,7 +147,7 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
|||
|
||||
if (!$repositories) {
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht('There are no repositories on the selected device.'));
|
||||
pht('There are no repositories on the selected device or service.'));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -150,126 +189,128 @@ final class PhabricatorRepositoryManagementThawWorkflow
|
|||
pht('User aborted the workflow.'));
|
||||
}
|
||||
|
||||
foreach ($repositories as $repository) {
|
||||
$repository_phid = $repository->getPHID();
|
||||
foreach ($devices as $device) {
|
||||
foreach ($repositories as $repository) {
|
||||
$repository_phid = $repository->getPHID();
|
||||
|
||||
$write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock(
|
||||
$repository_phid);
|
||||
$write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock(
|
||||
$repository_phid);
|
||||
|
||||
echo tsprintf(
|
||||
"%s\n",
|
||||
pht(
|
||||
'Waiting to acquire write lock for "%s"...',
|
||||
$repository->getDisplayName()));
|
||||
echo tsprintf(
|
||||
"%s\n",
|
||||
pht(
|
||||
'Waiting to acquire write lock for "%s"...',
|
||||
$repository->getDisplayName()));
|
||||
|
||||
$write_lock->lock(phutil_units('5 minutes in seconds'));
|
||||
try {
|
||||
$write_lock->lock(phutil_units('5 minutes in seconds'));
|
||||
try {
|
||||
|
||||
$service = $repository->loadAlmanacService();
|
||||
if (!$service) {
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht(
|
||||
'Repository "%s" is not a cluster repository: it is not '.
|
||||
'bound to an Almanac service.',
|
||||
$repository->getDisplayName()));
|
||||
}
|
||||
|
||||
if ($promote) {
|
||||
// You can only promote active devices. (You may demote active or
|
||||
// inactive devices.)
|
||||
$bindings = $service->getActiveBindings();
|
||||
$bindings = mpull($bindings, null, 'getDevicePHID');
|
||||
if (empty($bindings[$device->getPHID()])) {
|
||||
$service = $repository->loadAlmanacService();
|
||||
if (!$service) {
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht(
|
||||
'Repository "%s" has no active binding to device "%s". Only '.
|
||||
'actively bound devices can be promoted.',
|
||||
$repository->getDisplayName(),
|
||||
$device->getName()));
|
||||
'Repository "%s" is not a cluster repository: it is not '.
|
||||
'bound to an Almanac service.',
|
||||
$repository->getDisplayName()));
|
||||
}
|
||||
|
||||
$versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions(
|
||||
$repository->getPHID());
|
||||
$versions = mpull($versions, null, 'getDevicePHID');
|
||||
|
||||
// Before we promote, make sure there are no outstanding versions on
|
||||
// devices with inactive bindings. If there are, you need to demote
|
||||
// these first.
|
||||
$inactive = array();
|
||||
foreach ($versions as $device_phid => $version) {
|
||||
if (isset($bindings[$device_phid])) {
|
||||
continue;
|
||||
if ($promote) {
|
||||
// You can only promote active devices. (You may demote active or
|
||||
// inactive devices.)
|
||||
$bindings = $service->getActiveBindings();
|
||||
$bindings = mpull($bindings, null, 'getDevicePHID');
|
||||
if (empty($bindings[$device->getPHID()])) {
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht(
|
||||
'Repository "%s" has no active binding to device "%s". '.
|
||||
'Only actively bound devices can be promoted.',
|
||||
$repository->getDisplayName(),
|
||||
$device->getName()));
|
||||
}
|
||||
$inactive[$device_phid] = $version;
|
||||
}
|
||||
|
||||
if ($inactive) {
|
||||
$handles = $viewer->loadHandles(array_keys($inactive));
|
||||
$versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions(
|
||||
$repository->getPHID());
|
||||
$versions = mpull($versions, null, 'getDevicePHID');
|
||||
|
||||
$handle_list = iterator_to_array($handles);
|
||||
$handle_list = mpull($handle_list, 'getName');
|
||||
$handle_list = implode(', ', $handle_list);
|
||||
// Before we promote, make sure there are no outstanding versions
|
||||
// on devices with inactive bindings. If there are, you need to
|
||||
// demote these first.
|
||||
$inactive = array();
|
||||
foreach ($versions as $device_phid => $version) {
|
||||
if (isset($bindings[$device_phid])) {
|
||||
continue;
|
||||
}
|
||||
$inactive[$device_phid] = $version;
|
||||
}
|
||||
|
||||
throw new PhutilArgumentUsageException(
|
||||
if ($inactive) {
|
||||
$handles = $viewer->loadHandles(array_keys($inactive));
|
||||
|
||||
$handle_list = iterator_to_array($handles);
|
||||
$handle_list = mpull($handle_list, 'getName');
|
||||
$handle_list = implode(', ', $handle_list);
|
||||
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht(
|
||||
'Repository "%s" has versions on inactive devices. Demote '.
|
||||
'(or reactivate) these devices before promoting a new '.
|
||||
'leader: %s.',
|
||||
$repository->getDisplayName(),
|
||||
$handle_list));
|
||||
}
|
||||
|
||||
// Now, make sure there are no outstanding versions on devices with
|
||||
// active bindings. These also need to be demoted (or promoting is
|
||||
// a mistake or already happened).
|
||||
$active = array_select_keys($versions, array_keys($bindings));
|
||||
if ($active) {
|
||||
$handles = $viewer->loadHandles(array_keys($active));
|
||||
|
||||
$handle_list = iterator_to_array($handles);
|
||||
$handle_list = mpull($handle_list, 'getName');
|
||||
$handle_list = implode(', ', $handle_list);
|
||||
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht(
|
||||
'Unable to promote "%s" for repository "%s" because this '.
|
||||
'cluster already has one or more unambiguous leaders: %s.',
|
||||
$device->getName(),
|
||||
$repository->getDisplayName(),
|
||||
$handle_list));
|
||||
}
|
||||
|
||||
PhabricatorRepositoryWorkingCopyVersion::updateVersion(
|
||||
$repository->getPHID(),
|
||||
$device->getPHID(),
|
||||
0);
|
||||
|
||||
echo tsprintf(
|
||||
"%s\n",
|
||||
pht(
|
||||
'Repository "%s" has versions on inactive devices. Demote '.
|
||||
'(or reactivate) these devices before promoting a new '.
|
||||
'leader: %s.',
|
||||
$repository->getDisplayName(),
|
||||
$handle_list));
|
||||
}
|
||||
|
||||
// Now, make sure there are no outstanding versions on devices with
|
||||
// active bindings. These also need to be demoted (or promoting is a
|
||||
// mistake or already happened).
|
||||
$active = array_select_keys($versions, array_keys($bindings));
|
||||
if ($active) {
|
||||
$handles = $viewer->loadHandles(array_keys($active));
|
||||
|
||||
$handle_list = iterator_to_array($handles);
|
||||
$handle_list = mpull($handle_list, 'getName');
|
||||
$handle_list = implode(', ', $handle_list);
|
||||
|
||||
throw new PhutilArgumentUsageException(
|
||||
pht(
|
||||
'Unable to promote "%s" for repository "%s" because this '.
|
||||
'cluster already has one or more unambiguous leaders: %s.',
|
||||
'Promoted "%s" to become a leader for "%s".',
|
||||
$device->getName(),
|
||||
$repository->getDisplayName(),
|
||||
$handle_list));
|
||||
$repository->getDisplayName()));
|
||||
}
|
||||
|
||||
PhabricatorRepositoryWorkingCopyVersion::updateVersion(
|
||||
$repository->getPHID(),
|
||||
$device->getPHID(),
|
||||
0);
|
||||
if ($demote) {
|
||||
PhabricatorRepositoryWorkingCopyVersion::demoteDevice(
|
||||
$repository->getPHID(),
|
||||
$device->getPHID());
|
||||
|
||||
echo tsprintf(
|
||||
"%s\n",
|
||||
pht(
|
||||
'Promoted "%s" to become a leader for "%s".',
|
||||
$device->getName(),
|
||||
$repository->getDisplayName()));
|
||||
echo tsprintf(
|
||||
"%s\n",
|
||||
pht(
|
||||
'Demoted "%s" from leadership of repository "%s".',
|
||||
$device->getName(),
|
||||
$repository->getDisplayName()));
|
||||
}
|
||||
} catch (Exception $ex) {
|
||||
$write_lock->unlock();
|
||||
throw $ex;
|
||||
}
|
||||
|
||||
if ($demote) {
|
||||
PhabricatorRepositoryWorkingCopyVersion::demoteDevice(
|
||||
$repository->getPHID(),
|
||||
$device->getPHID());
|
||||
|
||||
echo tsprintf(
|
||||
"%s\n",
|
||||
pht(
|
||||
'Demoted "%s" from leadership of repository "%s".',
|
||||
$device->getName(),
|
||||
$repository->getDisplayName()));
|
||||
}
|
||||
} catch (Exception $ex) {
|
||||
$write_lock->unlock();
|
||||
throw $ex;
|
||||
}
|
||||
|
||||
$write_lock->unlock();
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -433,6 +433,18 @@ If you do this, **you will lose unreplicated data**. You will discard any
|
|||
changes on the affected leaders which have not replicated to other devices
|
||||
in the cluster.
|
||||
|
||||
If you have lost an entire cluster and replaced it with new devices that you
|
||||
have restored from backups, you can aggressively wipe all memory of the old
|
||||
devices by using `--demote <service>` and `--all-repositories`. **This is
|
||||
dangerous and discards all unreplicated data in any repository on any device.**
|
||||
|
||||
```
|
||||
phabricator/ $ ./bin/repository thaw --demote repo.corp.net --all-repositories
|
||||
```
|
||||
|
||||
After you do this, continue below to promote a leader and restore the cluster
|
||||
to service.
|
||||
|
||||
|
||||
Ambiguous Leaders
|
||||
=================
|
||||
|
|
Loading…
Reference in a new issue