From 1a6a0181a8985371fe1470cbea24463885850668 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 5 Dec 2018 12:22:14 -0800 Subject: [PATCH] Allow "bin/repository thaw --demote" to demote an entire service, not just a single device Summary: Ref T13222. See PHI992. If you lose an entire cluster, you may want to aggressively demote it out of existence. You currently need to `xargs` your way through this. Allow `--demote `, which demotes all devices in a service. Test Plan: Demoted with `--demote ` and `--demote `. Hit the `--promote service` error. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13222 Differential Revision: https://secure.phabricator.com/D19850 --- ...icatorRepositoryManagementThawWorkflow.php | 263 ++++++++++-------- .../user/cluster/cluster_repositories.diviner | 12 + 2 files changed, 164 insertions(+), 111 deletions(-) diff --git a/src/applications/repository/management/PhabricatorRepositoryManagementThawWorkflow.php b/src/applications/repository/management/PhabricatorRepositoryManagementThawWorkflow.php index ed2e1df3e2..3743045c34 100644 --- a/src/applications/repository/management/PhabricatorRepositoryManagementThawWorkflow.php +++ b/src/applications/repository/management/PhabricatorRepositoryManagementThawWorkflow.php @@ -15,10 +15,11 @@ final class PhabricatorRepositoryManagementThawWorkflow array( array( 'name' => 'demote', - 'param' => 'device', + 'param' => 'device/service', 'help' => pht( - 'Demote a device, discarding local changes. Clears stuck '. - 'write locks and recovers from lost leaders.'), + 'Demote a device (or all devices in a service) discarding '. + 'local changes. Clears stuck write locks and recovers from '. + 'lost leaders.'), ), array( 'name' => 'promote', @@ -61,15 +62,53 @@ final class PhabricatorRepositoryManagementThawWorkflow pht('Specify either --promote or --demote, but not both.')); } - $device_name = nonempty($promote, $demote); + $target_name = nonempty($promote, $demote); - $device = id(new AlmanacDeviceQuery()) + $devices = id(new AlmanacDeviceQuery()) ->setViewer($viewer) - ->withNames(array($device_name)) - ->executeOne(); - if (!$device) { - throw new PhutilArgumentUsageException( - pht('No device "%s" exists.', $device_name)); + ->withNames(array($target_name)) + ->execute(); + if (!$devices) { + $service = id(new AlmanacServiceQuery()) + ->setViewer($viewer) + ->withNames(array($target_name)) + ->executeOne(); + + if (!$service) { + throw new PhutilArgumentUsageException( + pht('No device or service named "%s" exists.', $target_name)); + } + + if ($promote) { + throw new PhutilArgumentUsageException( + pht( + 'You can not "--promote" an entire service ("%s"). Only a single '. + 'device may be promoted.', + $target_name)); + } + + $bindings = id(new AlmanacBindingQuery()) + ->setViewer($viewer) + ->withServicePHIDs(array($service->getPHID())) + ->execute(); + if (!$bindings) { + throw new PhutilArgumentUsageException( + pht( + 'Service "%s" is not bound to any devices.', + $target_name)); + } + + $interfaces = id(new AlmanacInterfaceQuery()) + ->setViewer($viewer) + ->withPHIDs(mpull($bindings, 'getInterfacePHID')) + ->execute(); + + $device_phids = mpull($interfaces, 'getDevicePHID'); + + $devices = id(new AlmanacDeviceQuery()) + ->setViewer($viewer) + ->withPHIDs($device_phids) + ->execute(); } $repository_names = $args->getArg('repositories'); @@ -97,7 +136,7 @@ final class PhabricatorRepositoryManagementThawWorkflow $services = id(new AlmanacServiceQuery()) ->setViewer($viewer) - ->withDevicePHIDs(array($device->getPHID())) + ->withDevicePHIDs(mpull($devices, 'getPHID')) ->execute(); if ($services) { $repositories = id(new PhabricatorRepositoryQuery()) @@ -108,7 +147,7 @@ final class PhabricatorRepositoryManagementThawWorkflow if (!$repositories) { throw new PhutilArgumentUsageException( - pht('There are no repositories on the selected device.')); + pht('There are no repositories on the selected device or service.')); } } @@ -150,126 +189,128 @@ final class PhabricatorRepositoryManagementThawWorkflow pht('User aborted the workflow.')); } - foreach ($repositories as $repository) { - $repository_phid = $repository->getPHID(); + foreach ($devices as $device) { + foreach ($repositories as $repository) { + $repository_phid = $repository->getPHID(); - $write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock( - $repository_phid); + $write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock( + $repository_phid); - echo tsprintf( - "%s\n", - pht( - 'Waiting to acquire write lock for "%s"...', - $repository->getDisplayName())); + echo tsprintf( + "%s\n", + pht( + 'Waiting to acquire write lock for "%s"...', + $repository->getDisplayName())); - $write_lock->lock(phutil_units('5 minutes in seconds')); - try { + $write_lock->lock(phutil_units('5 minutes in seconds')); + try { - $service = $repository->loadAlmanacService(); - if (!$service) { - throw new PhutilArgumentUsageException( - pht( - 'Repository "%s" is not a cluster repository: it is not '. - 'bound to an Almanac service.', - $repository->getDisplayName())); - } - - if ($promote) { - // You can only promote active devices. (You may demote active or - // inactive devices.) - $bindings = $service->getActiveBindings(); - $bindings = mpull($bindings, null, 'getDevicePHID'); - if (empty($bindings[$device->getPHID()])) { + $service = $repository->loadAlmanacService(); + if (!$service) { throw new PhutilArgumentUsageException( pht( - 'Repository "%s" has no active binding to device "%s". Only '. - 'actively bound devices can be promoted.', - $repository->getDisplayName(), - $device->getName())); + 'Repository "%s" is not a cluster repository: it is not '. + 'bound to an Almanac service.', + $repository->getDisplayName())); } - $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions( - $repository->getPHID()); - $versions = mpull($versions, null, 'getDevicePHID'); - - // Before we promote, make sure there are no outstanding versions on - // devices with inactive bindings. If there are, you need to demote - // these first. - $inactive = array(); - foreach ($versions as $device_phid => $version) { - if (isset($bindings[$device_phid])) { - continue; + if ($promote) { + // You can only promote active devices. (You may demote active or + // inactive devices.) + $bindings = $service->getActiveBindings(); + $bindings = mpull($bindings, null, 'getDevicePHID'); + if (empty($bindings[$device->getPHID()])) { + throw new PhutilArgumentUsageException( + pht( + 'Repository "%s" has no active binding to device "%s". '. + 'Only actively bound devices can be promoted.', + $repository->getDisplayName(), + $device->getName())); } - $inactive[$device_phid] = $version; - } - if ($inactive) { - $handles = $viewer->loadHandles(array_keys($inactive)); + $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions( + $repository->getPHID()); + $versions = mpull($versions, null, 'getDevicePHID'); - $handle_list = iterator_to_array($handles); - $handle_list = mpull($handle_list, 'getName'); - $handle_list = implode(', ', $handle_list); + // Before we promote, make sure there are no outstanding versions + // on devices with inactive bindings. If there are, you need to + // demote these first. + $inactive = array(); + foreach ($versions as $device_phid => $version) { + if (isset($bindings[$device_phid])) { + continue; + } + $inactive[$device_phid] = $version; + } - throw new PhutilArgumentUsageException( + if ($inactive) { + $handles = $viewer->loadHandles(array_keys($inactive)); + + $handle_list = iterator_to_array($handles); + $handle_list = mpull($handle_list, 'getName'); + $handle_list = implode(', ', $handle_list); + + throw new PhutilArgumentUsageException( + pht( + 'Repository "%s" has versions on inactive devices. Demote '. + '(or reactivate) these devices before promoting a new '. + 'leader: %s.', + $repository->getDisplayName(), + $handle_list)); + } + + // Now, make sure there are no outstanding versions on devices with + // active bindings. These also need to be demoted (or promoting is + // a mistake or already happened). + $active = array_select_keys($versions, array_keys($bindings)); + if ($active) { + $handles = $viewer->loadHandles(array_keys($active)); + + $handle_list = iterator_to_array($handles); + $handle_list = mpull($handle_list, 'getName'); + $handle_list = implode(', ', $handle_list); + + throw new PhutilArgumentUsageException( + pht( + 'Unable to promote "%s" for repository "%s" because this '. + 'cluster already has one or more unambiguous leaders: %s.', + $device->getName(), + $repository->getDisplayName(), + $handle_list)); + } + + PhabricatorRepositoryWorkingCopyVersion::updateVersion( + $repository->getPHID(), + $device->getPHID(), + 0); + + echo tsprintf( + "%s\n", pht( - 'Repository "%s" has versions on inactive devices. Demote '. - '(or reactivate) these devices before promoting a new '. - 'leader: %s.', - $repository->getDisplayName(), - $handle_list)); - } - - // Now, make sure there are no outstanding versions on devices with - // active bindings. These also need to be demoted (or promoting is a - // mistake or already happened). - $active = array_select_keys($versions, array_keys($bindings)); - if ($active) { - $handles = $viewer->loadHandles(array_keys($active)); - - $handle_list = iterator_to_array($handles); - $handle_list = mpull($handle_list, 'getName'); - $handle_list = implode(', ', $handle_list); - - throw new PhutilArgumentUsageException( - pht( - 'Unable to promote "%s" for repository "%s" because this '. - 'cluster already has one or more unambiguous leaders: %s.', + 'Promoted "%s" to become a leader for "%s".', $device->getName(), - $repository->getDisplayName(), - $handle_list)); + $repository->getDisplayName())); } - PhabricatorRepositoryWorkingCopyVersion::updateVersion( - $repository->getPHID(), - $device->getPHID(), - 0); + if ($demote) { + PhabricatorRepositoryWorkingCopyVersion::demoteDevice( + $repository->getPHID(), + $device->getPHID()); - echo tsprintf( - "%s\n", - pht( - 'Promoted "%s" to become a leader for "%s".', - $device->getName(), - $repository->getDisplayName())); + echo tsprintf( + "%s\n", + pht( + 'Demoted "%s" from leadership of repository "%s".', + $device->getName(), + $repository->getDisplayName())); + } + } catch (Exception $ex) { + $write_lock->unlock(); + throw $ex; } - if ($demote) { - PhabricatorRepositoryWorkingCopyVersion::demoteDevice( - $repository->getPHID(), - $device->getPHID()); - - echo tsprintf( - "%s\n", - pht( - 'Demoted "%s" from leadership of repository "%s".', - $device->getName(), - $repository->getDisplayName())); - } - } catch (Exception $ex) { $write_lock->unlock(); - throw $ex; } - - $write_lock->unlock(); } return 0; diff --git a/src/docs/user/cluster/cluster_repositories.diviner b/src/docs/user/cluster/cluster_repositories.diviner index cdf2641933..13307d0b79 100644 --- a/src/docs/user/cluster/cluster_repositories.diviner +++ b/src/docs/user/cluster/cluster_repositories.diviner @@ -433,6 +433,18 @@ If you do this, **you will lose unreplicated data**. You will discard any changes on the affected leaders which have not replicated to other devices in the cluster. +If you have lost an entire cluster and replaced it with new devices that you +have restored from backups, you can aggressively wipe all memory of the old +devices by using `--demote ` and `--all-repositories`. **This is +dangerous and discards all unreplicated data in any repository on any device.** + +``` +phabricator/ $ ./bin/repository thaw --demote repo.corp.net --all-repositories +``` + +After you do this, continue below to promote a leader and restore the cluster +to service. + Ambiguous Leaders =================