1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-19 05:12:41 +01:00

Make repository daemon locks more granular and forgiving

Summary:
Ref T4292. Currently, we hold one big lock around the whole `bin/repository update` workflow.

When running multiple daemons on different hosts, this lock can end up being contentious. In particular, we'll hold it during `git fetch` on every host globally, even though it's only useful to hold it locally per-device (that is, it's fine/good/expected if `repo001` and `repo002` happen to be fetching from a repository they are observing at the same time).

Instead, split it into two locks:

  - One lock is scoped to the current device, and held during pull (usually `git fetch`). This just keeps multiple daemons accidentally running on the same host from making a mess when trying to initialize or update a working copy.
  - One lock is scoped globally, and held during discovery. This makes sure daemons on different hosts don't step on each other when updating the database.

If we fail to acquire either lock, assume some other process is legitimately doing the work and bail out quietly instead of raising a fatal error. In approximately 100% of cases where users have hit this lock contention, that was the case: some other daemon was running somewhere doing the work, and the error didn't actually represent an issue.

If there's an actual problem, we still raise a diagnostically useful message if you run `bin/repository update` manually, so there are still tools to figure out whether something is hung.

Test Plan:
  - Ran `bin/repository update`, `pull`, `discover`.
  - Added `sleep(5)`, forced processes to contend, got lock exceptions and graceful exit with diagnostic message.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T4292

Differential Revision: https://secure.phabricator.com/D15903
This commit is contained in:
epriestley 2016-05-12 16:09:20 -07:00
parent 8cdafb0032
commit 1c73ad6a1b
7 changed files with 119 additions and 50 deletions

View file

@ -622,6 +622,7 @@ phutil_register_library_map(array(
'DiffusionController' => 'applications/diffusion/controller/DiffusionController.php', 'DiffusionController' => 'applications/diffusion/controller/DiffusionController.php',
'DiffusionCreateCommentConduitAPIMethod' => 'applications/diffusion/conduit/DiffusionCreateCommentConduitAPIMethod.php', 'DiffusionCreateCommentConduitAPIMethod' => 'applications/diffusion/conduit/DiffusionCreateCommentConduitAPIMethod.php',
'DiffusionCreateRepositoriesCapability' => 'applications/diffusion/capability/DiffusionCreateRepositoriesCapability.php', 'DiffusionCreateRepositoriesCapability' => 'applications/diffusion/capability/DiffusionCreateRepositoriesCapability.php',
'DiffusionDaemonLockException' => 'applications/diffusion/exception/DiffusionDaemonLockException.php',
'DiffusionDefaultEditCapability' => 'applications/diffusion/capability/DiffusionDefaultEditCapability.php', 'DiffusionDefaultEditCapability' => 'applications/diffusion/capability/DiffusionDefaultEditCapability.php',
'DiffusionDefaultPushCapability' => 'applications/diffusion/capability/DiffusionDefaultPushCapability.php', 'DiffusionDefaultPushCapability' => 'applications/diffusion/capability/DiffusionDefaultPushCapability.php',
'DiffusionDefaultViewCapability' => 'applications/diffusion/capability/DiffusionDefaultViewCapability.php', 'DiffusionDefaultViewCapability' => 'applications/diffusion/capability/DiffusionDefaultViewCapability.php',
@ -4845,6 +4846,7 @@ phutil_register_library_map(array(
'DiffusionController' => 'PhabricatorController', 'DiffusionController' => 'PhabricatorController',
'DiffusionCreateCommentConduitAPIMethod' => 'DiffusionConduitAPIMethod', 'DiffusionCreateCommentConduitAPIMethod' => 'DiffusionConduitAPIMethod',
'DiffusionCreateRepositoriesCapability' => 'PhabricatorPolicyCapability', 'DiffusionCreateRepositoriesCapability' => 'PhabricatorPolicyCapability',
'DiffusionDaemonLockException' => 'Exception',
'DiffusionDefaultEditCapability' => 'PhabricatorPolicyCapability', 'DiffusionDefaultEditCapability' => 'PhabricatorPolicyCapability',
'DiffusionDefaultPushCapability' => 'PhabricatorPolicyCapability', 'DiffusionDefaultPushCapability' => 'PhabricatorPolicyCapability',
'DiffusionDefaultViewCapability' => 'PhabricatorPolicyCapability', 'DiffusionDefaultViewCapability' => 'PhabricatorPolicyCapability',

View file

@ -10,6 +10,14 @@ final class AlmanacKeys extends Phobject {
} }
public static function getDeviceID() { public static function getDeviceID() {
// While running unit tests, ignore any configured device identity.
try {
PhabricatorTestCase::assertExecutingUnitTests();
return null;
} catch (Exception $ex) {
// Continue normally.
}
$device_id_path = self::getKeyPath('device.id'); $device_id_path = self::getKeyPath('device.id');
if (Filesystem::pathExists($device_id_path)) { if (Filesystem::pathExists($device_id_path)) {

View file

@ -0,0 +1,3 @@
<?php
/**
 * Raised when a repository pull or discovery is skipped because the
 * corresponding repository lock could not be acquired (another process is
 * reported to be doing the same work).
 */
final class DiffusionDaemonLockException extends Exception {}

View file

@ -37,6 +37,33 @@ final class PhabricatorRepositoryDiscoveryEngine
/**
 * Run commit discovery for this engine's repository while holding the
 * repository's "repo.look" lock.
 *
 * The lock is requested with the device-only flag turned off, so its name
 * is not suffixed with a device ID.
 *
 * @return wild Whatever discoverCommitsWithLock() returns.
 * @throws DiffusionDaemonLockException When acquiring the lock raises a
 *   PhutilLockException; discovery is skipped in that case.
 */
public function discoverCommits() {
  $repository = $this->getRepository();

  $lock = $this->newRepositoryLock($repository, 'repo.look', false);

  try {
    $lock->lock();
  } catch (PhutilLockException $ex) {
    throw new DiffusionDaemonLockException(
      pht(
        'Another process is currently discovering repository "%s", '.
        'skipping discovery.',
        $repository->getDisplayName()));
  }

  try {
    $result = $this->discoverCommitsWithLock();
  } catch (Exception $ex) {
    // Release the lock before letting the failure propagate.
    $lock->unlock();
    throw $ex;
  }

  $lock->unlock();

  return $result;
}
private function discoverCommitsWithLock() {
$repository = $this->getRepository();
$vcs = $repository->getVersionControlSystem(); $vcs = $repository->getVersionControlSystem();
switch ($vcs) { switch ($vcs) {
case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:

View file

@ -51,6 +51,27 @@ abstract class PhabricatorRepositoryEngine extends Phobject {
return PhabricatorUser::getOmnipotentUser(); return PhabricatorUser::getOmnipotentUser();
} }
/**
 * Build a named lock for an operation on a repository.
 *
 * The lock name is the lock key and the repository ID joined with ":".
 * When a device-only lock is requested and a live device is available, the
 * device ID is appended as a third component; with no live device the name
 * is left without a device suffix.
 *
 * @param PhabricatorRepository $repository Repository the lock applies to.
 * @param string $lock_key Prefix identifying the kind of operation.
 * @param bool $lock_device_only True to scope the lock name to the
 *   current device.
 * @return wild The lock produced by PhabricatorGlobalLock::newLock().
 */
protected function newRepositoryLock(
  PhabricatorRepository $repository,
  $lock_key,
  $lock_device_only) {

  $name_parts = array(
    $lock_key,
    $repository->getID(),
  );

  if ($lock_device_only) {
    $live_device = AlmanacKeys::getLiveDevice();
    if ($live_device) {
      $name_parts[] = $live_device->getID();
    }
  }

  return PhabricatorGlobalLock::newLock(implode(':', $name_parts));
}
/** /**
* Verify that the "origin" remote exists, and points at the correct URI. * Verify that the "origin" remote exists, and points at the correct URI.
* *

View file

@ -23,6 +23,33 @@ final class PhabricatorRepositoryPullEngine
/**
 * Pull this engine's repository while holding the repository's
 * "repo.pull" lock.
 *
 * The lock is requested with the device-only flag turned on, so its name
 * is suffixed with the live device ID when one is available.
 *
 * @return wild Whatever pullRepositoryWithLock() returns.
 * @throws DiffusionDaemonLockException When acquiring the lock raises a
 *   PhutilLockException; the pull is skipped in that case.
 */
public function pullRepository() {
  $repository = $this->getRepository();

  $lock = $this->newRepositoryLock($repository, 'repo.pull', true);

  try {
    $lock->lock();
  } catch (PhutilLockException $ex) {
    throw new DiffusionDaemonLockException(
      pht(
        'Another process is currently updating repository "%s", '.
        'skipping pull.',
        $repository->getDisplayName()));
  }

  try {
    $result = $this->pullRepositoryWithLock();
  } catch (Exception $ex) {
    // Release the lock before letting the failure propagate.
    $lock->unlock();
    throw $ex;
  }

  $lock->unlock();

  return $result;
}
private function pullRepositoryWithLock() {
$repository = $this->getRepository();
$viewer = PhabricatorUser::getOmnipotentUser(); $viewer = PhabricatorUser::getOmnipotentUser();
$is_hg = false; $is_hg = false;

View file

@ -53,35 +53,14 @@ final class PhabricatorRepositoryManagementUpdateWorkflow
$repository = head($repos); $repository = head($repos);
try { try {
$lock_name = 'repository.update:'.$repository->getID();
$lock = PhabricatorGlobalLock::newLock($lock_name);
try {
$lock->lock();
} catch (PhutilLockException $ex) {
throw new PhutilProxyException(
pht(
'Another process is currently holding the update lock for '.
'repository "%s". Repositories may only be updated by one '.
'process at a time. This can happen if you are running multiple '.
'copies of the daemons. This can also happen if you manually '.
'update a repository while the daemons are also updating it '.
'(in this case, just try again in a few moments).',
$repository->getMonogram()),
$ex);
}
try {
$no_discovery = $args->getArg('no-discovery');
id(new PhabricatorRepositoryPullEngine()) id(new PhabricatorRepositoryPullEngine())
->setRepository($repository) ->setRepository($repository)
->setVerbose($this->getVerbose()) ->setVerbose($this->getVerbose())
->pullRepository(); ->pullRepository();
$no_discovery = $args->getArg('no-discovery');
if ($no_discovery) { if ($no_discovery) {
$lock->unlock(); return 0;
return;
} }
// TODO: It would be nice to discover only if we pulled something, but // TODO: It would be nice to discover only if we pulled something, but
@ -103,10 +82,13 @@ final class PhabricatorRepositoryManagementUpdateWorkflow
$repository->writeStatusMessage( $repository->writeStatusMessage(
PhabricatorRepositoryStatusMessage::TYPE_FETCH, PhabricatorRepositoryStatusMessage::TYPE_FETCH,
PhabricatorRepositoryStatusMessage::CODE_OKAY); PhabricatorRepositoryStatusMessage::CODE_OKAY);
} catch (Exception $ex) { } catch (DiffusionDaemonLockException $ex) {
$lock->unlock(); // If we miss a pull or discover because some other process is already
throw $ex; // doing the work, just bail out.
} echo tsprintf(
"%s\n",
$ex->getMessage());
return 0;
} catch (Exception $ex) { } catch (Exception $ex) {
$repository->writeStatusMessage( $repository->writeStatusMessage(
PhabricatorRepositoryStatusMessage::TYPE_FETCH, PhabricatorRepositoryStatusMessage::TYPE_FETCH,
@ -118,12 +100,11 @@ final class PhabricatorRepositoryManagementUpdateWorkflow
throw $ex; throw $ex;
} }
$lock->unlock(); echo tsprintf(
"%s\n",
$console->writeOut(
pht( pht(
'Updated repository **%s**.', 'Updated repository "%s".',
$repository->getMonogram())."\n"); $repository->getDisplayName()));
return 0; return 0;
} }