mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-23 22:10:55 +01:00
Lock resources briefly while acquiring leases on them to prevent acquiring near-death resources
Summary: Depends on D19078. Ref T13073. Currently, there is a narrow window where we can acquire a resource after a reclaim has started against it. To prevent this, briefly lock resources before acquiring them and make sure they're still good. If a resource isn't good, throw the lease back in the pool. Test Plan: This is tricky. You need: - Hoax blueprint with limits and a rule where leases of a given "flavor" can only be satisfied by resources of the same flavor. - Reduce the 3-minute "wait before resources can be released" to 3 seconds. - Limit Hoaxes to 1. - Allocate one "cherry" flavored Hoax and release the lease. - Add a `sleep(15)` to `releaseResource()` in `DrydockResourceUpdateWorker`, after the `canReclaimResource()` check, with a `print`. Now: - Run `bin/phd debug task` in two windows. - Run `bin/drydock lease --type host --attributes flavor=banana` in a third window. - This will start to reclaim the existing "cherry" resource. Once one of the `phd` windows prints the "RECLAIMING" message run `bin/drydock lease --type host --attributes flavor=cherry` in a fourth window. - Before patch: the "cherry" lease acquired immediately, then was released and destroyed moments later. - After patch: the "cherry" lease yields. Subscribers: PHID-OPKG-gm6ozazyms6q6i22gyam Maniphest Tasks: T13073 Differential Revision: https://secure.phabricator.com/D19080
This commit is contained in:
parent
619943bea0
commit
a1baedbd9a
4 changed files with 91 additions and 24 deletions
|
@ -1113,6 +1113,7 @@ phutil_register_library_map(array(
|
|||
'DrydockResourceDatasource' => 'applications/drydock/typeahead/DrydockResourceDatasource.php',
|
||||
'DrydockResourceListController' => 'applications/drydock/controller/DrydockResourceListController.php',
|
||||
'DrydockResourceListView' => 'applications/drydock/view/DrydockResourceListView.php',
|
||||
'DrydockResourceLockException' => 'applications/drydock/exception/DrydockResourceLockException.php',
|
||||
'DrydockResourcePHIDType' => 'applications/drydock/phid/DrydockResourcePHIDType.php',
|
||||
'DrydockResourceQuery' => 'applications/drydock/query/DrydockResourceQuery.php',
|
||||
'DrydockResourceReclaimLogType' => 'applications/drydock/logtype/DrydockResourceReclaimLogType.php',
|
||||
|
@ -6344,6 +6345,7 @@ phutil_register_library_map(array(
|
|||
'DrydockResourceDatasource' => 'PhabricatorTypeaheadDatasource',
|
||||
'DrydockResourceListController' => 'DrydockResourceController',
|
||||
'DrydockResourceListView' => 'AphrontView',
|
||||
'DrydockResourceLockException' => 'Exception',
|
||||
'DrydockResourcePHIDType' => 'PhabricatorPHIDType',
|
||||
'DrydockResourceQuery' => 'DrydockQuery',
|
||||
'DrydockResourceReclaimLogType' => 'DrydockLogType',
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
<?php
|
||||
|
||||
final class DrydockResourceLockException
|
||||
extends Exception {}
|
|
@ -213,30 +213,75 @@ final class DrydockLease extends DrydockDAO
|
|||
}
|
||||
}
|
||||
|
||||
$this->openTransaction();
|
||||
// Before we associate the lease with the resource, we lock the resource
|
||||
// and reload it to make sure it is still pending or active. If we don't
|
||||
// do this, the resource may have just been reclaimed. (Once we acquire
|
||||
// the resource that stops it from being released, so we're nearly safe.)
|
||||
|
||||
$resource_phid = $resource->getPHID();
|
||||
$hash = PhabricatorHash::digestForIndex($resource_phid);
|
||||
$lock_key = 'drydock.resource:'.$hash;
|
||||
$lock = PhabricatorGlobalLock::newLock($lock_key);
|
||||
|
||||
try {
|
||||
DrydockSlotLock::acquireLocks($this->getPHID(), $this->slotLocks);
|
||||
$this->slotLocks = array();
|
||||
} catch (DrydockSlotLockException $ex) {
|
||||
$this->killTransaction();
|
||||
|
||||
$this->logEvent(
|
||||
DrydockSlotLockFailureLogType::LOGCONST,
|
||||
array(
|
||||
'locks' => $ex->getLockMap(),
|
||||
));
|
||||
|
||||
throw $ex;
|
||||
$lock->lock(15);
|
||||
} catch (Exception $ex) {
|
||||
throw new DrydockResourceLockException(
|
||||
pht(
|
||||
'Failed to acquire lock for resource ("%s") while trying to '.
|
||||
'acquire lease ("%s").',
|
||||
$resource->getPHID(),
|
||||
$this->getPHID()));
|
||||
}
|
||||
|
||||
$this
|
||||
->setResourcePHID($resource->getPHID())
|
||||
->attachResource($resource)
|
||||
->setStatus($new_status)
|
||||
->save();
|
||||
$resource->reload();
|
||||
|
||||
$this->saveTransaction();
|
||||
if (($resource->getStatus() !== DrydockResourceStatus::STATUS_ACTIVE) &&
|
||||
($resource->getStatus() !== DrydockResourceStatus::STATUS_PENDING)) {
|
||||
throw new DrydockAcquiredBrokenResourceException(
|
||||
pht(
|
||||
'Trying to acquire lease ("%s") on a resource ("%s") in the '.
|
||||
'wrong status ("%s").',
|
||||
$this->getPHID(),
|
||||
$resource->getPHID(),
|
||||
$resource->getStatus()));
|
||||
}
|
||||
|
||||
$caught = null;
|
||||
try {
|
||||
$this->openTransaction();
|
||||
|
||||
try {
|
||||
DrydockSlotLock::acquireLocks($this->getPHID(), $this->slotLocks);
|
||||
$this->slotLocks = array();
|
||||
} catch (DrydockSlotLockException $ex) {
|
||||
$this->killTransaction();
|
||||
|
||||
$this->logEvent(
|
||||
DrydockSlotLockFailureLogType::LOGCONST,
|
||||
array(
|
||||
'locks' => $ex->getLockMap(),
|
||||
));
|
||||
|
||||
throw $ex;
|
||||
}
|
||||
|
||||
$this
|
||||
->setResourcePHID($resource->getPHID())
|
||||
->attachResource($resource)
|
||||
->setStatus($new_status)
|
||||
->save();
|
||||
|
||||
$this->saveTransaction();
|
||||
} catch (Exception $ex) {
|
||||
$caught = $ex;
|
||||
}
|
||||
|
||||
$lock->unlock();
|
||||
|
||||
if ($caught) {
|
||||
throw $caught;
|
||||
}
|
||||
|
||||
$this->isAcquired = true;
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ final class DrydockLeaseUpdateWorker extends DrydockWorker {
|
|||
|
||||
$lease
|
||||
->setStatus(DrydockLeaseStatus::STATUS_PENDING)
|
||||
->setResourcePHID(null)
|
||||
->save();
|
||||
|
||||
$lease->logEvent(
|
||||
|
@ -301,23 +302,38 @@ final class DrydockLeaseUpdateWorker extends DrydockWorker {
|
|||
$resources = $this->rankResources($resources, $lease);
|
||||
|
||||
$exceptions = array();
|
||||
$yields = array();
|
||||
$allocated = false;
|
||||
foreach ($resources as $resource) {
|
||||
try {
|
||||
$this->acquireLease($resource, $lease);
|
||||
$allocated = true;
|
||||
break;
|
||||
} catch (DrydockResourceLockException $ex) {
|
||||
// We need to lock the resource to actually acquire it. If we aren't
|
||||
// able to acquire the lock quickly enough, we can yield and try again
|
||||
// later.
|
||||
$yields[] = $ex;
|
||||
} catch (DrydockAcquiredBrokenResourceException $ex) {
|
||||
// If a resource was reclaimed or destroyed by the time we actually
|
||||
// got around to acquiring it, we just got unlucky. We can yield and
|
||||
// try again later.
|
||||
$yields[] = $ex;
|
||||
} catch (Exception $ex) {
|
||||
$exceptions[] = $ex;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$allocated) {
|
||||
throw new PhutilAggregateException(
|
||||
pht(
|
||||
'Unable to acquire lease "%s" on any resource.',
|
||||
$lease->getPHID()),
|
||||
$exceptions);
|
||||
if ($yields) {
|
||||
throw new PhabricatorWorkerYieldException(15);
|
||||
} else {
|
||||
throw new PhutilAggregateException(
|
||||
pht(
|
||||
'Unable to acquire lease "%s" on any resource.',
|
||||
$lease->getPHID()),
|
||||
$exceptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue