1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-22 23:02:42 +01:00

Lock resources briefly while acquiring leases on them to prevent acquiring near-death resources

Summary:
Depends on D19078. Ref T13073. Currently, there is a narrow window where we can acquire a resource after a reclaim has started against it.

To prevent this, briefly lock resources before acquiring them and make sure they're still good. If a resource isn't good, throw the lease back in the pool.

Test Plan:
This is tricky. You need:

  - Hoax blueprint with limits and a rule where leases of a given "flavor" can only be satisfied by resources of the same flavor.
  - Reduce the 3-minute "wait before resources can be released" to 3 seconds.
  - Limit Hoaxes to 1.
  - Allocate one "cherry" flavored Hoax and release the lease.
  - Add a `sleep(15)` to `releaseResource()` in `DrydockResourceUpdateWorker`, after the `canReclaimResource()` check, with a `print`.

Now:

  - Run `bin/phd debug task` in two windows.
  - Run `bin/drydock lease --type host --attributes flavor=banana` in a third window.
  - This will start to reclaim the existing "cherry" resource. Once one of the `phd` windows prints the "RECLAIMING" message run `bin/drydock lease --type host --attributes flavor=cherry` in a fourth window.
  - Before patch: the "cherry" lease acquired immediately, then was released and destroyed moments later.
  - After patch: the "cherry" lease yields.

Subscribers: PHID-OPKG-gm6ozazyms6q6i22gyam

Maniphest Tasks: T13073

Differential Revision: https://secure.phabricator.com/D19080
This commit is contained in:
epriestley 2018-02-13 10:46:14 -08:00
parent 619943bea0
commit a1baedbd9a
4 changed files with 91 additions and 24 deletions

View file

@ -1113,6 +1113,7 @@ phutil_register_library_map(array(
'DrydockResourceDatasource' => 'applications/drydock/typeahead/DrydockResourceDatasource.php',
'DrydockResourceListController' => 'applications/drydock/controller/DrydockResourceListController.php',
'DrydockResourceListView' => 'applications/drydock/view/DrydockResourceListView.php',
'DrydockResourceLockException' => 'applications/drydock/exception/DrydockResourceLockException.php',
'DrydockResourcePHIDType' => 'applications/drydock/phid/DrydockResourcePHIDType.php',
'DrydockResourceQuery' => 'applications/drydock/query/DrydockResourceQuery.php',
'DrydockResourceReclaimLogType' => 'applications/drydock/logtype/DrydockResourceReclaimLogType.php',
@ -6344,6 +6345,7 @@ phutil_register_library_map(array(
'DrydockResourceDatasource' => 'PhabricatorTypeaheadDatasource',
'DrydockResourceListController' => 'DrydockResourceController',
'DrydockResourceListView' => 'AphrontView',
'DrydockResourceLockException' => 'Exception',
'DrydockResourcePHIDType' => 'PhabricatorPHIDType',
'DrydockResourceQuery' => 'DrydockQuery',
'DrydockResourceReclaimLogType' => 'DrydockLogType',

View file

@ -0,0 +1,4 @@
<?php
final class DrydockResourceLockException
extends Exception {}

View file

@ -213,30 +213,75 @@ final class DrydockLease extends DrydockDAO
}
}
$this->openTransaction();
// Before we associate the lease with the resource, we lock the resource
// and reload it to make sure it is still pending or active. If we don't
// do this, the resource may have just been reclaimed. (Once we acquire
// the resource that stops it from being released, so we're nearly safe.)
$resource_phid = $resource->getPHID();
$hash = PhabricatorHash::digestForIndex($resource_phid);
$lock_key = 'drydock.resource:'.$hash;
$lock = PhabricatorGlobalLock::newLock($lock_key);
try {
DrydockSlotLock::acquireLocks($this->getPHID(), $this->slotLocks);
$this->slotLocks = array();
} catch (DrydockSlotLockException $ex) {
$this->killTransaction();
$this->logEvent(
DrydockSlotLockFailureLogType::LOGCONST,
array(
'locks' => $ex->getLockMap(),
));
throw $ex;
$lock->lock(15);
} catch (Exception $ex) {
throw new DrydockResourceLockException(
pht(
'Failed to acquire lock for resource ("%s") while trying to '.
'acquire lease ("%s").',
$resource->getPHID(),
$this->getPHID()));
}
$this
->setResourcePHID($resource->getPHID())
->attachResource($resource)
->setStatus($new_status)
->save();
$resource->reload();
$this->saveTransaction();
if (($resource->getStatus() !== DrydockResourceStatus::STATUS_ACTIVE) &&
($resource->getStatus() !== DrydockResourceStatus::STATUS_PENDING)) {
throw new DrydockAcquiredBrokenResourceException(
pht(
'Trying to acquire lease ("%s") on a resource ("%s") in the '.
'wrong status ("%s").',
$this->getPHID(),
$resource->getPHID(),
$resource->getStatus()));
}
$caught = null;
try {
$this->openTransaction();
try {
DrydockSlotLock::acquireLocks($this->getPHID(), $this->slotLocks);
$this->slotLocks = array();
} catch (DrydockSlotLockException $ex) {
$this->killTransaction();
$this->logEvent(
DrydockSlotLockFailureLogType::LOGCONST,
array(
'locks' => $ex->getLockMap(),
));
throw $ex;
}
$this
->setResourcePHID($resource->getPHID())
->attachResource($resource)
->setStatus($new_status)
->save();
$this->saveTransaction();
} catch (Exception $ex) {
$caught = $ex;
}
$lock->unlock();
if ($caught) {
throw $caught;
}
$this->isAcquired = true;

View file

@ -53,6 +53,7 @@ final class DrydockLeaseUpdateWorker extends DrydockWorker {
$lease
->setStatus(DrydockLeaseStatus::STATUS_PENDING)
->setResourcePHID(null)
->save();
$lease->logEvent(
@ -301,23 +302,38 @@ final class DrydockLeaseUpdateWorker extends DrydockWorker {
$resources = $this->rankResources($resources, $lease);
$exceptions = array();
$yields = array();
$allocated = false;
foreach ($resources as $resource) {
try {
$this->acquireLease($resource, $lease);
$allocated = true;
break;
} catch (DrydockResourceLockException $ex) {
// We need to lock the resource to actually acquire it. If we aren't
// able to acquire the lock quickly enough, we can yield and try again
// later.
$yields[] = $ex;
} catch (DrydockAcquiredBrokenResourceException $ex) {
// If a resource was reclaimed or destroyed by the time we actually
// got around to acquiring it, we just got unlucky. We can yield and
// try again later.
$yields[] = $ex;
} catch (Exception $ex) {
$exceptions[] = $ex;
}
}
if (!$allocated) {
throw new PhutilAggregateException(
pht(
'Unable to acquire lease "%s" on any resource.',
$lease->getPHID()),
$exceptions);
if ($yields) {
throw new PhabricatorWorkerYieldException(15);
} else {
throw new PhutilAggregateException(
pht(
'Unable to acquire lease "%s" on any resource.',
$lease->getPHID()),
$exceptions);
}
}
}