From 3ac99006bfd5998f916faa3c413fc7676c0bc018 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 21 Sep 2015 04:45:25 -0700 Subject: [PATCH] Implement optimistic "slot locks" in Drydock Summary: See discussion in D10304. There's a lot of context there, but the general idea is: - Blueprints should manage locks in a granular way during the actual allocation/acquisition phase. - Optimistic "slot locks" might a pretty good primitive to make that easy to implement and reason about in most cases. The way these locks work is that you just pick some name for the lock (like the PHID of a resource) and say that it needs to be acquired for the allocation/acquisition to work: ``` ... ->needSlotLock("mylock(PHID-XYZQ-...)") ... ``` When you fire off the acquisition or allocation, it fails unless it could acquire the slot with that name. This is really simple (no explicit lock management) and a pretty good fit for most of the locking that blueprints and leases need to do. If you need to do limit-based locks (e.g., maximum of 3 locks) you could acquire a lock like this: ``` mylock(whatever).slot(2) ``` Blueprints generally only contend with themselves, so it's normally OK for them to pick whatever strategy works best for them in naming locks. This may not work as well if you have a huge number of slots (e.g., 100TB you want to give out in 1MB chunks), or other complex needs for locks (like you have to synchronize access to some external resource), but slot locks don't need to be the only mechanism that blueprints use. If they run into a problem that slot locks aren't a good fit for, they can use something else instead. For now, slot locks seem like a good fit for the problems we currently face and most of the problems I anticipate facing. (The release workflows have other race issues which I'm not addressing here. They work fine if nothing races, but aren't race-safe.) Test Plan: To create a race where the same binding is allocated as a resource twice: - Add `sleep(10)` near the beginning of `allocateResource()`, after the free bindings are loaded but before resources are allocated. - (Comment out slot lock acquisition if you have this patch.) - Run `bin/drydock lease ...` in two windows, within 10 seconds of one another. This will reliably double-allocate the binding because both blueprints see a view of the world where the binding is free. To verify the lock works, un-comment it (or apply this patch) and run the same test again. Now, the lock fails in one process and only one resource is allocated. Reviewers: hach-que, chad Reviewed By: hach-que, chad Differential Revision: https://secure.phabricator.com/D14118 --- .../20150916.drydock.slotlocks.1.sql | 8 ++ src/__phutil_library_map__.php | 2 + ...anacServiceHostBlueprintImplementation.php | 20 ++-- .../DrydockBlueprintImplementation.php | 6 + .../drydock/storage/DrydockBlueprint.php | 11 ++ .../drydock/storage/DrydockLease.php | 23 +++- .../drydock/storage/DrydockResource.php | 24 +++- .../drydock/storage/DrydockSlotLock.php | 107 ++++++++++++++++++ .../drydock/worker/DrydockAllocatorWorker.php | 3 +- 9 files changed, 185 insertions(+), 19 deletions(-) create mode 100644 resources/sql/autopatches/20150916.drydock.slotlocks.1.sql create mode 100644 src/applications/drydock/storage/DrydockSlotLock.php diff --git a/resources/sql/autopatches/20150916.drydock.slotlocks.1.sql b/resources/sql/autopatches/20150916.drydock.slotlocks.1.sql new file mode 100644 index 0000000000..837566f1bf --- /dev/null +++ b/resources/sql/autopatches/20150916.drydock.slotlocks.1.sql @@ -0,0 +1,8 @@ +CREATE TABLE {$NAMESPACE}_drydock.drydock_slotlock ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ownerPHID VARBINARY(64) NOT NULL, + lockIndex BINARY(12) NOT NULL, + lockKey LONGTEXT NOT NULL COLLATE {$COLLATE_TEXT}, + UNIQUE KEY `key_lock` (lockIndex), + KEY `key_owner` (ownerPHID) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index e43291b393..3a2a76ba6a 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -863,6 +863,7 @@ phutil_register_library_map(array( 'DrydockResourceViewController' => 'applications/drydock/controller/DrydockResourceViewController.php', 'DrydockSFTPFilesystemInterface' => 'applications/drydock/interface/filesystem/DrydockSFTPFilesystemInterface.php', 'DrydockSSHCommandInterface' => 'applications/drydock/interface/command/DrydockSSHCommandInterface.php', + 'DrydockSlotLock' => 'applications/drydock/storage/DrydockSlotLock.php', 'DrydockWebrootInterface' => 'applications/drydock/interface/webroot/DrydockWebrootInterface.php', 'DrydockWorkingCopyBlueprintImplementation' => 'applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php', 'FeedConduitAPIMethod' => 'applications/feed/conduit/FeedConduitAPIMethod.php', @@ -4585,6 +4586,7 @@ phutil_register_library_map(array( 'DrydockResourceViewController' => 'DrydockResourceController', 'DrydockSFTPFilesystemInterface' => 'DrydockFilesystemInterface', 'DrydockSSHCommandInterface' => 'DrydockCommandInterface', + 'DrydockSlotLock' => 'DrydockDAO', 'DrydockWebrootInterface' => 'DrydockInterface', 'DrydockWorkingCopyBlueprintImplementation' => 'DrydockBlueprintImplementation', 'FeedConduitAPIMethod' => 'ConduitAPIMethod', diff --git a/src/applications/drydock/blueprint/DrydockAlmanacServiceHostBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockAlmanacServiceHostBlueprintImplementation.php index 102f8f07d2..aea46edd71 100644 --- a/src/applications/drydock/blueprint/DrydockAlmanacServiceHostBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockAlmanacServiceHostBlueprintImplementation.php @@ -67,14 +67,13 @@ final class DrydockAlmanacServiceHostBlueprintImplementation $device = $binding->getDevice(); $device_name = $device->getName(); + $binding_phid = $binding->getPHID(); + $resource = $this->newResourceTemplate($blueprint, $device_name) ->setActivateWhenAllocated(true) ->setAttribute('almanacServicePHID', $binding->getServicePHID()) - ->setAttribute('almanacBindingPHID', $binding->getPHID()); - - // TODO: This algorithm can race, and the "free" binding may not be - // free by the time we acquire it. Do slot-locking here if that works - // out, or some other kind of locking if it does not. + ->setAttribute('almanacBindingPHID', $binding_phid) + ->needSlotLock("almanac.host.binding({$binding_phid})"); try { return $resource->allocateResource(DrydockResourceStatus::STATUS_OPEN); @@ -93,8 +92,11 @@ final class DrydockAlmanacServiceHostBlueprintImplementation DrydockResource $resource, DrydockLease $lease) { - // TODO: We'll currently lease each resource an unlimited number of times, - // but should stop doing that. + // TODO: The current rule is one lease per resource, and there's no way to + // make that cheaper here than by just trying to acquire the lease below, + // so don't do any special checks for now. When we eventually permit + // multiple leases per host, we'll need to load leases anyway, so we can + // reject fully leased hosts cheaply here. return true; } @@ -104,11 +106,11 @@ final class DrydockAlmanacServiceHostBlueprintImplementation DrydockResource $resource, DrydockLease $lease) { - // TODO: Once we have limit rules, we should perform slot locking (or other - // kinds of locking) here. + $resource_phid = $resource->getPHID(); $lease ->setActivateWhenAcquired(true) + ->needSlotLock("almanac.host.lease({$resource_phid})") ->acquireOnResource($resource); } diff --git a/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php index 5f769d3f02..3a51f65ad1 100644 --- a/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockBlueprintImplementation.php @@ -106,8 +106,12 @@ abstract class DrydockBlueprintImplementation extends Phobject { DrydockLease $lease); final public function releaseLease( + DrydockBlueprint $blueprint, DrydockResource $resource, DrydockLease $lease) { + + // TODO: This is all broken nonsense. + $scope = $this->pushActiveScope(null, $lease); $released = false; @@ -117,6 +121,7 @@ abstract class DrydockBlueprintImplementation extends Phobject { $lease->reload(); if ($lease->getStatus() == DrydockLeaseStatus::STATUS_ACTIVE) { + $lease->release(); $lease->setStatus(DrydockLeaseStatus::STATUS_RELEASED); $lease->save(); $released = true; @@ -293,6 +298,7 @@ abstract class DrydockBlueprintImplementation extends Phobject { $resource = id(new DrydockResource()) ->setBlueprintPHID($blueprint->getPHID()) + ->attachBlueprint($blueprint) ->setType($this->getType()) ->setStatus(DrydockResourceStatus::STATUS_PENDING) ->setName($name); diff --git a/src/applications/drydock/storage/DrydockBlueprint.php b/src/applications/drydock/storage/DrydockBlueprint.php index cb039bd7f9..21a3933ccc 100644 --- a/src/applications/drydock/storage/DrydockBlueprint.php +++ b/src/applications/drydock/storage/DrydockBlueprint.php @@ -163,6 +163,17 @@ final class DrydockBlueprint extends DrydockDAO } + /** + * @task lease + */ + public function releaseLease( + DrydockResource $resource, + DrydockLease $lease) { + $this->getImplementation()->releaseLease($this, $resource, $lease); + return $this; + } + + /* -( PhabricatorApplicationTransactionInterface )------------------------- */ diff --git a/src/applications/drydock/storage/DrydockLease.php b/src/applications/drydock/storage/DrydockLease.php index 1401b52287..1b301d93eb 100644 --- a/src/applications/drydock/storage/DrydockLease.php +++ b/src/applications/drydock/storage/DrydockLease.php @@ -15,6 +15,7 @@ final class DrydockLease extends DrydockDAO private $releaseOnDestruction; private $isAcquired = false; private $activateWhenAcquired = false; + private $slotLocks = array(); /** * Flag this lease to be released when its destructor is called. This is @@ -128,6 +129,8 @@ final class DrydockLease extends DrydockDAO $this->setStatus(DrydockLeaseStatus::STATUS_RELEASED); $this->save(); + DrydockSlotLock::releaseLocks($this->getPHID()); + $this->resource = null; return $this; @@ -206,6 +209,11 @@ final class DrydockLease extends DrydockDAO return $this; } + public function needSlotLock($key) { + $this->slotLocks[] = $key; + return $this; + } + public function acquireOnResource(DrydockResource $resource) { $expect_status = DrydockLeaseStatus::STATUS_PENDING; $actual_status = $this->getStatus(); @@ -234,10 +242,17 @@ final class DrydockLease extends DrydockDAO } } - $this - ->setResourceID($resource->getID()) - ->setStatus($new_status) - ->save(); + $this->openTransaction(); + + $this + ->setResourceID($resource->getID()) + ->setStatus($new_status) + ->save(); + + DrydockSlotLock::acquireLocks($this->getPHID(), $this->slotLocks); + $this->slotLocks = array(); + + $this->saveTransaction(); $this->isAcquired = true; diff --git a/src/applications/drydock/storage/DrydockResource.php b/src/applications/drydock/storage/DrydockResource.php index 8d99cba154..4f6f8b83ac 100644 --- a/src/applications/drydock/storage/DrydockResource.php +++ b/src/applications/drydock/storage/DrydockResource.php @@ -17,6 +17,7 @@ final class DrydockResource extends DrydockDAO private $blueprint = self::ATTACHABLE; private $isAllocated = false; private $activateWhenAllocated = false; + private $slotLocks = array(); protected function getConfiguration() { return array( @@ -80,6 +81,11 @@ final class DrydockResource extends DrydockDAO return $this; } + public function needSlotLock($key) { + $this->slotLocks[] = $key; + return $this; + } + public function allocateResource($status) { if ($this->getID()) { throw new Exception( @@ -105,11 +111,18 @@ final class DrydockResource extends DrydockDAO $new_status = DrydockResourceStatus::STATUS_PENDING; } - $this - ->setStatus($new_status) - ->save(); + $this->openTransaction(); - $this->didAllocate = true; + $this + ->setStatus($new_status) + ->save(); + + DrydockSlotLock::acquireLocks($this->getPHID(), $this->slotLocks); + $this->slotLocks = array(); + + $this->saveTransaction(); + + $this->isAllocated = true; return $this; } @@ -151,6 +164,9 @@ final class DrydockResource extends DrydockDAO $this->setStatus(DrydockResourceStatus::STATUS_CLOSED); $this->save(); + + DrydockSlotLock::releaseLocks($this->getPHID()); + $this->saveTransaction(); } diff --git a/src/applications/drydock/storage/DrydockSlotLock.php b/src/applications/drydock/storage/DrydockSlotLock.php new file mode 100644 index 0000000000..2d52a81f1e --- /dev/null +++ b/src/applications/drydock/storage/DrydockSlotLock.php @@ -0,0 +1,107 @@ + false, + self::CONFIG_COLUMN_SCHEMA => array( + 'lockIndex' => 'bytes12', + 'lockKey' => 'text', + ), + self::CONFIG_KEY_SCHEMA => array( + 'key_lock' => array( + 'columns' => array('lockIndex'), + 'unique' => true, + ), + 'key_owner' => array( + 'columns' => array('ownerPHID'), + ), + ), + ) + parent::getConfiguration(); + } + + public static function loadLocks($owner_phid) { + return id(new DrydockSlotLock())->loadAllWhere( + 'ownerPHID = %s', + $owner_phid); + } + + +/* -( Acquiring and Releasing Locks )-------------------------------------- */ + + + /** + * Acquire a set of slot locks. + * + * This method either acquires all the locks or throws an exception (usually + * because one or more locks are held). + * + * @param phid Lock owner PHID. + * @param list List of locks to acquire. + * @return void + * @task locks + */ + public static function acquireLocks($owner_phid, array $locks) { + if (!$locks) { + return; + } + + $table = new DrydockSlotLock(); + $conn_w = $table->establishConnection('w'); + + $sql = array(); + foreach ($locks as $lock) { + $sql[] = qsprintf( + $conn_w, + '(%s, %s, %s)', + $owner_phid, + PhabricatorHash::digestForIndex($lock), + $lock); + } + + // TODO: These exceptions are pretty tricky to read. It would be good to + // figure out which locks could not be acquired and try to improve the + // exception to make debugging easier. + + queryfx( + $conn_w, + 'INSERT INTO %T (ownerPHID, lockIndex, lockKey) VALUES %Q', + $table->getTableName(), + implode(', ', $sql)); + } + + + /** + * Release all locks held by an owner. + * + * @param phid Lock owner PHID. + * @return void + * @task locks + */ + public static function releaseLocks($owner_phid) { + $table = new DrydockSlotLock(); + $conn_w = $table->establishConnection('w'); + + queryfx( + $conn_w, + 'DELETE FROM %T WHERE ownerPHID = %s', + $table->getTableName(), + $owner_phid); + } + +} diff --git a/src/applications/drydock/worker/DrydockAllocatorWorker.php b/src/applications/drydock/worker/DrydockAllocatorWorker.php index 8b26ef3571..407910ae1e 100644 --- a/src/applications/drydock/worker/DrydockAllocatorWorker.php +++ b/src/applications/drydock/worker/DrydockAllocatorWorker.php @@ -350,7 +350,7 @@ final class DrydockAllocatorWorker extends PhabricatorWorker { DrydockBlueprint $blueprint, DrydockLease $lease) { $resource = $blueprint->allocateResource($lease); - $this->validateAllocatedResource($resource); + $this->validateAllocatedResource($blueprint, $resource, $lease); return $resource; } @@ -369,7 +369,6 @@ final class DrydockAllocatorWorker extends PhabricatorWorker { DrydockBlueprint $blueprint, $resource, DrydockLease $lease) { - $blueprint = $this->getBlueprintClass(); if (!($resource instanceof DrydockResource)) { throw new Exception(