1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-12-23 05:50:55 +01:00

When a Drydock Blueprint promises us a resource but can't deliver, continue believing in it

Summary:
Ref T13073. When a Blueprint says it will be able to allocate a resource but then throws an exception while attempting that allocation, we currently fail the lease permanently.

This is excessively harsh. This blueprint may have the best of intentions and have encountered a legitimately unforseeable failure (like a `vm.new` call to build a VM failed) and be able to succeed in the future.

Even if this blueprint is a dirty liar, other blueprints (or existing resources) may be able to satisfy the lease in the future.

Even if every blueprint is implemented incorrectly, leaving the lease alive lets it converge to success after the blueprints are fixed.

Instead of failing, log the issue and yield.

(In the future, it might make sense to distinguish more narrowly between "actually, all the resources are used up" and all other failure types, since the former is likely more routine and less concerning.)

Test Plan:
  - Wrote a broken `Hoax` blueprint which always claims it can allocate but never actually allocates (just `throw` in `allocateResource()`).
  - Used `bin/phd drydock lease` to acquire a Hoax lease.
  - Before patch: lease abruptly failed permanently.
  - After patch: lease yields after allocation fails.

{F5427747}

Subscribers: PHID-OPKG-gm6ozazyms6q6i22gyam

Maniphest Tasks: T13073

Differential Revision: https://secure.phabricator.com/D19070
This commit is contained in:
epriestley 2018-02-13 03:08:58 -08:00
parent 6a4d5ce3c9
commit 4dd32dca3e
4 changed files with 92 additions and 2 deletions

View file

@ -1055,6 +1055,7 @@ phutil_register_library_map(array(
'DrydockLeaseActivatedLogType' => 'applications/drydock/logtype/DrydockLeaseActivatedLogType.php',
'DrydockLeaseActivationFailureLogType' => 'applications/drydock/logtype/DrydockLeaseActivationFailureLogType.php',
'DrydockLeaseActivationYieldLogType' => 'applications/drydock/logtype/DrydockLeaseActivationYieldLogType.php',
'DrydockLeaseAllocationFailureLogType' => 'applications/drydock/logtype/DrydockLeaseAllocationFailureLogType.php',
'DrydockLeaseController' => 'applications/drydock/controller/DrydockLeaseController.php',
'DrydockLeaseDatasource' => 'applications/drydock/typeahead/DrydockLeaseDatasource.php',
'DrydockLeaseDestroyedLogType' => 'applications/drydock/logtype/DrydockLeaseDestroyedLogType.php',
@ -1106,6 +1107,7 @@ phutil_register_library_map(array(
'DrydockResource' => 'applications/drydock/storage/DrydockResource.php',
'DrydockResourceActivationFailureLogType' => 'applications/drydock/logtype/DrydockResourceActivationFailureLogType.php',
'DrydockResourceActivationYieldLogType' => 'applications/drydock/logtype/DrydockResourceActivationYieldLogType.php',
'DrydockResourceAllocationFailureLogType' => 'applications/drydock/logtype/DrydockResourceAllocationFailureLogType.php',
'DrydockResourceController' => 'applications/drydock/controller/DrydockResourceController.php',
'DrydockResourceDatasource' => 'applications/drydock/typeahead/DrydockResourceDatasource.php',
'DrydockResourceListController' => 'applications/drydock/controller/DrydockResourceListController.php',
@ -6273,6 +6275,7 @@ phutil_register_library_map(array(
'DrydockLeaseActivatedLogType' => 'DrydockLogType',
'DrydockLeaseActivationFailureLogType' => 'DrydockLogType',
'DrydockLeaseActivationYieldLogType' => 'DrydockLogType',
'DrydockLeaseAllocationFailureLogType' => 'DrydockLogType',
'DrydockLeaseController' => 'DrydockController',
'DrydockLeaseDatasource' => 'PhabricatorTypeaheadDatasource',
'DrydockLeaseDestroyedLogType' => 'DrydockLogType',
@ -6333,6 +6336,7 @@ phutil_register_library_map(array(
),
'DrydockResourceActivationFailureLogType' => 'DrydockLogType',
'DrydockResourceActivationYieldLogType' => 'DrydockLogType',
'DrydockResourceAllocationFailureLogType' => 'DrydockLogType',
'DrydockResourceController' => 'DrydockController',
'DrydockResourceDatasource' => 'PhabricatorTypeaheadDatasource',
'DrydockResourceListController' => 'DrydockResourceController',

View file

@ -0,0 +1,26 @@
<?php
final class DrydockLeaseAllocationFailureLogType extends DrydockLogType {
const LOGCONST = 'core.lease.allocation-failure';
public function getLogTypeName() {
return pht('Allocation Failed');
}
public function getLogTypeIcon(array $data) {
return 'fa-times red';
}
public function renderLog(array $data) {
$class = idx($data, 'class');
$message = idx($data, 'message');
return pht(
'One or more blueprints promised a new resource, but failed when '.
'allocating: [%s] %s',
$class,
$message);
}
}

View file

@ -0,0 +1,26 @@
<?php
final class DrydockResourceAllocationFailureLogType extends DrydockLogType {
const LOGCONST = 'core.resource.allocation-failure';
public function getLogTypeName() {
return pht('Allocation Failed');
}
public function getLogTypeIcon(array $data) {
return 'fa-times red';
}
public function renderLog(array $data) {
$class = idx($data, 'class');
$message = idx($data, 'message');
return pht(
'Blueprint failed to allocate a resource after claiming it would '.
'be able to: [%s] %s',
$class,
$message);
}
}

View file

@ -216,17 +216,51 @@ final class DrydockLeaseUpdateWorker extends DrydockWorker {
// this.
break;
} catch (Exception $ex) {
// This failure is not normally expected, so log it. It can be
// caused by something mundane and recoverable, however (see below
// for discussion).
// We log to the blueprint separately from the log to the lease:
// the lease is not attached to a blueprint yet so the lease log
// will not show up on the blueprint; more than one blueprint may
// fail; and the lease is not really impacted (and won't log) if at
// least one blueprint actually works.
$blueprint->logEvent(
DrydockResourceAllocationFailureLogType::LOGCONST,
array(
'class' => get_class($ex),
'message' => $ex->getMessage(),
));
$exceptions[] = $ex;
}
}
if (!$resources) {
throw new PhutilAggregateException(
// If one or more blueprints claimed that they would be able to
// allocate resources but none are actually able to allocate resources,
// log the failure and yield so we try again soon.
// This can happen if some unexpected issue occurs during allocation
// (for example, a call to build a VM fails for some reason) or if we
// raced another allocator and the blueprint is now full.
$ex = new PhutilAggregateException(
pht(
'All blueprints failed to allocate a suitable new resource when '.
'trying to allocate lease "%s".',
'trying to allocate lease ("%s").',
$lease->getPHID()),
$exceptions);
$lease->logEvent(
DrydockLeaseAllocationFailureLogType::LOGCONST,
array(
'class' => get_class($ex),
'message' => $ex->getMessage(),
));
throw new PhabricatorWorkerYieldException(15);
}
$resources = $this->removeUnacquirableResources($resources, $lease);