From 84ee4cd9f62f6bc1fddbcb1fc7788e0d1a3e2c62 Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 1 Nov 2012 11:30:23 -0700 Subject: [PATCH] Factor out task execution and formalize permanent failures Summary: - Clean up a TODO about permanent failures. - Clean up a TODO about failing tasks after too many retries. - Clean up a TODO about testing for bad leases. - Make the lease/retry implementation more flexible and natural. - Make completely bogus tasks fail permanently. - Make PhabricatorMetaMTAWorker use new `getWaitBeforeRetry()` (as intended), not hackily implement logic in `getRequiredLeaseTime()`. - Document worker hooks for failures and retries. - Provide coverage on everything. Test Plan: Ran unit tests. Ran `bin/phd debug taskmaster`. Reviewers: btrahan Reviewed By: btrahan CC: aran Maniphest Tasks: T2015 Differential Revision: https://secure.phabricator.com/D3859 --- src/__phutil_library_map__.php | 6 + .../metamta/PhabricatorMetaMTAWorker.php | 6 +- .../workers/PhabricatorTaskmasterDaemon.php | 37 +--- .../daemon/workers/PhabricatorWorker.php | 74 ++++++- .../__tests__/PhabricatorTestWorker.php | 55 +++++ .../__tests__/PhabricatorWorkerTestCase.php | 202 ++++++++++++++++++ ...ricatorWorkerPermanentFailureException.php | 21 ++ .../query/PhabricatorWorkerLeaseQuery.php | 7 +- .../storage/PhabricatorWorkerActiveTask.php | 79 ++++++- .../workers/storage/PhabricatorWorkerTask.php | 10 + 10 files changed, 452 insertions(+), 45 deletions(-) create mode 100644 src/infrastructure/daemon/workers/__tests__/PhabricatorTestWorker.php create mode 100644 src/infrastructure/daemon/workers/__tests__/PhabricatorWorkerTestCase.php create mode 100644 src/infrastructure/daemon/workers/exception/PhabricatorWorkerPermanentFailureException.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index 2443dda263..7a0413305c 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -1105,6 +1105,7 @@ phutil_register_library_map(array( 'PhabricatorSyntaxHighlighter' => 'infrastructure/markup/PhabricatorSyntaxHighlighter.php', 'PhabricatorTaskmasterDaemon' => 'infrastructure/daemon/workers/PhabricatorTaskmasterDaemon.php', 'PhabricatorTestCase' => 'infrastructure/testing/PhabricatorTestCase.php', + 'PhabricatorTestWorker' => 'infrastructure/daemon/workers/__tests__/PhabricatorTestWorker.php', 'PhabricatorTimelineCursor' => 'infrastructure/daemon/timeline/storage/PhabricatorTimelineCursor.php', 'PhabricatorTimelineDAO' => 'infrastructure/daemon/timeline/storage/PhabricatorTimelineDAO.php', 'PhabricatorTimelineEvent' => 'infrastructure/daemon/timeline/storage/PhabricatorTimelineEvent.php', @@ -1144,10 +1145,12 @@ phutil_register_library_map(array( 'PhabricatorWorkerArchiveTask' => 'infrastructure/daemon/workers/storage/PhabricatorWorkerArchiveTask.php', 'PhabricatorWorkerDAO' => 'infrastructure/daemon/workers/storage/PhabricatorWorkerDAO.php', 'PhabricatorWorkerLeaseQuery' => 'infrastructure/daemon/workers/query/PhabricatorWorkerLeaseQuery.php', + 'PhabricatorWorkerPermanentFailureException' => 'infrastructure/daemon/workers/exception/PhabricatorWorkerPermanentFailureException.php', 'PhabricatorWorkerTask' => 'infrastructure/daemon/workers/storage/PhabricatorWorkerTask.php', 'PhabricatorWorkerTaskData' => 'infrastructure/daemon/workers/storage/PhabricatorWorkerTaskData.php', 'PhabricatorWorkerTaskDetailController' => 'applications/daemon/controller/PhabricatorWorkerTaskDetailController.php', 'PhabricatorWorkerTaskUpdateController' => 'applications/daemon/controller/PhabricatorWorkerTaskUpdateController.php', + 'PhabricatorWorkerTestCase' => 'infrastructure/daemon/workers/__tests__/PhabricatorWorkerTestCase.php', 'PhabricatorXHPASTViewController' => 'applications/xhpastview/controller/PhabricatorXHPASTViewController.php', 'PhabricatorXHPASTViewDAO' => 'applications/xhpastview/storage/PhabricatorXHPASTViewDAO.php', 'PhabricatorXHPASTViewFrameController' => 'applications/xhpastview/controller/PhabricatorXHPASTViewFrameController.php', @@ -2268,6 +2271,7 @@ phutil_register_library_map(array( 'PhabricatorSymbolNameLinter' => 'ArcanistXHPASTLintNamingHook', 'PhabricatorTaskmasterDaemon' => 'PhabricatorDaemon', 'PhabricatorTestCase' => 'ArcanistPhutilTestCase', + 'PhabricatorTestWorker' => 'PhabricatorWorker', 'PhabricatorTimelineCursor' => 'PhabricatorTimelineDAO', 'PhabricatorTimelineDAO' => 'PhabricatorLiskDAO', 'PhabricatorTimelineEvent' => 'PhabricatorTimelineDAO', @@ -2307,10 +2311,12 @@ phutil_register_library_map(array( 'PhabricatorWorkerArchiveTask' => 'PhabricatorWorkerTask', 'PhabricatorWorkerDAO' => 'PhabricatorLiskDAO', 'PhabricatorWorkerLeaseQuery' => 'PhabricatorQuery', + 'PhabricatorWorkerPermanentFailureException' => 'Exception', 'PhabricatorWorkerTask' => 'PhabricatorWorkerDAO', 'PhabricatorWorkerTaskData' => 'PhabricatorWorkerDAO', 'PhabricatorWorkerTaskDetailController' => 'PhabricatorDaemonController', 'PhabricatorWorkerTaskUpdateController' => 'PhabricatorDaemonController', + 'PhabricatorWorkerTestCase' => 'PhabricatorTestCase', 'PhabricatorXHPASTViewController' => 'PhabricatorController', 'PhabricatorXHPASTViewDAO' => 'PhabricatorLiskDAO', 'PhabricatorXHPASTViewFrameController' => 'PhabricatorXHPASTViewController', diff --git a/src/applications/metamta/PhabricatorMetaMTAWorker.php b/src/applications/metamta/PhabricatorMetaMTAWorker.php index 10643b0d17..4b1e11782c 100644 --- a/src/applications/metamta/PhabricatorMetaMTAWorker.php +++ b/src/applications/metamta/PhabricatorMetaMTAWorker.php @@ -21,7 +21,7 @@ final class PhabricatorMetaMTAWorker private $message; - public function getRequiredLeaseTime() { + public function getWaitBeforeRetry(PhabricatorWorkerTask $task) { $message_id = $this->getTaskData(); $this->message = id(new PhabricatorMetaMTAMail())->loadOneWhere( @@ -30,8 +30,8 @@ final class PhabricatorMetaMTAWorker return null; } - $lease_duration = max($this->message->getNextRetry() - time(), 0) + 15; - return $lease_duration; + $wait = max($this->message->getNextRetry() - time(), 0) + 15; + return $wait; } public function doWork() { diff --git a/src/infrastructure/daemon/workers/PhabricatorTaskmasterDaemon.php b/src/infrastructure/daemon/workers/PhabricatorTaskmasterDaemon.php index 3ceae21f6c..f9f275acd5 100644 --- a/src/infrastructure/daemon/workers/PhabricatorTaskmasterDaemon.php +++ b/src/infrastructure/daemon/workers/PhabricatorTaskmasterDaemon.php @@ -32,40 +32,13 @@ final class PhabricatorTaskmasterDaemon extends PhabricatorDaemon { $this->log("Working on task {$id} ({$class})..."); - // TODO: We should detect if we acquired a task with an expired lease - // and log about it / bump up failure count. - - // TODO: We should detect if we acquired a task with an excessive - // failure count and fail it permanently. - - $data = $task->getData(); - try { - if (!class_exists($class) || - !is_subclass_of($class, 'PhabricatorWorker')) { - throw new Exception( - "Task class '{$class}' does not extend PhabricatorWorker."); - } - $worker = newv($class, array($data)); - - $lease = $worker->getRequiredLeaseTime(); - if ($lease !== null) { - $task->setLeaseDuration($lease); - } - - $t_start = microtime(true); - $worker->executeTask(); - $t_end = microtime(true); - - $task->archiveTask( - PhabricatorWorkerArchiveTask::RESULT_SUCCESS, - (int)(1000000 * ($t_end - $t_start))); - $this->log("Task {$id} complete! Moved to archive."); - } catch (Exception $ex) { - $task->setFailureCount($task->getFailureCount() + 1); - $task->save(); - + $task = $task->executeTask(); + $ex = $task->getExecutionException(); + if ($ex) { $this->log("Task {$id} failed!"); throw $ex; + } else { + $this->log("Task {$id} complete! Moved to archive."); } } diff --git a/src/infrastructure/daemon/workers/PhabricatorWorker.php b/src/infrastructure/daemon/workers/PhabricatorWorker.php index 123028c558..703fae925d 100644 --- a/src/infrastructure/daemon/workers/PhabricatorWorker.php +++ b/src/infrastructure/daemon/workers/PhabricatorWorker.php @@ -16,10 +16,78 @@ * limitations under the License. */ +/** + * @task config Configuring Retries and Failures + * + * @group worker + */ abstract class PhabricatorWorker { private $data; + +/* -( Configuring Retries and Failures )----------------------------------- */ + + + /** + * Return the number of seconds this worker needs hold a lease on the task for + * while it performs work. For most tasks you can leave this at `null`, which + * will give you a short default lease (currently 60 seconds). + * + * For tasks which may take a very long time to complete, you should return + * an upper bound on the amount of time the task may require. + * + * @return int|null Number of seconds this task needs to remain leased for, + * or null for a default (currently 60 second) lease. + * + * @task config + */ + public function getRequiredLeaseTime() { + return null; + } + + + /** + * Return the maximum number of times this task may be retried before it + * is considered permanently failed. By default, tasks retry indefinitely. You + * can throw a @{class:PhabricatorWorkerPermanentFailureException} to cause an + * immediate permanent failure. + * + * @return int|null Number of times the task will retry before permanent + * failure. Return `null` to retry indefinitely. + * + * @task config + */ + public function getMaximumRetryCount() { + return null; + } + + + /** + * Return the number of seconds a task should wait after a failure before + * retrying. For most tasks you can leave this at `null`, which will give you + * a short default retry period (currently 60 seconds). + * + * @param PhabricatorWorkerTask The task itself. This object is probably + * useful mostly to examine the failure + * count if you want to implement staggered + * retries, or to examine the execution + * exception if you want to react to + * different failures in different ways. + * @param Exception The exception which caused the failure. + * @return int|null Number of seconds to wait between retries, + * or null for a default retry period + * (currently 60 seconds). + * + * @task config + */ + public function getWaitBeforeRetry(PhabricatorWorkerTask $task) { + return null; + } + + abstract protected function doWork(); + + final public function __construct($data) { $this->data = $data; } @@ -32,12 +100,6 @@ abstract class PhabricatorWorker { $this->doWork(); } - public function getRequiredLeaseTime() { - return null; - } - - abstract protected function doWork(); - final public static function scheduleTask($task_class, $data) { return id(new PhabricatorWorkerActiveTask()) ->setTaskClass($task_class) diff --git a/src/infrastructure/daemon/workers/__tests__/PhabricatorTestWorker.php b/src/infrastructure/daemon/workers/__tests__/PhabricatorTestWorker.php new file mode 100644 index 0000000000..87141f17c2 --- /dev/null +++ b/src/infrastructure/daemon/workers/__tests__/PhabricatorTestWorker.php @@ -0,0 +1,55 @@ +getTaskData(), + 'getRequiredLeaseTime', + parent::getRequiredLeaseTime()); + } + + public function getMaximumRetryCount() { + return idx( + $this->getTaskData(), + 'getMaximumRetryCount', + parent::getMaximumRetryCount()); + } + + public function getWaitBeforeRetry(PhabricatorWorkerTask $task) { + return idx( + $this->getTaskData(), + 'getWaitBeforeRetry', + parent::getWaitBeforeRetry($task)); + } + + protected function doWork() { + switch (idx($this->getTaskData(), 'doWork')) { + case 'fail-temporary': + throw new Exception( + "Temporary failure!"); + case 'fail-permanent': + throw new PhabricatorWorkerPermanentFailureException( + "Permanent failure!"); + default: + return; + } + } + +} diff --git a/src/infrastructure/daemon/workers/__tests__/PhabricatorWorkerTestCase.php b/src/infrastructure/daemon/workers/__tests__/PhabricatorWorkerTestCase.php new file mode 100644 index 0000000000..54a0fa2712 --- /dev/null +++ b/src/infrastructure/daemon/workers/__tests__/PhabricatorWorkerTestCase.php @@ -0,0 +1,202 @@ + true, + ); + } + + public function testLeaseTask() { + // Leasing should work. + + $task = $this->scheduleTask(); + + $this->expectNextLease($task); + } + + public function testMultipleLease() { + // We should not be able to lease a task multiple times. + + $task = $this->scheduleTask(); + + $this->expectNextLease($task); + $this->expectNextLease(null); + } + + public function testOldestFirst() { + // Older tasks should lease first, all else being equal. + + $task1 = $this->scheduleTask(); + $task2 = $this->scheduleTask(); + + $this->expectNextLease($task1)->executeTask(); + $this->expectNextLease($task2); + } + + public function testNewBeforeLeased() { + // Tasks not previously leased should lease before previously leased tasks. + + $task1 = $this->scheduleTask(); + $task2 = $this->scheduleTask(); + + $task1->setLeaseOwner('test'); + $task1->setLeaseExpires(time() - 100000); + $task1->forceSaveWithoutLease(); + + $this->expectNextLease($task2)->executeTask(); + $this->expectNextLease($task1); + } + + + public function testExecuteTask() { + $task = $this->scheduleAndExecuteTask(); + + $this->assertEqual(true, $task->isArchived()); + $this->assertEqual( + PhabricatorWorkerArchiveTask::RESULT_SUCCESS, + $task->getResult()); + } + + public function testPermanentTaskFailure() { + $task = $this->scheduleAndExecuteTask( + array( + 'doWork' => 'fail-permanent', + )); + + $this->assertEqual(true, $task->isArchived()); + $this->assertEqual( + PhabricatorWorkerArchiveTask::RESULT_FAILURE, + $task->getResult()); + } + + public function testTemporaryTaskFailure() { + $task = $this->scheduleAndExecuteTask( + array( + 'doWork' => 'fail-temporary', + )); + + $this->assertEqual(false, $task->isArchived()); + $this->assertEqual( + true, + ($task->getExecutionException() instanceof Exception)); + } + + public function testTooManyTaskFailures() { + // Expect temporary failures, then a permanent failure. + $task = $this->scheduleAndExecuteTask( + array( + 'doWork' => 'fail-temporary', + 'getMaximumRetryCount' => 3, + 'getWaitBeforeRetry' => -60, + )); + + // Temporary... + $this->assertEqual(false, $task->isArchived()); + $this->assertEqual( + true, + ($task->getExecutionException() instanceof Exception)); + $this->assertEqual(1, $task->getFailureCount()); + + // Temporary... + $task = $this->expectNextLease($task); + $task = $task->executeTask(); + $this->assertEqual(false, $task->isArchived()); + $this->assertEqual( + true, + ($task->getExecutionException() instanceof Exception)); + $this->assertEqual(2, $task->getFailureCount()); + + // Temporary... + $task = $this->expectNextLease($task); + $task = $task->executeTask(); + $this->assertEqual(false, $task->isArchived()); + $this->assertEqual( + true, + ($task->getExecutionException() instanceof Exception)); + $this->assertEqual(3, $task->getFailureCount()); + + // Temporary... + $task = $this->expectNextLease($task); + $task = $task->executeTask(); + $this->assertEqual(false, $task->isArchived()); + $this->assertEqual( + true, + ($task->getExecutionException() instanceof Exception)); + $this->assertEqual(4, $task->getFailureCount()); + + // Permanent. + $task = $this->expectNextLease($task); + $task = $task->executeTask(); + $this->assertEqual(true, $task->isArchived()); + $this->assertEqual( + PhabricatorWorkerArchiveTask::RESULT_FAILURE, + $task->getResult()); + } + + public function testWaitBeforeRetry() { + $task = $this->scheduleTask( + array( + 'doWork' => 'fail-temporary', + 'getWaitBeforeRetry' => 1000000, + )); + + $this->expectNextLease($task)->executeTask(); + $this->expectNextLease(null); + } + + public function testRequiredLeaseTime() { + $task = $this->scheduleAndExecuteTask( + array( + 'getRequiredLeaseTime' => 1000000, + )); + + $this->assertEqual(true, ($task->getLeaseExpires() - time()) > 1000); + } + + private function expectNextLease($task) { + $leased = id(new PhabricatorWorkerLeaseQuery()) + ->setLimit(1) + ->execute(); + + if ($task === null) { + $this->assertEqual(0, count($leased)); + return null; + } else { + $this->assertEqual(1, count($leased)); + $this->assertEqual( + (int)head($leased)->getID(), + (int)$task->getID()); + return head($leased); + } + } + + private function scheduleAndExecuteTask(array $data = array()) { + $task = $this->scheduleTask($data); + $task = $this->expectNextLease($task); + $task = $task->executeTask(); + return $task; + } + + private function scheduleTask(array $data = array()) { + return PhabricatorWorker::scheduleTask('PhabricatorTestWorker', $data); + } + +} diff --git a/src/infrastructure/daemon/workers/exception/PhabricatorWorkerPermanentFailureException.php b/src/infrastructure/daemon/workers/exception/PhabricatorWorkerPermanentFailureException.php new file mode 100644 index 0000000000..ae457d982e --- /dev/null +++ b/src/infrastructure/daemon/workers/exception/PhabricatorWorkerPermanentFailureException.php @@ -0,0 +1,21 @@ +getTableName(), $lease_ownership_name, + self::DEFAULT_LEASE_DURATION, $this->buildWhereClause($conn_w, $phase), $this->buildOrderClause($conn_w), $this->buildLimitClause($conn_w, $limit - $leased)); @@ -118,7 +121,7 @@ final class PhabricatorWorkerLeaseQuery extends PhabricatorQuery { case self::PHASE_UNLEASED: $where[] = 'leaseOwner IS NULL'; break; - case self::PHASE_Expired: + case self::PHASE_EXPIRED: $where[] = 'leaseExpires < UNIX_TIMESTAMP()'; break; default: diff --git a/src/infrastructure/daemon/workers/storage/PhabricatorWorkerActiveTask.php b/src/infrastructure/daemon/workers/storage/PhabricatorWorkerActiveTask.php index cf6f14db92..51eabb73ca 100644 --- a/src/infrastructure/daemon/workers/storage/PhabricatorWorkerActiveTask.php +++ b/src/infrastructure/daemon/workers/storage/PhabricatorWorkerActiveTask.php @@ -38,20 +38,28 @@ final class PhabricatorWorkerActiveTask extends PhabricatorWorkerTask { } public function setLeaseDuration($lease_duration) { + $this->checkLease(); $server_lease_expires = $this->serverTime + $lease_duration; $this->setLeaseExpires($server_lease_expires); - return $this->save(); + + // NOTE: This is primarily to allow unit tests to set negative lease + // durations so they don't have to wait around for leases to expire. We + // check that the lease is valid above. + return $this->forceSaveWithoutLease(); } public function save() { $this->checkLease(); + return $this->forceSaveWithoutLease(); + } + public function forceSaveWithoutLease() { $is_new = !$this->getID(); if ($is_new) { $this->failureCount = 0; } - if ($is_new && $this->getData()) { + if ($is_new && ($this->getData() !== null)) { $data = new PhabricatorWorkerTaskData(); $data->setData($this->getData()); $data->save(); @@ -101,4 +109,71 @@ final class PhabricatorWorkerActiveTask extends PhabricatorWorkerTask { return $archive; } + public function executeTask() { + // We do this outside of the try .. catch because we don't have permission + // to release the lease otherwise. + $this->checkLease(); + + try { + $id = $this->getID(); + $class = $this->getTaskClass(); + + if (!class_exists($class)) { + throw new PhabricatorWorkerPermanentFailureException( + "Task class '{$class}' does not exist!"); + } + + if (!is_subclass_of($class, 'PhabricatorWorker')) { + throw new PhabricatorWorkerPermanentFailureException( + "Task class '{$class}' does not extend PhabricatorWorker."); + } + + $worker = newv($class, array($this->getData())); + + $maximum_failures = $worker->getMaximumRetryCount(); + if ($maximum_failures !== null) { + if ($this->getFailureCount() > $maximum_failures) { + throw new PhabricatorWorkerPermanentFailureException( + "Task {$id} has exceeded the maximum number of failures ". + "({$maximum_failures})."); + } + } + + $lease = $worker->getRequiredLeaseTime(); + if ($lease !== null) { + $this->setLeaseDuration($lease); + } + + $t_start = microtime(true); + $worker->executeTask(); + $t_end = microtime(true); + $duration = (int)(1000000 * ($t_end - $t_start)); + + $result = $this->archiveTask( + PhabricatorWorkerArchiveTask::RESULT_SUCCESS, + $duration); + } catch (PhabricatorWorkerPermanentFailureException $ex) { + $result = $this->archiveTask( + PhabricatorWorkerArchiveTask::RESULT_FAILURE, + 0); + $result->setExecutionException($ex); + } catch (Exception $ex) { + $this->setExecutionException($ex); + $this->setFailureCount($this->getFailureCount() + 1); + + $retry = $worker->getWaitBeforeRetry($this); + $retry = coalesce( + $retry, + PhabricatorWorkerLeaseQuery::DEFAULT_LEASE_DURATION); + + // NOTE: As a side effect, this saves the object. + $this->setLeaseDuration($retry); + + $result = $this; + } + + return $result; + } + + } diff --git a/src/infrastructure/daemon/workers/storage/PhabricatorWorkerTask.php b/src/infrastructure/daemon/workers/storage/PhabricatorWorkerTask.php index 324ec32b49..a5cd7609e0 100644 --- a/src/infrastructure/daemon/workers/storage/PhabricatorWorkerTask.php +++ b/src/infrastructure/daemon/workers/storage/PhabricatorWorkerTask.php @@ -27,6 +27,16 @@ abstract class PhabricatorWorkerTask extends PhabricatorWorkerDAO { protected $dataID; private $data; + private $executionException; + + public function setExecutionException(Exception $execution_exception) { + $this->executionException = $execution_exception; + return $this; + } + + public function getExecutionException() { + return $this->executionException; + } public function setData($data) { $this->data = $data;