From 82cf97ad65a910cb55e9c53c3e80f0740a724a10 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 12 Aug 2019 12:07:13 -0700 Subject: [PATCH] When many commits are discovered at once, import them at lower priority Summary: Ref T13369. See that task for discussion. When the discovery daemon finds more than 64 commits to import, demote the worker queue priority of the resulting tasks. Test Plan: - Pushed one commit, ran `bin/repository discover --verbose --trace ...`, saw commit import with "at normal priority" message and priority 2500 ("PRIORITY_COMMIT"). - Pushed 3 commits, set threshold to 3, ran `bin/repository discover ...`, saw commist import with "at lower priority" message and priority 4000 ("PRIORITY_IMPORT"). Maniphest Tasks: T13369 Differential Revision: https://secure.phabricator.com/D20712 --- .../PhabricatorRepositoryDiscoveryEngine.php | 106 +++++++++++++----- .../storage/PhabricatorRepository.php | 2 + 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php index c1bb9190b3..790b95d775 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php @@ -93,6 +93,8 @@ final class PhabricatorRepositoryDiscoveryEngine // Clear the working set cache. $this->workingSet = array(); + $task_priority = $this->getImportTaskPriority($repository, $refs); + // Record discovered commits and mark them in the cache. foreach ($refs as $ref) { $this->recordCommit( @@ -100,7 +102,8 @@ final class PhabricatorRepositoryDiscoveryEngine $ref->getIdentifier(), $ref->getEpoch(), $ref->getCanCloseImmediately(), - $ref->getParents()); + $ref->getParents(), + $task_priority); $this->commitCache[$ref->getIdentifier()] = true; } @@ -536,7 +539,8 @@ final class PhabricatorRepositoryDiscoveryEngine $commit_identifier, $epoch, $close_immediately, - array $parents) { + array $parents, + $task_priority) { $commit = new PhabricatorRepositoryCommit(); $conn_w = $repository->establishConnection('w'); @@ -559,7 +563,7 @@ final class PhabricatorRepositoryDiscoveryEngine $commit_identifier); // After reviving a commit, schedule new daemons for it. - $this->didDiscoverCommit($repository, $commit, $epoch); + $this->didDiscoverCommit($repository, $commit, $epoch, $task_priority); return; } @@ -620,7 +624,7 @@ final class PhabricatorRepositoryDiscoveryEngine } $commit->saveTransaction(); - $this->didDiscoverCommit($repository, $commit, $epoch); + $this->didDiscoverCommit($repository, $commit, $epoch, $task_priority); if ($this->repairMode) { // Normally, the query should throw a duplicate key exception. If we @@ -648,9 +652,10 @@ final class PhabricatorRepositoryDiscoveryEngine private function didDiscoverCommit( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, - $epoch) { + $epoch, + $task_priority) { - $this->insertTask($repository, $commit); + $this->insertTask($repository, $commit, $task_priority); // Update the repository summary table. queryfx( @@ -677,6 +682,7 @@ final class PhabricatorRepositoryDiscoveryEngine private function insertTask( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit, + $task_priority, $data = array()) { $vcs = $repository->getVersionControlSystem(); @@ -696,27 +702,6 @@ final class PhabricatorRepositoryDiscoveryEngine $data['commitID'] = $commit->getID(); - // If the repository is importing for the first time, we schedule tasks - // at IMPORT priority, which is very low. Making progress on importing a - // new repository for the first time is less important than any other - // daemon task. - - // If the repository has finished importing and we're just catching up - // on recent commits, we schedule discovery at COMMIT priority, which is - // slightly below the default priority. - - // Note that followup tasks and triggered tasks (like those generated by - // Herald or Harbormaster) will queue at DEFAULT priority, so that each - // commit tends to fully import before we start the next one. This tends - // to give imports fairly predictable progress. See T11677 for some - // discussion. - - if ($repository->isImporting()) { - $task_priority = PhabricatorWorker::PRIORITY_IMPORT; - } else { - $task_priority = PhabricatorWorker::PRIORITY_COMMIT; - } - $options = array( 'priority' => $task_priority, ); @@ -934,4 +919,71 @@ final class PhabricatorRepositoryDiscoveryEngine $data['epoch']); } + private function getImportTaskPriority( + PhabricatorRepository $repository, + array $refs) { + + // If the repository is importing for the first time, we schedule tasks + // at IMPORT priority, which is very low. Making progress on importing a + // new repository for the first time is less important than any other + // daemon task. + + // If the repository has finished importing and we're just catching up + // on recent commits, we usually schedule discovery at COMMIT priority, + // which is slightly below the default priority. + + // Note that followup tasks and triggered tasks (like those generated by + // Herald or Harbormaster) will queue at DEFAULT priority, so that each + // commit tends to fully import before we start the next one. This tends + // to give imports fairly predictable progress. See T11677 for some + // discussion. + + if ($repository->isImporting()) { + $this->log( + pht( + 'Importing %s commit(s) at low priority ("PRIORITY_IMPORT") '. + 'because this repository is still importing.', + phutil_count($refs))); + + return PhabricatorWorker::PRIORITY_IMPORT; + } + + // See T13369. If we've discovered a lot of commits at once, import them + // at lower priority. + + // This is mostly aimed at reducing the impact that synchronizing thousands + // of commits from a remote upstream has on other repositories. The queue + // is "mostly FIFO", so queueing a thousand commit imports can stall other + // repositories. + + // In a perfect world we'd probably give repositories round-robin queue + // priority, but we don't currently have the primitives for this and there + // isn't a strong case for building them. + + // Use "a whole lot of commits showed up at once" as a heuristic for + // detecting "someone synchronized an upstream", and import them at a lower + // priority to more closely approximate fair scheduling. + + if (count($refs) >= PhabricatorRepository::LOWPRI_THRESHOLD) { + $this->log( + pht( + 'Importing %s commit(s) at low priority ("PRIORITY_IMPORT") '. + 'because many commits were discovered at once.', + phutil_count($refs))); + + return PhabricatorWorker::PRIORITY_IMPORT; + } + + // Otherwise, import at normal priority. + + if ($refs) { + $this->log( + pht( + 'Importing %s commit(s) at normal priority ("PRIORITY_COMMIT").', + phutil_count($refs))); + } + + return PhabricatorWorker::PRIORITY_COMMIT; + } + } diff --git a/src/applications/repository/storage/PhabricatorRepository.php b/src/applications/repository/storage/PhabricatorRepository.php index 30eb56cfd5..9661584851 100644 --- a/src/applications/repository/storage/PhabricatorRepository.php +++ b/src/applications/repository/storage/PhabricatorRepository.php @@ -34,6 +34,8 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO */ const IMPORT_THRESHOLD = 7; + const LOWPRI_THRESHOLD = 64; + const TABLE_PATH = 'repository_path'; const TABLE_PATHCHANGE = 'repository_pathchange'; const TABLE_FILESYSTEM = 'repository_filesystem';