From 51073b972ecef4f502bbc05639427401973623f5 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 5 Oct 2018 13:17:17 -0700 Subject: [PATCH 1/4] Try to route cluster writes to nodes which won't need to synchronize first Summary: Ref T13109. Ref T13202. See PHI905. See PHI889. When we receive a write to a repository cluster, we currently send it to a random writable node. Instead, we can prefer: - the node currently holding the write lock; or - any node which is already up to date. These should simply be better nodes to take writes in all cases. The write lock is global for the repository, so there's no scaling benefit to spreading writes across different nodes, and these particular nodes will be able to accept the write more quickly. Test Plan: - This is observable by using `fprintf(STDERR, "%s\n", ...)` in the logic, then running `git push`. I'd like to pull this routing logic out of `PhabricatorRepository` at some point, probably into a dedicated `ClusterURIQuery` sort of class, but that is a larger change. - Added some `fprintf(...)` stuff around which nodes were being selected. - Added a `sleep(10)` after grabbing the write lock. - In one window, pushed. Then pushed in a second window. - Saw the second window select the lock holder as the write target based on it currently holding the lock. - Without a concurrent push, saw pushes select up-to-date nodes based on their up-to-date-ness. Reviewers: amckinley Reviewed By: amckinley Subscribers: joshuaspence, timhirsh Maniphest Tasks: T13202, T13109 Differential Revision: https://secure.phabricator.com/D19734 --- .../storage/PhabricatorRepository.php | 66 ++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/src/applications/repository/storage/PhabricatorRepository.php b/src/applications/repository/storage/PhabricatorRepository.php index 8889040ddf..52824c2aa4 100644 --- a/src/applications/repository/storage/PhabricatorRepository.php +++ b/src/applications/repository/storage/PhabricatorRepository.php @@ -2010,12 +2010,72 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO } } - shuffle($results); + if ($writable) { + $results = $this->sortWritableAlmanacServiceURIs($results); + } else { + shuffle($results); + } $result = head($results); return $result['uri']; } + private function sortWritableAlmanacServiceURIs(array $results) { + // See T13109 for discussion of how this method routes requests. + + // In the absence of other rules, we'll send traffic to devices randomly. + // We also want to select randomly among nodes which are equally good + // candidates to receive the write, and accomplish that by shuffling the + // list up front. + shuffle($results); + + $order = array(); + + // If some device is currently holding the write lock, send all requests + // to that device. We're trying to queue writes on a single device so they + // do not need to wait for read synchronization after earlier writes + // complete. + $writer = PhabricatorRepositoryWorkingCopyVersion::loadWriter( + $this->getPHID()); + if ($writer) { + $device_phid = $writer->getWriteProperty('devicePHID'); + foreach ($results as $key => $result) { + if ($result['devicePHID'] === $device_phid) { + $order[] = $key; + } + } + } + + // If no device is currently holding the write lock, try to send requests + // to a device which is already up to date and will not need to synchronize + // before it can accept the write. + $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions( + $this->getPHID()); + if ($versions) { + $max_version = (int)max(mpull($versions, 'getRepositoryVersion')); + + $max_devices = array(); + foreach ($versions as $version) { + if ($version->getRepositoryVersion() == $max_version) { + $max_devices[] = $version->getDevicePHID(); + } + } + $max_devices = array_fuse($max_devices); + + foreach ($results as $key => $result) { + if (isset($max_devices[$result['devicePHID']])) { + $order[] = $key; + } + } + } + + // Reorder the results, putting any we've selected as preferred targets for + // the write at the head of the list. + $results = array_select_keys($results, $order) + $results; + + return $results; + } + public function supportsSynchronization() { // TODO: For now, this is only supported for Git. if (!$this->isGit()) { @@ -2036,7 +2096,7 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO $parts = array( "repo({$repository_phid})", "serv({$service_phid})", - 'v2', + 'v3', ); return implode('.', $parts); @@ -2063,12 +2123,14 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO $uri = $this->getClusterRepositoryURIFromBinding($binding); $protocol = $uri->getProtocol(); $device_name = $iface->getDevice()->getName(); + $device_phid = $iface->getDevice()->getPHID(); $uris[] = array( 'protocol' => $protocol, 'uri' => (string)$uri, 'device' => $device_name, 'writable' => (bool)$binding->getAlmanacPropertyValue('writable'), + 'devicePHID' => $device_phid, ); } From bc6c8c0e93a79072b94c8c44bec91fbfc65e10c5 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 5 Oct 2018 13:55:10 -0700 Subject: [PATCH 2/4] Explicitly shuffle nodes before selecting one for cluster sync Summary: Depends on D19734. Ref T13202. Ref T13109. Ref T10884. See PHI905. See PHI889. We currently rank cluster nodes in three cases: # when performing a write, we can go to any node (D19734 should make our ranking good); # when performing a read, we can go to any node (currently random, but T10884 discusses ideas to improve our ranking); # when performing an internal synchronization before a read or a write, we must go to an up-to-date node. Currently, case (3) is not-exactly-deterministic but not random, and we won't spread intracluster traffic acrosss the cluster evenly if, say, half of it is up to date and half of it is still synchronizing. For a given write, I believe all nodes will tend to synchronize from whichever node first received the write today. Instead, shuffle the list and synchronize from any up-to-date node. (I think we could improve upon this only by knowing which nodes actually have load and selecting the least-loaded -- doable, but not trivial.) Test Plan: Poked at it locally, will deploy to `secure`. This is hard to measure/test terribly convincingly. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13202, T13109, T10884 Differential Revision: https://secure.phabricator.com/D19735 --- .../diffusion/protocol/DiffusionRepositoryClusterEngine.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php b/src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php index f85c7862fe..2e5d4215db 100644 --- a/src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php +++ b/src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php @@ -688,6 +688,9 @@ final class DiffusionRepositoryClusterEngine extends Phobject { 'fetchable.')); } + // If we can synchronize from multiple sources, choose one at random. + shuffle($fetchable); + $caught = null; foreach ($fetchable as $binding) { try { From e0dea4c4862f271edf2ce4b7363ac11668a5dc1b Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 19 Oct 2018 04:41:40 -0700 Subject: [PATCH 3/4] Fix `packages(project)` to work properly and add it to "MailableFunctionDatasource" Summary: Ref T13210. See PHI937. This function datasource isn't quite implemented correctly: it doesn't resolve `package(project)` properly, since the logic only handles users. This blames back to D14013, where it looks like `packages(..)` was added mostly as a general nice-to-have as part of a larger modernization change. Test Plan: Ran a `packages(project)` query in Differential, got accurate results (previously: no results). Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13210 Differential Revision: https://secure.phabricator.com/D19747 --- ...catorMetaMTAMailableFunctionDatasource.php | 1 + ...habricatorOwnersPackageOwnerDatasource.php | 26 ++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/applications/metamta/typeahead/PhabricatorMetaMTAMailableFunctionDatasource.php b/src/applications/metamta/typeahead/PhabricatorMetaMTAMailableFunctionDatasource.php index 4b231b959e..b5d82c9749 100644 --- a/src/applications/metamta/typeahead/PhabricatorMetaMTAMailableFunctionDatasource.php +++ b/src/applications/metamta/typeahead/PhabricatorMetaMTAMailableFunctionDatasource.php @@ -23,6 +23,7 @@ final class PhabricatorMetaMTAMailableFunctionDatasource new PhabricatorProjectMembersDatasource(), new PhabricatorProjectDatasource(), new PhabricatorOwnersPackageDatasource(), + new PhabricatorOwnersPackageOwnerDatasource(), ); } diff --git a/src/applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php b/src/applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php index 99e770d266..2a0b16e60f 100644 --- a/src/applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php +++ b/src/applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php @@ -27,7 +27,7 @@ final class PhabricatorOwnersPackageOwnerDatasource 'packages' => array( 'name' => pht('Packages: ...'), 'arguments' => pht('owner'), - 'summary' => pht("Find results in any of an owner's projects."), + 'summary' => pht("Find results in any of an owner's packages."), 'description' => pht( "This function allows you to find results associated with any ". "of the packages a specified user or project is an owner of. ". @@ -61,18 +61,21 @@ final class PhabricatorOwnersPackageOwnerDatasource $phids = $this->resolvePHIDs($phids); - $user_phids = array(); + $owner_phids = array(); foreach ($phids as $key => $phid) { - if (phid_get_type($phid) == PhabricatorPeopleUserPHIDType::TYPECONST) { - $user_phids[] = $phid; - unset($phids[$key]); + switch (phid_get_type($phid)) { + case PhabricatorPeopleUserPHIDType::TYPECONST: + case PhabricatorProjectProjectPHIDType::TYPECONST: + $owner_phids[] = $phid; + unset($phids[$key]); + break; } } - if ($user_phids) { + if ($owner_phids) { $packages = id(new PhabricatorOwnersPackageQuery()) ->setViewer($this->getViewer()) - ->withOwnerPHIDs($user_phids) + ->withOwnerPHIDs($owner_phids) ->execute(); foreach ($packages as $package) { $phids[] = $package->getPHID(); @@ -116,8 +119,13 @@ final class PhabricatorOwnersPackageOwnerDatasource $usernames = array(); foreach ($phids as $key => $phid) { - if (phid_get_type($phid) != PhabricatorPeopleUserPHIDType::TYPECONST) { - $usernames[$key] = $phid; + switch (phid_get_type($phid)) { + case PhabricatorPeopleUserPHIDType::TYPECONST: + case PhabricatorProjectProjectPHIDType::TYPECONST: + break; + default: + $usernames[$key] = $phid; + break; } } From e2cf1e42887afc62631d006726a8ee88beaa56a0 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 19 Oct 2018 13:20:39 -0700 Subject: [PATCH 4/4] Skip copied code detection for changes that are too large for it to be useful Summary: Ref T13210. See PHI944. When parsing certain large diffs (the case in PHI944 is an 2.5-million line JSON diff), we spend ~66% of runtime and ~80% of memory doing copy detection (the little yellow bar which shows up to give you a hint that code was moved around within a diff). This is pretty much pointless and copy hints are almost certainly never useful on large changes. Instead, just bail if the change is larger than some arbitrary "probably too big for copy hints to ever be useful" threshold (here, 65535 lines). Test Plan: Roughly, ran this against a 2.5 million line JSON diff: ``` $changes = id(new ArcanistDiffParser())->parseDiff($raw_diff); $diff = DifferentialDiff::newFromRawChanges($viewer, $changes); ``` Before the changes, it took 20s + 2.5GB RAM to parse. After the changes, it took 7s + 500MB RAM to parse. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13210 Differential Revision: https://secure.phabricator.com/D19748 --- .../engine/DifferentialChangesetEngine.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/applications/differential/engine/DifferentialChangesetEngine.php b/src/applications/differential/engine/DifferentialChangesetEngine.php index e8a55a1b0e..d72db025ad 100644 --- a/src/applications/differential/engine/DifferentialChangesetEngine.php +++ b/src/applications/differential/engine/DifferentialChangesetEngine.php @@ -88,6 +88,20 @@ final class DifferentialChangesetEngine extends Phobject { private function detectCopiedCode(array $changesets) { + // See PHI944. If the total number of changed lines is excessively large, + // don't bother with copied code detection. This can take a lot of time and + // memory and it's not generally of any use for very large changes. + $max_size = 65535; + + $total_size = 0; + foreach ($changesets as $changeset) { + $total_size += ($changeset->getAddLines() + $changeset->getDelLines()); + } + + if ($total_size > $max_size) { + return; + } + $min_width = 30; $min_lines = 3;