From 28ee6b8080fccb8bd001c694558e67e6bed1e69b Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 14 May 2018 06:55:22 -0700 Subject: [PATCH 1/8] Consistently require MFA on the actual user creation flow Summary: See . We currently require MFA on the screen leading into the user create flow, but not the actual create flow. That is, `/people/create/` (which is just a "choose a type of account" page) requires MFA, but `/people/new//` does not, even though this is the actual creation page. Requiring MFA to create users isn't especially critical: creating users isn't really a dangerous action. The major threat is probably just that an attacker can extend their access to an install by creating an account which they have credentials for. It also isn't consistently enforced: you can invite users or approve users without an MFA check. So there's an argument for just removing the check. However, I think the check is probably reasonable and that we'd likely prefer to add some more checks eventually (e.g., require MFA to approve or invite) since these actions are rare and could represent useful tools for an attacker even if they are not especially dangerous on their own. This is also the only way to create bot or mailing list accounts, so this check does //something// on its own, at least. Test Plan: - Visited `/people/new/standard/` as an admin with MFA configured. - Before patch: no MFA prompt. - After patch: MFA prompt. Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D19448 --- .../people/controller/PhabricatorPeopleNewController.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/applications/people/controller/PhabricatorPeopleNewController.php b/src/applications/people/controller/PhabricatorPeopleNewController.php index 9f52cf567a..3521fb840f 100644 --- a/src/applications/people/controller/PhabricatorPeopleNewController.php +++ b/src/applications/people/controller/PhabricatorPeopleNewController.php @@ -7,6 +7,11 @@ final class PhabricatorPeopleNewController $type = $request->getURIData('type'); $admin = $request->getUser(); + id(new PhabricatorAuthSessionEngine())->requireHighSecuritySession( + $admin, + $request, + $this->getApplicationURI()); + $is_bot = false; $is_list = false; switch ($type) { From 29df80b48f163c236217f1024120411685ba5605 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 14 May 2018 08:45:22 -0700 Subject: [PATCH 2/8] Fix a fatal during breadcrumb construction when viewing a dashboard you don't have permission to view Summary: Ref PHI662. Viewing a dashboard you don't have permission to view (in the Dashboard application) currently fatals while building crumbs, since we fail to build the ` ... > Dashboard 123 > ...` crumb. Test Plan: - Viewed a dashboard I didn't have permission to view in the Dashboards application. - Before patch, fatal when calling `getID()` on a non-object. - After patch, sensible policy error page. - Viewed a dashboard I can view, saw sensible crumbs. Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D19449 --- .../PhabricatorDashboardProfileController.php | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/applications/dashboard/controller/PhabricatorDashboardProfileController.php b/src/applications/dashboard/controller/PhabricatorDashboardProfileController.php index 6a23b351ac..adc85efdf3 100644 --- a/src/applications/dashboard/controller/PhabricatorDashboardProfileController.php +++ b/src/applications/dashboard/controller/PhabricatorDashboardProfileController.php @@ -44,13 +44,16 @@ abstract class PhabricatorDashboardProfileController } protected function buildApplicationCrumbs() { - $dashboard = $this->getDashboard(); - $id = $dashboard->getID(); - $dashboard_uri = $this->getApplicationURI("/view/{$id}/"); - $crumbs = parent::buildApplicationCrumbs(); - $crumbs->addTextCrumb($dashboard->getName(), $dashboard_uri); $crumbs->setBorder(true); + + $dashboard = $this->getDashboard(); + if ($dashboard) { + $id = $dashboard->getID(); + $dashboard_uri = $this->getApplicationURI("/view/{$id}/"); + $crumbs->addTextCrumb($dashboard->getName(), $dashboard_uri); + } + return $crumbs; } From 7aa12f192a6bde7edc34cc230b37db94182b2927 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 14 May 2018 09:16:09 -0700 Subject: [PATCH 3/8] Add PhabricatorQueryIterator, for buffered iteration over a CursorPagedPolicyAwareQuery Summary: See D19446. This should make it easier to process larger, more complex result sets in constant memory. Today, `LiskMigrationIterator` takes constant memory but can't apply `needX()` reqeusts or `withY(...)` constraints. Using a raw `Query` can handle this stuff, but requires memory proportional to the size of the result set. Offer the best of both worlds: constant memory and full access to the power of `Query` classes. Test Plan: Used this script to iterate over every commit, saw sensible behavior: ```name=list-commits.php setViewer($viewer); $iterator = new PhabricatorQueryIterator($query); foreach ($iterator as $commit) { echo $commit->getID()."\n"; } ``` Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D19450 --- src/__phutil_library_map__.php | 2 + .../storage/lisk/PhabricatorQueryIterator.php | 41 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 src/infrastructure/storage/lisk/PhabricatorQueryIterator.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index e1b13dbecb..fab67c4087 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -4032,6 +4032,7 @@ phutil_register_library_map(array( 'PhabricatorPygmentSetupCheck' => 'applications/config/check/PhabricatorPygmentSetupCheck.php', 'PhabricatorQuery' => 'infrastructure/query/PhabricatorQuery.php', 'PhabricatorQueryConstraint' => 'infrastructure/query/constraint/PhabricatorQueryConstraint.php', + 'PhabricatorQueryIterator' => 'infrastructure/storage/lisk/PhabricatorQueryIterator.php', 'PhabricatorQueryOrderItem' => 'infrastructure/query/order/PhabricatorQueryOrderItem.php', 'PhabricatorQueryOrderTestCase' => 'infrastructure/query/order/__tests__/PhabricatorQueryOrderTestCase.php', 'PhabricatorQueryOrderVector' => 'infrastructure/query/order/PhabricatorQueryOrderVector.php', @@ -9876,6 +9877,7 @@ phutil_register_library_map(array( 'PhabricatorPygmentSetupCheck' => 'PhabricatorSetupCheck', 'PhabricatorQuery' => 'Phobject', 'PhabricatorQueryConstraint' => 'Phobject', + 'PhabricatorQueryIterator' => 'PhutilBufferedIterator', 'PhabricatorQueryOrderItem' => 'Phobject', 'PhabricatorQueryOrderTestCase' => 'PhabricatorTestCase', 'PhabricatorQueryOrderVector' => array( diff --git a/src/infrastructure/storage/lisk/PhabricatorQueryIterator.php b/src/infrastructure/storage/lisk/PhabricatorQueryIterator.php new file mode 100644 index 0000000000..c48600eb02 --- /dev/null +++ b/src/infrastructure/storage/lisk/PhabricatorQueryIterator.php @@ -0,0 +1,41 @@ +query = $query; + } + + protected function didRewind() { + $this->pager = new AphrontCursorPagerView(); + } + + public function key() { + return $this->current()->getID(); + } + + protected function loadPage() { + if (!$this->pager) { + return array(); + } + + $pager = clone $this->pager; + $query = clone $this->query; + + $results = $query->executeWithCursorPager($pager); + + // If we got less than a full page of results, this was the last set of + // results. Throw away the pager so we end iteration. + if (count($results) < $pager->getPageSize()) { + $this->pager = null; + } else { + $this->pager->setAfterID($pager->getNextPageID()); + } + + return $results; + } + +} From 3544620209cf25fc556c74e0df1c681ab4cdd44b Mon Sep 17 00:00:00 2001 From: epriestley Date: Tue, 15 May 2018 15:37:40 -0700 Subject: [PATCH 4/8] Parse unusual Subversion protocol frames which contain extra whitespace Summary: Fixes T13140. See PHI660. Recent versions of Subversion can send a `(get-file true false false )` protocol frame with extra space between "false" and "false". This is allowed by the protocol spec, but never normally happens, and we do not parse it correctly. Instead, parse it correctly. Test Plan: - Added unit tests. - Ran `svn proplist svn+ssh://.../diffusion/X/file.c` under SVN 1.10 before and after the change. - Before: indefinite hang. - After: completed in finite time. Reviewers: amckinley, asherkin Reviewed By: amckinley, asherkin Maniphest Tasks: T13140 Differential Revision: https://secure.phabricator.com/D19451 --- .../DiffusionSubversionWireProtocol.php | 27 ++++++++- ...iffusionSubversionWireProtocolTestCase.php | 59 ++++++++++++++++++- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/src/applications/diffusion/protocol/DiffusionSubversionWireProtocol.php b/src/applications/diffusion/protocol/DiffusionSubversionWireProtocol.php index 1cb0f107aa..251967f314 100644 --- a/src/applications/diffusion/protocol/DiffusionSubversionWireProtocol.php +++ b/src/applications/diffusion/protocol/DiffusionSubversionWireProtocol.php @@ -33,7 +33,24 @@ final class DiffusionSubversionWireProtocol extends Phobject { $messages = array(); while (true) { - if ($this->state == 'item') { + if ($this->state == 'space') { + // Consume zero or more extra spaces after matching an item. The + // protocol requires at least one space, but allows more than one. + + $matches = null; + if (!preg_match('/^(\s*)\S/', $this->buffer, $matches)) { + // Wait for more data. + break; + } + + // We have zero or more spaces and then some other character, so throw + // the spaces away and continue parsing frames. + if (strlen($matches[1])) { + $this->buffer = substr($this->buffer, strlen($matches[1])); + } + + $this->state = 'item'; + } else if ($this->state == 'item') { $match = null; $result = null; $buf = $this->buffer; @@ -69,6 +86,12 @@ final class DiffusionSubversionWireProtocol extends Phobject { ); $this->raw = ''; } + + // Consume any extra whitespace after an item. If we're in the + // "bytes" state, we aren't looking for whitespace. + if ($this->state == 'item') { + $this->state = 'space'; + } } else { // No matches yet, wait for more data. break; @@ -90,7 +113,7 @@ final class DiffusionSubversionWireProtocol extends Phobject { // Strip off the terminal space. $this->pushItem(substr($this->byteBuffer, 0, -1), 'string'); $this->byteBuffer = ''; - $this->state = 'item'; + $this->state = 'space'; } } else { throw new Exception(pht("Invalid state '%s'!", $this->state)); diff --git a/src/applications/diffusion/protocol/__tests__/DiffusionSubversionWireProtocolTestCase.php b/src/applications/diffusion/protocol/__tests__/DiffusionSubversionWireProtocolTestCase.php index bd99b82035..f4309b0e7b 100644 --- a/src/applications/diffusion/protocol/__tests__/DiffusionSubversionWireProtocolTestCase.php +++ b/src/applications/diffusion/protocol/__tests__/DiffusionSubversionWireProtocolTestCase.php @@ -59,6 +59,50 @@ final class DiffusionSubversionWireProtocolTestCase ), ), )); + + // This is testing that multiple spaces are parsed correctly. See T13140 + // for discussion. + $this->assertSameSubversionMessages( + '( get-file true false ) ', + // ^-- Note extra space! + array( + array( + array( + 'type' => 'word', + 'value' => 'get-file', + ), + array( + 'type' => 'word', + 'value' => 'true', + ), + array( + 'type' => 'word', + 'value' => 'false', + ), + ), + ), + '( get-file true false ) '); + + $this->assertSameSubversionMessages( + '( duck 5:quack moo ) ', + array( + array( + array( + 'type' => 'word', + 'value' => 'duck', + ), + array( + 'type' => 'string', + 'value' => 'quack', + ), + array( + 'type' => 'word', + 'value' => 'moo', + ), + ), + ), + '( duck 5:quack moo ) '); + } public function testSubversionWireProtocolPartialFrame() { @@ -86,7 +130,11 @@ final class DiffusionSubversionWireProtocolTestCase ipull($msg2, 'structure')); } - private function assertSameSubversionMessages($string, array $structs) { + private function assertSameSubversionMessages( + $string, + array $structs, + $serial_string = null) { + $proto = new DiffusionSubversionWireProtocol(); // Verify that the wire message parses into the structs. @@ -100,6 +148,13 @@ final class DiffusionSubversionWireProtocolTestCase $serial[] = $proto->serializeStruct($struct); } $serial = implode('', $serial); - $this->assertEqual($string, $serial, 'serialize<'.$string.'>'); + + if ($serial_string === null) { + $expect_serial = $string; + } else { + $expect_serial = $serial_string; + } + + $this->assertEqual($expect_serial, $serial, 'serialize<'.$string.'>'); } } From 79fdf5c127078fc7858bc47e4eb27c323f2c764e Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 16 May 2018 08:39:08 -0700 Subject: [PATCH 5/8] Separate changeset analysis code from DifferentialDiff and provide a standalone `rebuild-changesets` workflow Summary: Ref T13137. The "analyze/cache data about changesets" step is becoming more involved. We recently added detection for generated code to support "Ignore generated changes" in Owners, and I now plan to hash the new file content so we can hide changes which have no effect. Before adding this new hashing step, pull the "detect copied code" and "detect generated code" stuff out and move them to a separate `ChangesetEngine`. Then support doing a changeset rebuild directly with `bin/differential rebuild-changesets`. This simplifies things a bit and makes testing easier since you don't need to keep creating new revisions to re-run copy/generated/hash logic. Test Plan: Ran `bin/differential rebuild-changesets --revision Dxxx`, saw changesets rebuild. See also next change. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13137 Differential Revision: https://secure.phabricator.com/D19452 --- src/__phutil_library_map__.php | 4 + .../engine/DifferentialChangesetEngine.php | 223 ++++++++++++++++++ ...rDifferentialRebuildChangesetsWorkflow.php | 92 ++++++++ .../parser/DifferentialChangesetParser.php | 161 ------------- .../differential/storage/DifferentialDiff.php | 59 +---- 5 files changed, 323 insertions(+), 216 deletions(-) create mode 100644 src/applications/differential/engine/DifferentialChangesetEngine.php create mode 100644 src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index fab67c4087..8ad0f6fc40 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -432,6 +432,7 @@ phutil_register_library_map(array( 'DifferentialChangesSinceLastUpdateField' => 'applications/differential/customfield/DifferentialChangesSinceLastUpdateField.php', 'DifferentialChangeset' => 'applications/differential/storage/DifferentialChangeset.php', 'DifferentialChangesetDetailView' => 'applications/differential/view/DifferentialChangesetDetailView.php', + 'DifferentialChangesetEngine' => 'applications/differential/engine/DifferentialChangesetEngine.php', 'DifferentialChangesetFileTreeSideNavBuilder' => 'applications/differential/view/DifferentialChangesetFileTreeSideNavBuilder.php', 'DifferentialChangesetHTMLRenderer' => 'applications/differential/render/DifferentialChangesetHTMLRenderer.php', 'DifferentialChangesetListController' => 'applications/differential/controller/DifferentialChangesetListController.php', @@ -2869,6 +2870,7 @@ phutil_register_library_map(array( 'PhabricatorDifferentialExtractWorkflow' => 'applications/differential/management/PhabricatorDifferentialExtractWorkflow.php', 'PhabricatorDifferentialManagementWorkflow' => 'applications/differential/management/PhabricatorDifferentialManagementWorkflow.php', 'PhabricatorDifferentialMigrateHunkWorkflow' => 'applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php', + 'PhabricatorDifferentialRebuildChangesetsWorkflow' => 'applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php', 'PhabricatorDifferentialRevisionTestDataGenerator' => 'applications/differential/lipsum/PhabricatorDifferentialRevisionTestDataGenerator.php', 'PhabricatorDiffusionApplication' => 'applications/diffusion/application/PhabricatorDiffusionApplication.php', 'PhabricatorDiffusionBlameSetting' => 'applications/settings/setting/PhabricatorDiffusionBlameSetting.php', @@ -5729,6 +5731,7 @@ phutil_register_library_map(array( 'PhabricatorDestructibleInterface', ), 'DifferentialChangesetDetailView' => 'AphrontView', + 'DifferentialChangesetEngine' => 'Phobject', 'DifferentialChangesetFileTreeSideNavBuilder' => 'Phobject', 'DifferentialChangesetHTMLRenderer' => 'DifferentialChangesetRenderer', 'DifferentialChangesetListController' => 'DifferentialController', @@ -8530,6 +8533,7 @@ phutil_register_library_map(array( 'PhabricatorDifferentialExtractWorkflow' => 'PhabricatorDifferentialManagementWorkflow', 'PhabricatorDifferentialManagementWorkflow' => 'PhabricatorManagementWorkflow', 'PhabricatorDifferentialMigrateHunkWorkflow' => 'PhabricatorDifferentialManagementWorkflow', + 'PhabricatorDifferentialRebuildChangesetsWorkflow' => 'PhabricatorDifferentialManagementWorkflow', 'PhabricatorDifferentialRevisionTestDataGenerator' => 'PhabricatorTestDataGenerator', 'PhabricatorDiffusionApplication' => 'PhabricatorApplication', 'PhabricatorDiffusionBlameSetting' => 'PhabricatorInternalSetting', diff --git a/src/applications/differential/engine/DifferentialChangesetEngine.php b/src/applications/differential/engine/DifferentialChangesetEngine.php new file mode 100644 index 0000000000..ccb4381e43 --- /dev/null +++ b/src/applications/differential/engine/DifferentialChangesetEngine.php @@ -0,0 +1,223 @@ +detectGeneratedCode($changeset); + } + + $this->detectCopiedCode($changesets); + } + + +/* -( Generated Code )----------------------------------------------------- */ + + + private function detectGeneratedCode(DifferentialChangeset $changeset) { + $is_generated_trusted = $this->isTrustedGeneratedCode($changeset); + if ($is_generated_trusted) { + $changeset->setTrustedChangesetAttribute( + DifferentialChangeset::ATTRIBUTE_GENERATED, + $is_generated_trusted); + } + + $is_generated_untrusted = $this->isUntrustedGeneratedCode($changeset); + if ($is_generated_untrusted) { + $changeset->setUntrustedChangesetAttribute( + DifferentialChangeset::ATTRIBUTE_GENERATED, + $is_generated_untrusted); + } + } + + private function isTrustedGeneratedCode(DifferentialChangeset $changeset) { + + $filename = $changeset->getFilename(); + + $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths'); + foreach ($paths as $regexp) { + if (preg_match($regexp, $filename)) { + return true; + } + } + + return false; + } + + private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) { + + if ($changeset->getHunks()) { + $new_data = $changeset->makeNewFile(); + if (strpos($new_data, '@'.'generated') !== false) { + return true; + } + } + + return false; + } + + +/* -( Copied Code )-------------------------------------------------------- */ + + + private function detectCopiedCode(array $changesets) { + $min_width = 30; + $min_lines = 3; + + $map = array(); + $files = array(); + $types = array(); + foreach ($changesets as $changeset) { + $file = $changeset->getFilename(); + foreach ($changeset->getHunks() as $hunk) { + $lines = $hunk->getStructuredOldFile(); + foreach ($lines as $line => $info) { + $type = $info['type']; + if ($type == '\\') { + continue; + } + $types[$file][$line] = $type; + + $text = $info['text']; + $text = trim($text); + $files[$file][$line] = $text; + + if (strlen($text) >= $min_width) { + $map[$text][] = array($file, $line); + } + } + } + } + + foreach ($changesets as $changeset) { + $copies = array(); + foreach ($changeset->getHunks() as $hunk) { + $added = $hunk->getStructuredNewFile(); + $atype = array(); + + foreach ($added as $line => $info) { + $atype[$line] = $info['type']; + $added[$line] = trim($info['text']); + } + + $skip_lines = 0; + foreach ($added as $line => $code) { + if ($skip_lines) { + // We're skipping lines that we already processed because we + // extended a block above them downward to include them. + $skip_lines--; + continue; + } + + if ($atype[$line] !== '+') { + // This line hasn't been changed in the new file, so don't try + // to figure out where it came from. + continue; + } + + if (empty($map[$code])) { + // This line was too short to trigger copy/move detection. + continue; + } + + if (count($map[$code]) > 16) { + // If there are a large number of identical lines in this diff, + // don't try to figure out where this block came from: the analysis + // is O(N^2), since we need to compare every line against every + // other line. Even if we arrive at a result, it is unlikely to be + // meaningful. See T5041. + continue; + } + + $best_length = 0; + + // Explore all candidates. + foreach ($map[$code] as $val) { + list($file, $orig_line) = $val; + $length = 1; + + // Search backward and forward to find all of the adjacent lines + // which match. + foreach (array(-1, 1) as $direction) { + $offset = $direction; + while (true) { + if (isset($copies[$line + $offset])) { + // If we run into a block above us which we've already + // attributed to a move or copy from elsewhere, stop + // looking. + break; + } + + if (!isset($added[$line + $offset])) { + // If we've run off the beginning or end of the new file, + // stop looking. + break; + } + + if (!isset($files[$file][$orig_line + $offset])) { + // If we've run off the beginning or end of the original + // file, we also stop looking. + break; + } + + $old = $files[$file][$orig_line + $offset]; + $new = $added[$line + $offset]; + if ($old !== $new) { + // If the old line doesn't match the new line, stop + // looking. + break; + } + + $length++; + $offset += $direction; + } + } + + if ($length < $best_length) { + // If we already know of a better source (more matching lines) + // for this move/copy, stick with that one. We prefer long + // copies/moves which match a lot of context over short ones. + continue; + } + + if ($length == $best_length) { + if (idx($types[$file], $orig_line) != '-') { + // If we already know of an equally good source (same number + // of matching lines) and this isn't a move, stick with the + // other one. We prefer moves over copies. + continue; + } + } + + $best_length = $length; + // ($offset - 1) contains number of forward matching lines. + $best_offset = $offset - 1; + $best_file = $file; + $best_line = $orig_line; + } + + $file = ($best_file == $changeset->getFilename() ? '' : $best_file); + for ($i = $best_length; $i--; ) { + $type = idx($types[$best_file], $best_line + $best_offset - $i); + $copies[$line + $best_offset - $i] = ($best_length < $min_lines + ? array() // Ignore short blocks. + : array($file, $best_line + $best_offset - $i, $type)); + } + + $skip_lines = $best_offset; + } + } + + $copies = array_filter($copies); + if ($copies) { + $metadata = $changeset->getMetadata(); + $metadata['copy:lines'] = $copies; + $changeset->setMetadata($metadata); + } + } + + } + +} diff --git a/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php b/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php new file mode 100644 index 0000000000..29b116fc3c --- /dev/null +++ b/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php @@ -0,0 +1,92 @@ +setName('rebuild-changesets') + ->setExamples('**rebuild-changesets** --revision __revision__') + ->setSynopsis(pht('Rebuild changesets for a revision.')) + ->setArguments( + array( + array( + 'name' => 'revision', + 'param' => 'revision', + 'help' => pht('Revision to rebuild changesets for.'), + ), + )); + } + + public function execute(PhutilArgumentParser $args) { + $viewer = $this->getViewer(); + + $revision_identifier = $args->getArg('revision'); + if (!$revision_identifier) { + throw new PhutilArgumentUsageException( + pht('Specify a revision to rebuild changesets for with "--revision".')); + } + + $revision = id(new PhabricatorObjectQuery()) + ->setViewer($viewer) + ->withNames(array($revision_identifier)) + ->executeOne(); + if ($revision) { + if (!($revision instanceof DifferentialRevision)) { + throw new PhutilArgumentUsageException( + pht( + 'Object "%s" specified by "--revision" must be a Differential '. + 'revision.')); + } + } else { + $revision = id(new DifferentialRevisionQuery()) + ->setViewer($viewer) + ->withIDs(array($revision_identifier)) + ->executeOne(); + } + + if (!$revision) { + throw new PhutilArgumentUsageException( + pht( + 'No revision "%s" exists.', + $revision_identifier)); + } + + $diffs = id(new DifferentialDiffQuery()) + ->setViewer($viewer) + ->withRevisionIDs(array($revision->getID())) + ->execute(); + + $changesets = id(new DifferentialChangesetQuery()) + ->setViewer($viewer) + ->withDiffs($diffs) + ->needHunks(true) + ->execute(); + + $changeset_groups = mgroup($changesets, 'getDiffID'); + + foreach ($changeset_groups as $diff_id => $changesets) { + echo tsprintf( + "%s\n", + pht( + 'Rebuilding %s changeset(s) for diff ID %d.', + phutil_count($changesets), + $diff_id)); + + foreach ($changesets as $changeset) { + echo tsprintf( + " %s\n", + $changeset->getFilename()); + } + + id(new DifferentialChangesetEngine()) + ->rebuildChangesets($changesets); + + echo tsprintf( + "%s\n", + pht('Done.')); + } + } + + +} diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php index beb1b5a9a7..596ea0e426 100644 --- a/src/applications/differential/parser/DifferentialChangesetParser.php +++ b/src/applications/differential/parser/DifferentialChangesetParser.php @@ -1419,167 +1419,6 @@ final class DifferentialChangesetParser extends Phobject { return sprintf('%d%%', 100 * ($covered / ($covered + $not_covered))); } - public function detectCopiedCode( - array $changesets, - $min_width = 30, - $min_lines = 3) { - - assert_instances_of($changesets, 'DifferentialChangeset'); - - $map = array(); - $files = array(); - $types = array(); - foreach ($changesets as $changeset) { - $file = $changeset->getFilename(); - foreach ($changeset->getHunks() as $hunk) { - $lines = $hunk->getStructuredOldFile(); - foreach ($lines as $line => $info) { - $type = $info['type']; - if ($type == '\\') { - continue; - } - $types[$file][$line] = $type; - - $text = $info['text']; - $text = trim($text); - $files[$file][$line] = $text; - - if (strlen($text) >= $min_width) { - $map[$text][] = array($file, $line); - } - } - } - } - - foreach ($changesets as $changeset) { - $copies = array(); - foreach ($changeset->getHunks() as $hunk) { - $added = $hunk->getStructuredNewFile(); - $atype = array(); - - foreach ($added as $line => $info) { - $atype[$line] = $info['type']; - $added[$line] = trim($info['text']); - } - - $skip_lines = 0; - foreach ($added as $line => $code) { - if ($skip_lines) { - // We're skipping lines that we already processed because we - // extended a block above them downward to include them. - $skip_lines--; - continue; - } - - if ($atype[$line] !== '+') { - // This line hasn't been changed in the new file, so don't try - // to figure out where it came from. - continue; - } - - if (empty($map[$code])) { - // This line was too short to trigger copy/move detection. - continue; - } - - if (count($map[$code]) > 16) { - // If there are a large number of identical lines in this diff, - // don't try to figure out where this block came from: the analysis - // is O(N^2), since we need to compare every line against every - // other line. Even if we arrive at a result, it is unlikely to be - // meaningful. See T5041. - continue; - } - - $best_length = 0; - - // Explore all candidates. - foreach ($map[$code] as $val) { - list($file, $orig_line) = $val; - $length = 1; - - // Search backward and forward to find all of the adjacent lines - // which match. - foreach (array(-1, 1) as $direction) { - $offset = $direction; - while (true) { - if (isset($copies[$line + $offset])) { - // If we run into a block above us which we've already - // attributed to a move or copy from elsewhere, stop - // looking. - break; - } - - if (!isset($added[$line + $offset])) { - // If we've run off the beginning or end of the new file, - // stop looking. - break; - } - - if (!isset($files[$file][$orig_line + $offset])) { - // If we've run off the beginning or end of the original - // file, we also stop looking. - break; - } - - $old = $files[$file][$orig_line + $offset]; - $new = $added[$line + $offset]; - if ($old !== $new) { - // If the old line doesn't match the new line, stop - // looking. - break; - } - - $length++; - $offset += $direction; - } - } - - if ($length < $best_length) { - // If we already know of a better source (more matching lines) - // for this move/copy, stick with that one. We prefer long - // copies/moves which match a lot of context over short ones. - continue; - } - - if ($length == $best_length) { - if (idx($types[$file], $orig_line) != '-') { - // If we already know of an equally good source (same number - // of matching lines) and this isn't a move, stick with the - // other one. We prefer moves over copies. - continue; - } - } - - $best_length = $length; - // ($offset - 1) contains number of forward matching lines. - $best_offset = $offset - 1; - $best_file = $file; - $best_line = $orig_line; - } - - $file = ($best_file == $changeset->getFilename() ? '' : $best_file); - for ($i = $best_length; $i--; ) { - $type = idx($types[$best_file], $best_line + $best_offset - $i); - $copies[$line + $best_offset - $i] = ($best_length < $min_lines - ? array() // Ignore short blocks. - : array($file, $best_line + $best_offset - $i, $type)); - } - - $skip_lines = $best_offset; - } - } - - $copies = array_filter($copies); - if ($copies) { - $metadata = $changeset->getMetadata(); - $metadata['copy:lines'] = $copies; - $changeset->setMetadata($metadata); - } - } - return $changesets; - } - /** * Build maps from lines comments appear on to actual lines. */ diff --git a/src/applications/differential/storage/DifferentialDiff.php b/src/applications/differential/storage/DifferentialDiff.php index f121a1cb67..5f229f39b3 100644 --- a/src/applications/differential/storage/DifferentialDiff.php +++ b/src/applications/differential/storage/DifferentialDiff.php @@ -226,23 +226,18 @@ final class DifferentialDiff $changeset->setAddLines($add_lines); $changeset->setDelLines($del_lines); - self::detectGeneratedCode($changeset); - $diff->addUnsavedChangeset($changeset); } $diff->setLineCount($lines); - $parser = new DifferentialChangesetParser(); - $changesets = $parser->detectCopiedCode( - $diff->getChangesets(), - $min_width = 30, - $min_lines = 3); - $diff->attachChangesets($changesets); + $changesets = $diff->getChangesets(); + + id(new DifferentialChangesetEngine()) + ->rebuildChangesets($changesets); return $diff; } - public function getDiffDict() { $dict = array( 'id' => $this->getID(), @@ -823,50 +818,4 @@ final class DifferentialDiff ); } - private static function detectGeneratedCode( - DifferentialChangeset $changeset) { - - $is_generated_trusted = self::isTrustedGeneratedCode($changeset); - if ($is_generated_trusted) { - $changeset->setTrustedChangesetAttribute( - DifferentialChangeset::ATTRIBUTE_GENERATED, - $is_generated_trusted); - } - - $is_generated_untrusted = self::isUntrustedGeneratedCode($changeset); - if ($is_generated_untrusted) { - $changeset->setUntrustedChangesetAttribute( - DifferentialChangeset::ATTRIBUTE_GENERATED, - $is_generated_untrusted); - } - } - - private static function isTrustedGeneratedCode( - DifferentialChangeset $changeset) { - - $filename = $changeset->getFilename(); - - $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths'); - foreach ($paths as $regexp) { - if (preg_match($regexp, $filename)) { - return true; - } - } - - return false; - } - - private static function isUntrustedGeneratedCode( - DifferentialChangeset $changeset) { - - if ($changeset->getHunks()) { - $new_data = $changeset->makeNewFile(); - if (strpos($new_data, '@'.'generated') !== false) { - return true; - } - } - - return false; - } - } From 8f9b9484471ae7cd7b13e75df140ef842444c69d Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 16 May 2018 05:25:03 -0700 Subject: [PATCH 6/8] When showing a diff-of-diffs, hide files which didn't get any more changes and have no inlines Summary: Ref T13137. See that task for discussion. When we show a diff-of-diffs, we often render stubs for files which didn't change between the diffs. These stubs usually aren't a big deal, but for certain types of changes (like refactors) they can create a lot of clutter. Instead, hide these stubs and show a notice that we hid them. Test Plan: - Created a revision affecting 4 files. - Updated it with a diff that changed only 1 of the 4 files. - Added an inline comment to a different file. - Viewed the diff of diffs. - Before: 4 changesets with two "nothing changed" stubs. - After: 2 changesets with the stubs hidden. {F5621083} Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13137 Differential Revision: https://secure.phabricator.com/D19453 --- .../DifferentialRevisionViewController.php | 101 ++++++++++++++---- .../engine/DifferentialChangesetEngine.php | 25 +++++ ...rDifferentialRebuildChangesetsWorkflow.php | 4 + .../storage/DifferentialChangeset.php | 43 ++++++++ 4 files changed, 153 insertions(+), 20 deletions(-) diff --git a/src/applications/differential/controller/DifferentialRevisionViewController.php b/src/applications/differential/controller/DifferentialRevisionViewController.php index 6b13e893d5..cd71c62a1e 100644 --- a/src/applications/differential/controller/DifferentialRevisionViewController.php +++ b/src/applications/differential/controller/DifferentialRevisionViewController.php @@ -5,6 +5,7 @@ final class DifferentialRevisionViewController private $revisionID; private $changesetCount; + private $hiddenChangesets; public function shouldAllowPublic() { return true; @@ -169,6 +170,7 @@ final class DifferentialRevisionViewController } $handles = $this->loadViewerHandles($object_phids); + $warnings = array(); $request_uri = $request->getRequestURI(); @@ -203,11 +205,19 @@ final class DifferentialRevisionViewController pht('Expand All Files'))), ); - $warning = id(new PHUIInfoView()) + $warnings[] = id(new PHUIInfoView()) ->setTitle(pht('Large Diff')) ->setSeverity(PHUIInfoView::SEVERITY_WARNING) ->appendChild($message); + $folded_changesets = $changesets; + } else { + $folded_changesets = array(); + } + + // Don't hide or fold changesets which have inline comments. + $hidden_changesets = $this->hiddenChangesets; + if ($hidden_changesets || $folded_changesets) { $old = array_select_keys($changesets, $old_ids); $new = array_select_keys($changesets, $new_ids); @@ -222,16 +232,47 @@ final class DifferentialRevisionViewController $new, $revision); - $visible_changesets = array(); foreach ($inlines as $inline) { $changeset_id = $inline->getChangesetID(); - if (isset($changesets[$changeset_id])) { - $visible_changesets[$changeset_id] = $changesets[$changeset_id]; + if (!isset($changesets[$changeset_id])) { + continue; } + + unset($hidden_changesets[$changeset_id]); + unset($folded_changesets[$changeset_id]); } - } else { - $warning = null; - $visible_changesets = $changesets; + } + + // If we would hide only one changeset, don't hide anything. The notice + // we'd render about it is about the same size as the changeset. + if (count($hidden_changesets) < 2) { + $hidden_changesets = array(); + } + + // Update the set of hidden changesets, since we may have just un-hidden + // some of them. + if ($hidden_changesets) { + $warnings[] = id(new PHUIInfoView()) + ->setTitle(pht('Showing Only Differences')) + ->setSeverity(PHUIInfoView::SEVERITY_NOTICE) + ->appendChild( + pht( + 'This revision modifies %s more files that are hidden because '. + 'they were not modified between selected diffs and they have no '. + 'inline comments.', + phutil_count($hidden_changesets))); + } + + // Compute the unfolded changesets. By default, everything is unfolded. + $unfolded_changesets = $changesets; + foreach ($folded_changesets as $changeset_id => $changeset) { + unset($unfolded_changesets[$changeset_id]); + } + + // Throw away any hidden changesets. + foreach ($hidden_changesets as $changeset_id => $changeset) { + unset($changesets[$changeset_id]); + unset($unfolded_changesets[$changeset_id]); } $commit_hashes = mpull($diffs, 'getSourceControlBaseRevision'); @@ -267,7 +308,7 @@ final class DifferentialRevisionViewController if ($repository) { $symbol_indexes = $this->buildSymbolIndexes( $repository, - $visible_changesets); + $unfolded_changesets); } else { $symbol_indexes = array(); } @@ -328,7 +369,7 @@ final class DifferentialRevisionViewController } else { $changeset_view = id(new DifferentialChangesetListView()) ->setChangesets($changesets) - ->setVisibleChangesets($visible_changesets) + ->setVisibleChangesets($unfolded_changesets) ->setStandaloneURI('/differential/changeset/') ->setRawFileURIs( '/differential/changeset/?view=old', @@ -405,7 +446,7 @@ final class DifferentialRevisionViewController $toc_view = $this->buildTableOfContents( $changesets, - $visible_changesets, + $unfolded_changesets, $target->loadCoverageMap($viewer)); // Attach changesets to each reviewer so we can show which Owners package @@ -520,7 +561,7 @@ final class DifferentialRevisionViewController $footer[] = array( $anchor, - $warning, + $warnings, $tab_view, $changeset_view, ); @@ -807,6 +848,7 @@ final class DifferentialRevisionViewController DifferentialDiff $target, DifferentialDiff $diff_vs = null, PhabricatorRepository $repository = null) { + $viewer = $this->getViewer(); $load_diffs = array($target); if ($diff_vs) { @@ -814,7 +856,7 @@ final class DifferentialRevisionViewController } $raw_changesets = id(new DifferentialChangesetQuery()) - ->setViewer($this->getRequest()->getUser()) + ->setViewer($viewer) ->withDiffs($load_diffs) ->execute(); $changeset_groups = mgroup($raw_changesets, 'getDiffID'); @@ -822,17 +864,19 @@ final class DifferentialRevisionViewController $changesets = idx($changeset_groups, $target->getID(), array()); $changesets = mpull($changesets, null, 'getID'); - $refs = array(); - $vs_map = array(); + $refs = array(); + $vs_map = array(); $vs_changesets = array(); + $must_compare = array(); if ($diff_vs) { - $vs_id = $diff_vs->getID(); + $vs_id = $diff_vs->getID(); $vs_changesets_path_map = array(); foreach (idx($changeset_groups, $vs_id, array()) as $changeset) { $path = $changeset->getAbsoluteRepositoryPath($repository, $diff_vs); $vs_changesets_path_map[$path] = $changeset; $vs_changesets[$changeset->getID()] = $changeset; } + foreach ($changesets as $key => $changeset) { $path = $changeset->getAbsoluteRepositoryPath($repository, $target); if (isset($vs_changesets_path_map[$path])) { @@ -841,15 +885,20 @@ final class DifferentialRevisionViewController $refs[$changeset->getID()] = $changeset->getID().'/'.$vs_changesets_path_map[$path]->getID(); unset($vs_changesets_path_map[$path]); + + $must_compare[] = $changeset->getID(); + } else { $refs[$changeset->getID()] = $changeset->getID(); } } + foreach ($vs_changesets_path_map as $path => $changeset) { $changesets[$changeset->getID()] = $changeset; - $vs_map[$changeset->getID()] = -1; - $refs[$changeset->getID()] = $changeset->getID().'/-1'; + $vs_map[$changeset->getID()] = -1; + $refs[$changeset->getID()] = $changeset->getID().'/-1'; } + } else { foreach ($changesets as $changeset) { $refs[$changeset->getID()] = $changeset->getID(); @@ -858,13 +907,25 @@ final class DifferentialRevisionViewController $changesets = msort($changesets, 'getSortKey'); + // See T13137. When displaying the diff between two updates, hide any + // changesets which haven't actually changed. + $this->hiddenChangesets = array(); + foreach ($must_compare as $changeset_id) { + $changeset = $changesets[$changeset_id]; + $vs_changeset = $vs_changesets[$vs_map[$changeset_id]]; + + if ($changeset->hasSameEffectAs($vs_changeset)) { + $this->hiddenChangesets[$changeset_id] = $changesets[$changeset_id]; + } + } + return array($changesets, $vs_map, $vs_changesets, $refs); } private function buildSymbolIndexes( PhabricatorRepository $repository, - array $visible_changesets) { - assert_instances_of($visible_changesets, 'DifferentialChangeset'); + array $unfolded_changesets) { + assert_instances_of($unfolded_changesets, 'DifferentialChangeset'); $engine = PhabricatorSyntaxHighlighter::newEngine(); @@ -889,7 +950,7 @@ final class DifferentialRevisionViewController $sources); $indexed_langs = array_fill_keys($langs, true); - foreach ($visible_changesets as $key => $changeset) { + foreach ($unfolded_changesets as $key => $changeset) { $lang = $engine->getLanguageFromFilename($changeset->getFilename()); if (empty($indexed_langs) || isset($indexed_langs[$lang])) { $symbol_indexes[$key] = array( diff --git a/src/applications/differential/engine/DifferentialChangesetEngine.php b/src/applications/differential/engine/DifferentialChangesetEngine.php index ccb4381e43..e8a55a1b0e 100644 --- a/src/applications/differential/engine/DifferentialChangesetEngine.php +++ b/src/applications/differential/engine/DifferentialChangesetEngine.php @@ -7,6 +7,7 @@ final class DifferentialChangesetEngine extends Phobject { foreach ($changesets as $changeset) { $this->detectGeneratedCode($changeset); + $this->computeHashes($changeset); } $this->detectCopiedCode($changesets); @@ -59,6 +60,30 @@ final class DifferentialChangesetEngine extends Phobject { } +/* -( Content Hashes )----------------------------------------------------- */ + + + private function computeHashes(DifferentialChangeset $changeset) { + + $effect_key = DifferentialChangeset::METADATA_EFFECT_HASH; + + $effect_hash = $this->newEffectHash($changeset); + if ($effect_hash !== null) { + $changeset->setChangesetMetadata($effect_key, $effect_hash); + } + } + + private function newEffectHash(DifferentialChangeset $changeset) { + + if ($changeset->getHunks()) { + $new_data = $changeset->makeNewFile(); + return PhabricatorHash::digestForIndex($new_data); + } + + return null; + } + + /* -( Copied Code )-------------------------------------------------------- */ diff --git a/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php b/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php index 29b116fc3c..068771284b 100644 --- a/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php +++ b/src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php @@ -82,6 +82,10 @@ final class PhabricatorDifferentialRebuildChangesetsWorkflow id(new DifferentialChangesetEngine()) ->rebuildChangesets($changesets); + foreach ($changesets as $changeset) { + $changeset->save(); + } + echo tsprintf( "%s\n", pht('Done.')); diff --git a/src/applications/differential/storage/DifferentialChangeset.php b/src/applications/differential/storage/DifferentialChangeset.php index 27d3ad4d64..00af84bc4e 100644 --- a/src/applications/differential/storage/DifferentialChangeset.php +++ b/src/applications/differential/storage/DifferentialChangeset.php @@ -26,6 +26,7 @@ final class DifferentialChangeset const METADATA_TRUSTED_ATTRIBUTES = 'attributes.trusted'; const METADATA_UNTRUSTED_ATTRIBUTES = 'attributes.untrusted'; + const METADATA_EFFECT_HASH = 'hash.effect'; const ATTRIBUTE_GENERATED = 'generated'; @@ -143,6 +144,48 @@ final class DifferentialChangeset return $ret; } + /** + * Test if this changeset and some other changeset put the affected file in + * the same state. + * + * @param DifferentialChangeset Changeset to compare against. + * @return bool True if the two changesets have the same effect. + */ + public function hasSameEffectAs(DifferentialChangeset $other) { + if ($this->getFilename() !== $other->getFilename()) { + return false; + } + + $hash_key = self::METADATA_EFFECT_HASH; + + $u_hash = $this->getChangesetMetadata($hash_key); + if ($u_hash === null) { + return false; + } + + $v_hash = $other->getChangesetMetadata($hash_key); + if ($v_hash === null) { + return false; + } + + if ($u_hash !== $v_hash) { + return false; + } + + // Make sure the final states for the file properties (like the "+x" + // executable bit) match one another. + $u_props = $this->getNewProperties(); + $v_props = $other->getNewProperties(); + ksort($u_props); + ksort($v_props); + + if ($u_props !== $v_props) { + return false; + } + + return true; + } + public function getSortKey() { $sort_key = $this->getFilename(); // Sort files with ".h" in them first, so headers (.h, .hpp) come before From 3c5668b4a54f66277328e710744aa92093d781c4 Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 17 May 2018 05:56:43 -0700 Subject: [PATCH 7/8] When database connection exceptions occur, raise them to the setup layer Summary: Ref T13141. Currently, during first-time setup we don't surface all the details about connection exceptions that we could: the underlying exception is discarded inside cluster connection management. This isn't a huge issue since the reason for connection problems is usually fairly obvious, but in at least one case (see T13141) we hit a less-than-obvious exception. Instead, store the original exception and propagate the message up the stack so users have more information about the problem. Test Plan: - Configured an intentionally bad MySQL username. - Restarted Apache and loaded Phabricator. - Got a more helpful exception with a specific authentication error message. {F5622361} Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13141 Differential Revision: https://secure.phabricator.com/D19454 --- .../cluster/PhabricatorDatabaseRef.php | 7 +++++ .../storage/lisk/PhabricatorLiskDAO.php | 29 ++++++++++++++----- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/infrastructure/cluster/PhabricatorDatabaseRef.php b/src/infrastructure/cluster/PhabricatorDatabaseRef.php index 5a32ef7c11..7dc55427ca 100644 --- a/src/infrastructure/cluster/PhabricatorDatabaseRef.php +++ b/src/infrastructure/cluster/PhabricatorDatabaseRef.php @@ -29,6 +29,7 @@ final class PhabricatorDatabaseRef private $connectionLatency; private $connectionStatus; private $connectionMessage; + private $connectionException; private $replicaStatus; private $replicaMessage; @@ -453,6 +454,7 @@ final class PhabricatorDatabaseRef return false; } + $this->connectionException = null; try { $connection->openConnection(); $reachable = true; @@ -463,6 +465,7 @@ final class PhabricatorDatabaseRef // yet. throw $ex; } catch (Exception $ex) { + $this->connectionException = $ex; $reachable = false; } @@ -506,6 +509,10 @@ final class PhabricatorDatabaseRef return $this->healthRecord; } + public function getConnectionException() { + return $this->connectionException; + } + public static function getActiveDatabaseRefs() { $refs = array(); diff --git a/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php b/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php index 743ca9b759..b3b324e951 100644 --- a/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php +++ b/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php @@ -80,6 +80,8 @@ abstract class PhabricatorLiskDAO extends LiskDAO { $master = PhabricatorDatabaseRef::getMasterDatabaseRefForApplication( $application); + $master_exception = null; + if ($master && !$master->isSevered()) { $connection = $master->newApplicationConnection($database); if ($master->isReachable($connection)) { @@ -91,6 +93,8 @@ abstract class PhabricatorLiskDAO extends LiskDAO { PhabricatorEnv::setReadOnly( true, PhabricatorEnv::READONLY_UNREACHABLE); + + $master_exception = $master->getConnectionException(); } } @@ -108,7 +112,7 @@ abstract class PhabricatorLiskDAO extends LiskDAO { $this->raiseUnconfigured($database); } - $this->raiseUnreachable($database); + $this->raiseUnreachable($database, $master_exception); } private function raiseImproperWrite($database) { @@ -136,13 +140,22 @@ abstract class PhabricatorLiskDAO extends LiskDAO { $database)); } - private function raiseUnreachable($database) { - throw new PhabricatorClusterStrandedException( - pht( - 'Unable to establish a connection to any database host '. - '(while trying "%s"). All masters and replicas are completely '. - 'unreachable.', - $database)); + private function raiseUnreachable($database, Exception $proxy = null) { + $message = pht( + 'Unable to establish a connection to any database host '. + '(while trying "%s"). All masters and replicas are completely '. + 'unreachable.', + $database); + + if ($proxy) { + $proxy_message = pht( + '%s: %s', + get_class($proxy), + $proxy->getMessage()); + $message = $message."\n\n".$proxy_message; + } + + throw new PhabricatorClusterStrandedException($message); } From de999af61422e4f48bd62bdf0eac46dfa72775c3 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 18 May 2018 12:23:23 -0700 Subject: [PATCH 8/8] Improve some behaviors around memory pressure when pushing many and/or large changes Summary: Ref T13142. When commits are pushed, we try to handle them on one of two pathways: - Normal changes: we load these into memory and potentially apply Herald content rules to them. - "Enormous" changes: we don't load these into memory and skip content rules for them. The goal is to degrade gracefully when users push huge changes: they should work, just not support all the features. However, some changes can slip through the cracks right now: - If you push a lot of commits at once, we'll try to cache all of the changes smaller than 1GB in memory. This can require an arbitrarily large amount of RAM. - We calculate sizes by just looking at the `strlen()` of the diff, but a changeset takes more RAM in PHP than the raw diff does. So even if a diff is "only" 500MB, it can take much more memory than that. On systems with relatively little memory available, this may result in OOM while processing changes that are close to the "enormous" limit. This change makes two improvements: - Instead of caching everything, cache only 64MB of things. - For most pushes, this is the same, since they have less than 64MB of diffs. - For pushes of single very large changes, this is a bit slower (more CPU) since we have to do some work twice. - For pushes of many changes, this is slower (more CPU) since we have to do some work twice, but, critically, doesn't require unlimited memory. - Instead of flagging changes as "enormous" at 1GB, flag them as "enormous" at 256MB. - This reduces how much memory is required to process the largest "non-enormous" changes. - This also gets us under Git's hard-coded 512MB "always binary" cutoff; see T13143. - This is still completely gigantic and way larger than any normal change should be. An additional improvement would be to try to reduce the amount of memory we need to use to hold a change in process memory. I think the other changes here alone will fix the immediate issue in PHI657, but it would be nice if the "largest non-enormous change" required only a couple gigs of RAM. Test Plan: - Used `ini_set('memory_limit', '1G')` to artificially limit memory to 1GB. - Pushed a series of two commits which add two 550MB text files (Temporarily, I added a `--binary` flag to trick Git into showing real diffs for these, see T13143.) - Got a memory limit error. - Applied the "cache only 64MB of stuff" and "consider 256MB, not 1GB, to be enormous" changes. - Pushed again, got properly rejected as enormous. - Added `memory_get_usage()` calls to measure how actual memory size and reported "size" estimate compare. For these changes, saw a 639MB diff require 31,479MB of memory, i.e. a factor of about 50x. This is, uh, pretty not great. - Allowed enormous changes, pushed again, push went through. Reviewers: amckinley Maniphest Tasks: T13142 Differential Revision: https://secure.phabricator.com/D19455 --- .../engine/DiffusionCommitHookEngine.php | 26 ++++++++++++++++--- .../diffusion/herald/HeraldCommitAdapter.php | 2 +- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/applications/diffusion/engine/DiffusionCommitHookEngine.php b/src/applications/diffusion/engine/DiffusionCommitHookEngine.php index 3bbb7c082b..857c939a23 100644 --- a/src/applications/diffusion/engine/DiffusionCommitHookEngine.php +++ b/src/applications/diffusion/engine/DiffusionCommitHookEngine.php @@ -37,6 +37,7 @@ final class DiffusionCommitHookEngine extends Phobject { private $rejectDetails; private $emailPHIDs = array(); private $changesets = array(); + private $changesetsSize = 0; /* -( Config )------------------------------------------------------------- */ @@ -1121,11 +1122,22 @@ final class DiffusionCommitHookEngine extends Phobject { return; } + // See T13142. Don't cache more than 64MB of changesets. For normal small + // pushes, caching everything here can let us hit the cache from Herald if + // we need to run content rules, which speeds things up a bit. For large + // pushes, we may not be able to hold everything in memory. + $cache_limit = 1024 * 1024 * 64; + foreach ($content_updates as $update) { $identifier = $update->getRefNew(); try { - $changesets = $this->loadChangesetsForCommit($identifier); - $this->changesets[$identifier] = $changesets; + $info = $this->loadChangesetsForCommit($identifier); + list($changesets, $size) = $info; + + if ($this->changesetsSize + $size <= $cache_limit) { + $this->changesets[$identifier] = $changesets; + $this->changesetsSize += $size; + } } catch (Exception $ex) { $this->changesets[$identifier] = $ex; @@ -1207,7 +1219,11 @@ final class DiffusionCommitHookEngine extends Phobject { $changes = $parser->parseDiff($raw_diff); $diff = DifferentialDiff::newEphemeralFromRawChanges( $changes); - return $diff->getChangesets(); + + $changesets = $diff->getChangesets(); + $size = strlen($raw_diff); + + return array($changesets, $size); } public function getChangesetsForCommit($identifier) { @@ -1221,7 +1237,9 @@ final class DiffusionCommitHookEngine extends Phobject { return $cached; } - return $this->loadChangesetsForCommit($identifier); + $info = $this->loadChangesetsForCommit($identifier); + list($changesets, $size) = $info; + return $changesets; } public function loadCommitRefForCommit($identifier) { diff --git a/src/applications/diffusion/herald/HeraldCommitAdapter.php b/src/applications/diffusion/herald/HeraldCommitAdapter.php index fc3c8c4f5b..f3a0ef53ab 100644 --- a/src/applications/diffusion/herald/HeraldCommitAdapter.php +++ b/src/applications/diffusion/herald/HeraldCommitAdapter.php @@ -207,7 +207,7 @@ final class HeraldCommitAdapter } public static function getEnormousByteLimit() { - return 1024 * 1024 * 1024; // 1GB + return 256 * 1024 * 1024; // 256MB. See T13142 and T13143. } public static function getEnormousTimeLimit() {