From 72cb3d3c84905c0d75074e4ecf74c493e3a2d527 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 30 Aug 2017 11:08:50 -0700 Subject: [PATCH] Limit the damage that degenerate project name typeahead queries can cause Summary: See PHI47. When users copy/paste a wall of text into a project tokenizer, we can end up performing a very large number of JOINs. These JOINs seem okay locally and on `secure`, but the install in PHI47 reports hitting issues. Since these queries are almost certainly illegitimate (I think no one uses more than 5 words to find a project), just limit the search to the 5 longest tokens. Note that typing 6 tokens will still almost always work, since the UI does additional filtering. However, if you have 100+ projects named "a b c d e ..." and search for "a b c d e z", you may not hit it. This is so degenerate that it's hard to imagine any users encountering it. This is a stopgap fix; I'll file something longer-term as a followup. Test Plan: Used `/typeahead/class/PhabricatorProjectDatasource/` to run queries. Saw the same results with shorter query plans for all reasonable queries. 
Reviewers: chad Reviewed By: chad Differential Revision: https://secure.phabricator.com/D18506 --- .../project/query/PhabricatorProjectQuery.php | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/applications/project/query/PhabricatorProjectQuery.php b/src/applications/project/query/PhabricatorProjectQuery.php index 780f072678..955eba9972 100644 --- a/src/applications/project/query/PhabricatorProjectQuery.php +++ b/src/applications/project/query/PhabricatorProjectQuery.php @@ -609,7 +609,8 @@ final class PhabricatorProjectQuery } if ($this->nameTokens !== null) { - foreach ($this->nameTokens as $key => $token) { + $name_tokens = $this->getNameTokensForQuery($this->nameTokens); + foreach ($name_tokens as $key => $token) { $token_table = 'token_'.$key; $joins[] = qsprintf( $conn, @@ -797,4 +798,22 @@ final class PhabricatorProjectQuery } } + private function getNameTokensForQuery(array $tokens) { + // When querying for projects by name, only actually search for the five + // longest tokens. MySQL can get grumpy with a large number of JOINs + // with LIKEs and queries for more than 5 tokens are essentially never + // legitimate searches for projects, but users copy/pasting nonsense. + // See also PHI47. + + $length_map = array(); + foreach ($tokens as $token) { + $length_map[$token] = strlen($token); + } + arsort($length_map); + + $length_map = array_slice($length_map, 0, 5, true); + + return array_keys($length_map); + } + }