From 625d5235a5bc15467224d809477006cfb2efcf20 Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 17 Nov 2016 09:34:54 -0800 Subject: [PATCH] Prevent typeahead sources from querying against empty tokens Summary: Certain unusual queries, like `[-]`, could tokenize into a list which included the empty string. This would then convert into a query for `... LIKE "%"` which just joins the entire table. Instead: tokenize smarter; never return the empty token; add some test cases. Test Plan: Ran unit tests. Queried for `[[blah blah]]`, saw a reasonable query come out the other end. Reviewers: chad Reviewed By: chad Subscribers: 20after4 Differential Revision: https://secure.phabricator.com/D16888 --- PhabricatorTypeaheadDatasourceTestCase.php | 39 +++++++++++++++++++ .../PhabricatorTypeaheadDatasource.php | 14 ++++++- 2 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 PhabricatorTypeaheadDatasourceTestCase.php diff --git a/PhabricatorTypeaheadDatasourceTestCase.php b/PhabricatorTypeaheadDatasourceTestCase.php new file mode 100644 index 0000000000..92544db4f7 --- /dev/null +++ b/PhabricatorTypeaheadDatasourceTestCase.php @@ -0,0 +1,39 @@ +assertTokenization( + 'The quick brown fox', + array('the', 'quick', 'brown', 'fox')); + + $this->assertTokenization( + 'Quack quack QUACK', + array('quack')); + + $this->assertTokenization( + '', + array()); + + $this->assertTokenization( + ' [ - ] ', + array()); + + $this->assertTokenization( + 'jury-rigged', + array('jury', 'rigged')); + + $this->assertTokenization( + '[[ brackets ]] [-] ]-[ tie-fighters', + array('brackets', 'tie', 'fighters')); + } + + private function assertTokenization($input, $expect) { + $this->assertEqual( + $expect, + PhabricatorTypeaheadDatasource::tokenizeString($input), + pht('Tokenization of "%s"', $input)); + } + +} diff --git a/src/applications/typeahead/datasource/PhabricatorTypeaheadDatasource.php b/src/applications/typeahead/datasource/PhabricatorTypeaheadDatasource.php index 163b6c40b3..cfea3bd30c 100644 --- a/src/applications/typeahead/datasource/PhabricatorTypeaheadDatasource.php +++ b/src/applications/typeahead/datasource/PhabricatorTypeaheadDatasource.php @@ -141,8 +141,18 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject { return array(); } - $tokens = preg_split('/\s+|[-\[\]]/u', $string); - return array_unique($tokens); + $tokens = preg_split('/[\s\[\]-]+/u', $string); + $tokens = array_unique($tokens); + + // Make sure we don't return the empty token, as this will boil down to a + // JOIN against every token. + foreach ($tokens as $key => $value) { + if (!strlen($value)) { + unset($tokens[$key]); + } + } + + return array_values($tokens); } public function getTokens() {