From e5a495f435105397f0100024feb50d48306151c6 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 30 Aug 2017 08:37:05 -0700 Subject: [PATCH] Parse raw Ferret queries into tokens before processing them Summary: Ref T12819. Depends on D18492. Instead of passing a raw query into the Query layer, parse it first. This allows the query layer to figure out which parts should be substring vs term match, and would allow the SearchEngine layer to do `author:...` eventually by picking it out before sending it to the Ferret engine. Test Plan: Ran some Ferret queries. They work like before, except that nonsense like `-+"quack"` raises an exception now. Reviewers: chad Reviewed By: chad Maniphest Tasks: T12819 Differential Revision: https://secure.phabricator.com/D18499 --- .../query/ManiphestTaskSearchEngine.php | 17 +++++++++++- ...PhabricatorCursorPagedPolicyAwareQuery.php | 27 +++++++++++-------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/applications/maniphest/query/ManiphestTaskSearchEngine.php b/src/applications/maniphest/query/ManiphestTaskSearchEngine.php index 8eb4a416e3..956b8c168e 100644 --- a/src/applications/maniphest/query/ManiphestTaskSearchEngine.php +++ b/src/applications/maniphest/query/ManiphestTaskSearchEngine.php @@ -232,9 +232,24 @@ final class ManiphestTaskSearchEngine } if (strlen($map['ferret'])) { + $raw_query = $map['ferret']; + + $compiler = id(new PhutilSearchQueryCompiler()) + ->setEnableFunctions(true); + + $raw_tokens = $compiler->newTokens($raw_query); + + $fulltext_tokens = array(); + foreach ($raw_tokens as $raw_token) { + $fulltext_token = id(new PhabricatorFulltextToken()) + ->setToken($raw_token); + + $fulltext_tokens[] = $fulltext_token; + } + $query->withFerretConstraint( id(new ManiphestTask())->newFerretEngine(), - $map['ferret']); + $fulltext_tokens); } if ($map['parentIDs']) { diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php index c0b2bbc100..437ca0ce4f 100644 --- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php +++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php @@ -28,7 +28,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery private $spaceIsArchived; private $ngrams = array(); private $ferretEngine; - private $ferretConstraints; + private $ferretTokens; protected function getPageCursors(array $page) { return array( @@ -1386,7 +1386,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery public function withFerretConstraint( PhabricatorFerretEngine $engine, - $raw_query) { + array $fulltext_tokens) { if ($this->ferretEngine) { throw new Exception( @@ -1394,12 +1394,12 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery 'Query may not have multiple fulltext constraints.')); } - if (!strlen($raw_query)) { + if (!$fulltext_tokens) { return $this; } $this->ferretEngine = $engine; - $this->ferretConstraints = preg_split('/\s+/', $raw_query); + $this->ferretTokens = $fulltext_tokens; return $this; } @@ -1416,9 +1416,11 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $ngram_table_name = $ngram_table->getTableName(); $flat = array(); - foreach ($this->ferretConstraints as $term) { - $value = $term; - $length = count(phutil_utf8v($term)); + foreach ($this->ferretTokens as $fulltext_token) { + $raw_token = $fulltext_token->getToken(); + $value = $raw_token->getValue(); + + $length = count(phutil_utf8v($value)); if ($length >= 3) { $ngrams = $ngram_engine->getNgramsFromString($value, 'query'); @@ -1509,19 +1511,22 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery } $where = array(); - foreach ($this->ferretConstraints as $constraint) { + foreach ($this->ferretTokens as $fulltext_token) { + $raw_token = $fulltext_token->getToken(); + $value = $raw_token->getValue(); + $where[] = qsprintf( $conn, '(ftfield.rawCorpus LIKE %~ OR ftfield.normalCorpus LIKE %~)', - $constraint, - $constraint); + $value, + $value); } return $where; } protected function shouldGroupFerretResultRows() { - return (bool)$this->ferretConstraints; + return (bool)$this->ferretTokens; }