From 2748f83e12252d79beedca4f270e7967a8993000 Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 16 Apr 2020 11:42:32 -0700 Subject: [PATCH] Modularize Ferret fulltext functions Summary: Ref T13511. Currently, Ferret fulltext field functions (like "title:") are hard-coded. Modularize them so extensions may define new ones. Test Plan: Added a new custom field which emits data for the indexer, searched for "animal-noises:moo", "animal-noises:-", etc., in global search and application search. Maniphest Tasks: T13511 Differential Revision: https://secure.phabricator.com/D21131 --- src/__phutil_library_map__.php | 4 + .../compiler/PhutilSearchQueryCompiler.php | 2 +- .../PhutilSearchQueryCompilerTestCase.php | 8 ++ ...abricatorFerretFulltextEngineExtension.php | 20 +++ .../search/ferret/PhabricatorFerretEngine.php | 48 ++++--- .../FerretConfigurableSearchFunction.php | 31 +++++ .../ferret/function/FerretSearchFunction.php | 122 ++++++++++++++++++ .../PhabricatorFulltextEngineExtension.php | 4 + .../search/query/PhabricatorFulltextToken.php | 16 ++- ...PhabricatorCursorPagedPolicyAwareQuery.php | 38 +++++- 10 files changed, 261 insertions(+), 32 deletions(-) create mode 100644 src/applications/search/ferret/function/FerretConfigurableSearchFunction.php create mode 100644 src/applications/search/ferret/function/FerretSearchFunction.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index cf618237d4..840575911f 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -1288,6 +1288,8 @@ phutil_register_library_map(array( 'FeedPushWorker' => 'applications/feed/worker/FeedPushWorker.php', 'FeedQueryConduitAPIMethod' => 'applications/feed/conduit/FeedQueryConduitAPIMethod.php', 'FeedStoryNotificationGarbageCollector' => 'applications/notification/garbagecollector/FeedStoryNotificationGarbageCollector.php', + 'FerretConfigurableSearchFunction' => 'applications/search/ferret/function/FerretConfigurableSearchFunction.php', + 'FerretSearchFunction' => 'applications/search/ferret/function/FerretSearchFunction.php', 'FileAllocateConduitAPIMethod' => 'applications/files/conduit/FileAllocateConduitAPIMethod.php', 'FileConduitAPIMethod' => 'applications/files/conduit/FileConduitAPIMethod.php', 'FileCreateMailReceiver' => 'applications/files/mail/FileCreateMailReceiver.php', @@ -7402,6 +7404,8 @@ phutil_register_library_map(array( 'FeedPushWorker' => 'PhabricatorWorker', 'FeedQueryConduitAPIMethod' => 'FeedConduitAPIMethod', 'FeedStoryNotificationGarbageCollector' => 'PhabricatorGarbageCollector', + 'FerretConfigurableSearchFunction' => 'FerretSearchFunction', + 'FerretSearchFunction' => 'Phobject', 'FileAllocateConduitAPIMethod' => 'FileConduitAPIMethod', 'FileConduitAPIMethod' => 'ConduitAPIMethod', 'FileCreateMailReceiver' => 'PhabricatorApplicationMailReceiver', diff --git a/src/applications/search/compiler/PhutilSearchQueryCompiler.php b/src/applications/search/compiler/PhutilSearchQueryCompiler.php index 891cfcce28..8f99682f37 100644 --- a/src/applications/search/compiler/PhutilSearchQueryCompiler.php +++ b/src/applications/search/compiler/PhutilSearchQueryCompiler.php @@ -148,7 +148,7 @@ final class PhutilSearchQueryCompiler if ($enable_functions) { $found = false; for ($jj = $ii; $jj < $length; $jj++) { - if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) { + if (preg_match('/^[a-zA-Z-]\z/u', $query[$jj])) { continue; } if ($query[$jj] == ':') { diff --git a/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php index dca1b16e94..8576e800c7 100644 --- a/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php +++ b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php @@ -197,6 +197,14 @@ final class PhutilSearchQueryCompilerTestCase // impossible. 'title:- title:x' => false, 'title:- title:~' => false, + + 'abcdefghijklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPQRSTUVWXYZ:xyz' => array( + array( + 'abcdefghijklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPQRSTUVWXYZ', + $op_and, + 'xyz', + ), + ), ); $this->assertCompileFunctionQueries($function_tests); diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php index a4d6637ad1..4c3382641d 100644 --- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php +++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php @@ -253,4 +253,24 @@ final class PhabricatorFerretFulltextEngineExtension $old_id); } + public function newFerretSearchFunctions() { + return array( + id(new FerretConfigurableSearchFunction()) + ->setFerretFunctionName('all') + ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_ALL), + id(new FerretConfigurableSearchFunction()) + ->setFerretFunctionName('title') + ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_TITLE), + id(new FerretConfigurableSearchFunction()) + ->setFerretFunctionName('body') + ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_BODY), + id(new FerretConfigurableSearchFunction()) + ->setFerretFunctionName('core') + ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_CORE), + id(new FerretConfigurableSearchFunction()) + ->setFerretFunctionName('comment') + ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_COMMENT), + ); + } + } diff --git a/src/applications/search/ferret/PhabricatorFerretEngine.php b/src/applications/search/ferret/PhabricatorFerretEngine.php index ffba15c369..3f36893b40 100644 --- a/src/applications/search/ferret/PhabricatorFerretEngine.php +++ b/src/applications/search/ferret/PhabricatorFerretEngine.php @@ -2,6 +2,10 @@ abstract class PhabricatorFerretEngine extends Phobject { + private $fieldMap = array(); + private $ferretFunctions; + private $templateObject; + abstract public function getApplicationName(); abstract public function getScopeName(); abstract public function newSearchEngine(); @@ -14,39 +18,31 @@ abstract class PhabricatorFerretEngine extends Phobject { return 1000; } - public function getFieldForFunction($function) { - $function = phutil_utf8_strtolower($function); + final public function getFunctionForName($raw_name) { + if (isset($this->fieldMap[$raw_name])) { + return $this->fieldMap[$raw_name]; + } - $map = $this->getFunctionMap(); - if (!isset($map[$function])) { + $normalized_name = + FerretSearchFunction::getNormalizedFunctionName($raw_name); + + if ($this->ferretFunctions === null) { + $functions = FerretSearchFunction::newFerretSearchFunctions(); + $this->ferretFunctions = $functions; + } + + if (!isset($this->ferretFunctions[$normalized_name])) { throw new PhutilSearchQueryCompilerSyntaxException( pht( 'Unknown search function "%s". Supported functions are: %s.', - $function, - implode(', ', array_keys($map)))); + $raw_name, + implode(', ', array_keys($this->ferretFunctions)))); } - return $map[$function]['field']; - } + $function = $this->ferretFunctions[$normalized_name]; + $this->fieldMap[$raw_name] = $function; - private function getFunctionMap() { - return array( - 'all' => array( - 'field' => PhabricatorSearchDocumentFieldType::FIELD_ALL, - ), - 'title' => array( - 'field' => PhabricatorSearchDocumentFieldType::FIELD_TITLE, - ), - 'body' => array( - 'field' => PhabricatorSearchDocumentFieldType::FIELD_BODY, - ), - 'core' => array( - 'field' => PhabricatorSearchDocumentFieldType::FIELD_CORE, - ), - 'comment' => array( - 'field' => PhabricatorSearchDocumentFieldType::FIELD_COMMENT, - ), - ); + return $this->fieldMap[$raw_name]; } public function newStemmer() { diff --git a/src/applications/search/ferret/function/FerretConfigurableSearchFunction.php b/src/applications/search/ferret/function/FerretConfigurableSearchFunction.php new file mode 100644 index 0000000000..2e622a8ee7 --- /dev/null +++ b/src/applications/search/ferret/function/FerretConfigurableSearchFunction.php @@ -0,0 +1,31 @@ +ferretFunctionName = $ferret_function_name; + return $this; + } + + public function getFerretFunctionName() { + return $this->ferretFunctionName; + } + + public function setFerretFieldKey($ferret_field_key) { + $this->ferretFieldKey = $ferret_field_key; + return $this; + } + + public function getFerretFieldKey() { + return $this->ferretFieldKey; + } + +} diff --git a/src/applications/search/ferret/function/FerretSearchFunction.php b/src/applications/search/ferret/function/FerretSearchFunction.php new file mode 100644 index 0000000000..60886c2fb6 --- /dev/null +++ b/src/applications/search/ferret/function/FerretSearchFunction.php @@ -0,0 +1,122 @@ +newFerretSearchFunctions(); + + if (!is_array($functions)) { + throw new Exception( + pht( + 'Expected fulltext engine extension ("%s") to return a '. + 'list of functions from "newFerretSearchFunctions()", '. + 'got "%s".', + get_class($extension), + phutil_describe_type($functions))); + } + + foreach ($functions as $idx => $function) { + if (!($function instanceof FerretSearchFunction)) { + throw new Exception( + pht( + 'Expected fulltext engine extension ("%s") to return a list '. + 'of "FerretSearchFunction" objects from '. + '"newFerretSearchFunctions()", but found something else '. + '("%s") at index "%s".', + get_class($extension), + phutil_describe_type($function), + $idx)); + } + + $function_name = $function->getFerretFunctionName(); + + self::validateFerretFunctionName($function_name); + + $normal_name = self::getNormalizedFunctionName( + $function_name); + if ($normal_name !== $function_name) { + throw new Exception( + pht( + 'Ferret function "%s" is specified with a denormalized name. '. + 'Instead, specify the function using the normalized '. + 'function name ("%s").', + $normal_name)); + } + + if (isset($function_map[$function_name])) { + $other_extension = $function_map[$function_name]; + throw new Exception( + pht( + 'Two different fulltext engine extensions ("%s" and "%s") '. + 'both define a search function with the same name ("%s"). '. + 'Each function must have a unique name.', + get_class($extension), + get_class($other_extension), + $function_name)); + } + $function_map[$function_name] = $extension; + + $field_key = $function->getFerretFieldKey(); + + self::validateFerretFunctionFieldKey($field_key); + + if (isset($field_map[$field_key])) { + $other_extension = $field_map[$field_key]; + throw new Exception( + pht( + 'Two different fulltext engine extensions ("%s" and "%s") '. + 'both define a search function with the same key ("%s"). '. + 'Each function must have a unique key.', + get_class($extension), + get_class($other_extension), + $field_key)); + } + $field_map[$field_key] = $extension; + + $results[$function_name] = $function; + } + } + + ksort($results); + + return $results; + } + +} diff --git a/src/applications/search/index/PhabricatorFulltextEngineExtension.php b/src/applications/search/index/PhabricatorFulltextEngineExtension.php index 52aabe7c8b..e8761b2e01 100644 --- a/src/applications/search/index/PhabricatorFulltextEngineExtension.php +++ b/src/applications/search/index/PhabricatorFulltextEngineExtension.php @@ -39,4 +39,8 @@ abstract class PhabricatorFulltextEngineExtension extends Phobject { ->execute(); } + public function newFerretSearchFunctions() { + return array(); + } + } diff --git a/src/applications/search/query/PhabricatorFulltextToken.php b/src/applications/search/query/PhabricatorFulltextToken.php index e99c2151c7..ba5f76e1e1 100644 --- a/src/applications/search/query/PhabricatorFulltextToken.php +++ b/src/applications/search/query/PhabricatorFulltextToken.php @@ -43,6 +43,12 @@ final class PhabricatorFulltextToken extends Phobject { $tip = null; $icon = null; + $name = $token->getValue(); + $function = $token->getFunction(); + if ($function !== null) { + $name = pht('%s: %s', $function, $name); + } + if ($this->getIsShort()) { $shade = PHUITagView::COLOR_GREY; $tip = pht('Ignored Short Word'); @@ -64,6 +70,14 @@ final class PhabricatorFulltextToken extends Phobject { $tip = pht('Exact Search'); $shade = PHUITagView::COLOR_GREEN; break; + case PhutilSearchQueryCompiler::OPERATOR_PRESENT: + $name = pht('Field Present: %s', $function); + $shade = PHUITagView::COLOR_GREEN; + break; + case PhutilSearchQueryCompiler::OPERATOR_ABSENT: + $name = pht('Field Absent: %s', $function); + $shade = PHUITagView::COLOR_RED; + break; default: $shade = PHUITagView::COLOR_BLUE; break; @@ -73,7 +87,7 @@ final class PhabricatorFulltextToken extends Phobject { $tag = id(new PHUITagView()) ->setType(PHUITagView::TYPE_SHADE) ->setColor($shade) - ->setName($token->getValue()); + ->setName($name); if ($tip !== null) { Javelin::initBehavior('phabricator-tooltips'); diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php index dd5826bff3..cb3a62c6ab 100644 --- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php +++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php @@ -1815,7 +1815,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $function = $default_function; } - $raw_field = $engine->getFieldForFunction($function); + $function_def = $engine->getFunctionForName($function); // NOTE: The query compiler guarantees that a query can not make a // field both "present" and "absent", so it's safe to just use the @@ -1829,7 +1829,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $alias = 'ftfield_'.$idx++; $table_map[$function] = array( 'alias' => $alias, - 'key' => $raw_field, + 'function' => $function_def, 'optional' => $is_optional, ); } @@ -1838,7 +1838,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery // Join the title field separately so we can rank results. $table_map['rank'] = array( 'alias' => 'ft_rank', - 'key' => PhabricatorSearchDocumentFieldType::FIELD_TITLE, + 'function' => $engine->getFunctionForName('title'), // See T13345. Not every document has a title, so we want to LEFT JOIN // this table to avoid excluding documents with no title that match @@ -2130,6 +2130,36 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $ngram); } + $object = $this->newResultObject(); + if (!$object) { + throw new Exception( + pht( + 'Query class ("%s") must define "newResultObject()" to use '. + 'Ferret constraints.', + get_class($this))); + } + + // See T13511. If we have a fulltext query which uses valid field + // functions, but at least one of the functions applies to a field which + // the object can never have, the query can never match anything. Detect + // this and return an empty result set. + + // (Even if the query is "field is absent" or "field does not contain + // such-and-such", the interpretation is that these constraints are + // not meaningful when applied to an object which can never have the + // field.) + + $functions = ipull($this->ferretTables, 'function'); + $functions = mpull($functions, null, 'getFerretFunctionName'); + foreach ($functions as $function) { + if (!$function->supportsObject($object)) { + throw new PhabricatorEmptyQueryException( + pht( + 'This query uses a fulltext function which this document '. + 'type does not support.')); + } + } + foreach ($this->ferretTables as $table) { $alias = $table['alias']; @@ -2148,7 +2178,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $alias, $alias, $alias, - $table['key']); + $table['function']->getFerretFieldKey()); } return $joins;