1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-11 07:11:04 +01:00

Modularize Ferret fulltext functions

Summary: Ref T13511. Currently, Ferret fulltext field functions (like "title:") are hard-coded. Modularize them so extensions may define new ones.

Test Plan: Added a new custom field which emits data for the indexer, searched for "animal-noises:moo", "animal-noises:-", etc., in global search and application search.

Maniphest Tasks: T13511

Differential Revision: https://secure.phabricator.com/D21131
This commit is contained in:
epriestley 2020-04-16 11:42:32 -07:00
parent 894d9b6587
commit 2748f83e12
10 changed files with 261 additions and 32 deletions

View file

@ -1288,6 +1288,8 @@ phutil_register_library_map(array(
'FeedPushWorker' => 'applications/feed/worker/FeedPushWorker.php',
'FeedQueryConduitAPIMethod' => 'applications/feed/conduit/FeedQueryConduitAPIMethod.php',
'FeedStoryNotificationGarbageCollector' => 'applications/notification/garbagecollector/FeedStoryNotificationGarbageCollector.php',
'FerretConfigurableSearchFunction' => 'applications/search/ferret/function/FerretConfigurableSearchFunction.php',
'FerretSearchFunction' => 'applications/search/ferret/function/FerretSearchFunction.php',
'FileAllocateConduitAPIMethod' => 'applications/files/conduit/FileAllocateConduitAPIMethod.php',
'FileConduitAPIMethod' => 'applications/files/conduit/FileConduitAPIMethod.php',
'FileCreateMailReceiver' => 'applications/files/mail/FileCreateMailReceiver.php',
@ -7402,6 +7404,8 @@ phutil_register_library_map(array(
'FeedPushWorker' => 'PhabricatorWorker',
'FeedQueryConduitAPIMethod' => 'FeedConduitAPIMethod',
'FeedStoryNotificationGarbageCollector' => 'PhabricatorGarbageCollector',
'FerretConfigurableSearchFunction' => 'FerretSearchFunction',
'FerretSearchFunction' => 'Phobject',
'FileAllocateConduitAPIMethod' => 'FileConduitAPIMethod',
'FileConduitAPIMethod' => 'ConduitAPIMethod',
'FileCreateMailReceiver' => 'PhabricatorApplicationMailReceiver',

View file

@ -148,7 +148,7 @@ final class PhutilSearchQueryCompiler
if ($enable_functions) {
$found = false;
for ($jj = $ii; $jj < $length; $jj++) {
if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) {
if (preg_match('/^[a-zA-Z-]\z/u', $query[$jj])) {
continue;
}
if ($query[$jj] == ':') {

View file

@ -197,6 +197,14 @@ final class PhutilSearchQueryCompilerTestCase
// impossible.
'title:- title:x' => false,
'title:- title:~' => false,
'abcdefghijklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPQRSTUVWXYZ:xyz' => array(
array(
'abcdefghijklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPQRSTUVWXYZ',
$op_and,
'xyz',
),
),
);
$this->assertCompileFunctionQueries($function_tests);

View file

@ -253,4 +253,24 @@ final class PhabricatorFerretFulltextEngineExtension
$old_id);
}
/**
 * Build the built-in Ferret search functions.
 *
 * Each function is a FerretConfigurableSearchFunction which pairs a
 * function name with one of the PhabricatorSearchDocumentFieldType field
 * constants.
 *
 * @return list<FerretSearchFunction> Search functions, in declaration order.
 */
public function newFerretSearchFunctions() {
  // Function name => field constant the function is configured with.
  $field_keys = array(
    'all' => PhabricatorSearchDocumentFieldType::FIELD_ALL,
    'title' => PhabricatorSearchDocumentFieldType::FIELD_TITLE,
    'body' => PhabricatorSearchDocumentFieldType::FIELD_BODY,
    'core' => PhabricatorSearchDocumentFieldType::FIELD_CORE,
    'comment' => PhabricatorSearchDocumentFieldType::FIELD_COMMENT,
  );

  $search_functions = array();
  foreach ($field_keys as $function_name => $field_key) {
    $search_functions[] = id(new FerretConfigurableSearchFunction())
      ->setFerretFunctionName($function_name)
      ->setFerretFieldKey($field_key);
  }

  return $search_functions;
}
}

View file

@ -2,6 +2,10 @@
abstract class PhabricatorFerretEngine extends Phobject {
private $fieldMap = array();
private $ferretFunctions;
private $templateObject;
abstract public function getApplicationName();
abstract public function getScopeName();
abstract public function newSearchEngine();
@ -14,39 +18,31 @@ abstract class PhabricatorFerretEngine extends Phobject {
return 1000;
}
public function getFieldForFunction($function) {
$function = phutil_utf8_strtolower($function);
final public function getFunctionForName($raw_name) {
if (isset($this->fieldMap[$raw_name])) {
return $this->fieldMap[$raw_name];
}
$map = $this->getFunctionMap();
if (!isset($map[$function])) {
$normalized_name =
FerretSearchFunction::getNormalizedFunctionName($raw_name);
if ($this->ferretFunctions === null) {
$functions = FerretSearchFunction::newFerretSearchFunctions();
$this->ferretFunctions = $functions;
}
if (!isset($this->ferretFunctions[$normalized_name])) {
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Unknown search function "%s". Supported functions are: %s.',
$function,
implode(', ', array_keys($map))));
$raw_name,
implode(', ', array_keys($this->ferretFunctions))));
}
return $map[$function]['field'];
}
$function = $this->ferretFunctions[$normalized_name];
$this->fieldMap[$raw_name] = $function;
private function getFunctionMap() {
return array(
'all' => array(
'field' => PhabricatorSearchDocumentFieldType::FIELD_ALL,
),
'title' => array(
'field' => PhabricatorSearchDocumentFieldType::FIELD_TITLE,
),
'body' => array(
'field' => PhabricatorSearchDocumentFieldType::FIELD_BODY,
),
'core' => array(
'field' => PhabricatorSearchDocumentFieldType::FIELD_CORE,
),
'comment' => array(
'field' => PhabricatorSearchDocumentFieldType::FIELD_COMMENT,
),
);
return $this->fieldMap[$raw_name];
}
public function newStemmer() {

View file

@ -0,0 +1,31 @@
<?php
/**
 * A Ferret search function whose name and field key are supplied through
 * setters rather than hard-coded in a subclass.
 *
 * This function reports that it supports every object it is asked about.
 */
final class FerretConfigurableSearchFunction
  extends FerretSearchFunction {

  private $ferretFunctionName;
  private $ferretFieldKey;

  /**
   * @param string $ferret_function_name Name of the search function.
   * @return $this
   */
  public function setFerretFunctionName($ferret_function_name) {
    $this->ferretFunctionName = $ferret_function_name;
    return $this;
  }

  /**
   * @return string|null The configured function name, or null if unset.
   */
  public function getFerretFunctionName() {
    return $this->ferretFunctionName;
  }

  /**
   * @param string $ferret_field_key Field key this function is bound to.
   * @return $this
   */
  public function setFerretFieldKey($ferret_field_key) {
    $this->ferretFieldKey = $ferret_field_key;
    return $this;
  }

  /**
   * @return string|null The configured field key, or null if unset.
   */
  public function getFerretFieldKey() {
    return $this->ferretFieldKey;
  }

  /**
   * Configurable functions place no restriction on the object type.
   *
   * @param PhabricatorFerretInterface $object Object being queried.
   * @return bool Always true.
   */
  public function supportsObject(PhabricatorFerretInterface $object) {
    return true;
  }

}

View file

@ -0,0 +1,122 @@
<?php
/**
 * Base class for Ferret fulltext search functions (like "title:").
 *
 * Fulltext engine extensions return instances of this class from their
 * "newFerretSearchFunctions()" method; @{method:newFerretSearchFunctions}
 * collects and validates them.
 */
abstract class FerretSearchFunction
  extends Phobject {

  /**
   * @return string Function name. Must be nonempty, contain only latin
   *   letters and hyphens, and already be in normalized (lowercase) form.
   */
  abstract public function getFerretFunctionName();

  /**
   * @return string Field key. Must be exactly four lowercase latin letters.
   */
  abstract public function getFerretFieldKey();

  /**
   * @param PhabricatorFerretInterface $object Object being queried.
   * @return bool True if this function can apply to the object.
   */
  abstract public function supportsObject(PhabricatorFerretInterface $object);

  /**
   * Normalize a function name by lowercasing it.
   *
   * @param string $name Raw function name.
   * @return string Normalized function name.
   */
  final public static function getNormalizedFunctionName($name) {
    return phutil_utf8_strtolower($name);
  }

  /**
   * Require that a function name contains only latin letters and hyphens.
   *
   * @param string $function_name Function name to validate.
   * @return void
   * @throws Exception If the name is empty or has other characters.
   */
  final public static function validateFerretFunctionName($function_name) {
    if (!preg_match('/^[a-zA-Z-]+\z/', $function_name)) {
      // The offending name must be passed to fill the "%s" placeholder;
      // previously the argument was missing.
      throw new Exception(
        pht(
          'Ferret search engine function name ("%s") is invalid. Function '.
          'names must be nonempty and may only contain latin letters and '.
          'hyphens.',
          $function_name));
    }
  }

  /**
   * Require that a field key is exactly four lowercase latin letters.
   *
   * @param string $field_key Field key to validate.
   * @return void
   * @throws Exception If the key has any other form.
   */
  final public static function validateFerretFunctionFieldKey($field_key) {
    if (!preg_match('/^[a-z]{4}\z/', $field_key)) {
      throw new Exception(
        pht(
          'Ferret search engine field key ("%s") is invalid. Field keys '.
          'must be exactly four characters long and contain only '.
          'lowercase latin letters.',
          $field_key));
    }
  }

  /**
   * Collect search functions from every fulltext engine extension.
   *
   * Each function is validated: its name and field key must be well-formed,
   * the name must already be normalized, and both the name and the field key
   * must be unique across all extensions.
   *
   * @return map<string, FerretSearchFunction> Functions keyed by function
   *   name, sorted by name.
   * @throws Exception If any extension returns an invalid or conflicting
   *   function.
   */
  final public static function newFerretSearchFunctions() {
    $extensions = PhabricatorFulltextEngineExtension::getAllExtensions();

    // These maps remember which extension claimed each name and field key,
    // so a conflict can name both parties.
    $function_map = array();
    $field_map = array();
    $results = array();

    foreach ($extensions as $extension) {
      $functions = $extension->newFerretSearchFunctions();

      if (!is_array($functions)) {
        throw new Exception(
          pht(
            'Expected fulltext engine extension ("%s") to return a '.
            'list of functions from "newFerretSearchFunctions()", '.
            'got "%s".',
            get_class($extension),
            phutil_describe_type($functions)));
      }

      foreach ($functions as $idx => $function) {
        if (!($function instanceof FerretSearchFunction)) {
          throw new Exception(
            pht(
              'Expected fulltext engine extension ("%s") to return a list '.
              'of "FerretSearchFunction" objects from '.
              '"newFerretSearchFunctions()", but found something else '.
              '("%s") at index "%s".',
              get_class($extension),
              phutil_describe_type($function),
              $idx));
        }

        $function_name = $function->getFerretFunctionName();
        self::validateFerretFunctionName($function_name);

        $normal_name = self::getNormalizedFunctionName(
          $function_name);
        if ($normal_name !== $function_name) {
          // The message has two placeholders: the name as specified, then
          // the normalized name. Previously only the normalized name was
          // passed.
          throw new Exception(
            pht(
              'Ferret function "%s" is specified with a denormalized name. '.
              'Instead, specify the function using the normalized '.
              'function name ("%s").',
              $function_name,
              $normal_name));
        }

        if (isset($function_map[$function_name])) {
          $other_extension = $function_map[$function_name];
          throw new Exception(
            pht(
              'Two different fulltext engine extensions ("%s" and "%s") '.
              'both define a search function with the same name ("%s"). '.
              'Each function must have a unique name.',
              get_class($extension),
              get_class($other_extension),
              $function_name));
        }
        $function_map[$function_name] = $extension;

        $field_key = $function->getFerretFieldKey();
        self::validateFerretFunctionFieldKey($field_key);

        if (isset($field_map[$field_key])) {
          $other_extension = $field_map[$field_key];
          throw new Exception(
            pht(
              'Two different fulltext engine extensions ("%s" and "%s") '.
              'both define a search function with the same key ("%s"). '.
              'Each function must have a unique key.',
              get_class($extension),
              get_class($other_extension),
              $field_key));
        }
        $field_map[$field_key] = $extension;

        $results[$function_name] = $function;
      }
    }

    ksort($results);

    return $results;
  }

}

View file

@ -39,4 +39,8 @@ abstract class PhabricatorFulltextEngineExtension extends Phobject {
->execute();
}
/**
 * Return the Ferret search functions this extension provides.
 *
 * The default implementation provides none. Subclasses may override this
 * to return a list of FerretSearchFunction objects.
 *
 * @return list<FerretSearchFunction> Search functions; empty by default.
 */
public function newFerretSearchFunctions() {
return array();
}
}

View file

@ -43,6 +43,12 @@ final class PhabricatorFulltextToken extends Phobject {
$tip = null;
$icon = null;
$name = $token->getValue();
$function = $token->getFunction();
if ($function !== null) {
$name = pht('%s: %s', $function, $name);
}
if ($this->getIsShort()) {
$shade = PHUITagView::COLOR_GREY;
$tip = pht('Ignored Short Word');
@ -64,6 +70,14 @@ final class PhabricatorFulltextToken extends Phobject {
$tip = pht('Exact Search');
$shade = PHUITagView::COLOR_GREEN;
break;
case PhutilSearchQueryCompiler::OPERATOR_PRESENT:
$name = pht('Field Present: %s', $function);
$shade = PHUITagView::COLOR_GREEN;
break;
case PhutilSearchQueryCompiler::OPERATOR_ABSENT:
$name = pht('Field Absent: %s', $function);
$shade = PHUITagView::COLOR_RED;
break;
default:
$shade = PHUITagView::COLOR_BLUE;
break;
@ -73,7 +87,7 @@ final class PhabricatorFulltextToken extends Phobject {
$tag = id(new PHUITagView())
->setType(PHUITagView::TYPE_SHADE)
->setColor($shade)
->setName($token->getValue());
->setName($name);
if ($tip !== null) {
Javelin::initBehavior('phabricator-tooltips');

View file

@ -1815,7 +1815,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$function = $default_function;
}
$raw_field = $engine->getFieldForFunction($function);
$function_def = $engine->getFunctionForName($function);
// NOTE: The query compiler guarantees that a query can not make a
// field both "present" and "absent", so it's safe to just use the
@ -1829,7 +1829,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$alias = 'ftfield_'.$idx++;
$table_map[$function] = array(
'alias' => $alias,
'key' => $raw_field,
'function' => $function_def,
'optional' => $is_optional,
);
}
@ -1838,7 +1838,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
// Join the title field separately so we can rank results.
$table_map['rank'] = array(
'alias' => 'ft_rank',
'key' => PhabricatorSearchDocumentFieldType::FIELD_TITLE,
'function' => $engine->getFunctionForName('title'),
// See T13345. Not every document has a title, so we want to LEFT JOIN
// this table to avoid excluding documents with no title that match
@ -2130,6 +2130,36 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$ngram);
}
$object = $this->newResultObject();
if (!$object) {
throw new Exception(
pht(
'Query class ("%s") must define "newResultObject()" to use '.
'Ferret constraints.',
get_class($this)));
}
// See T13511. If we have a fulltext query which uses valid field
// functions, but at least one of the functions applies to a field which
// the object can never have, the query can never match anything. Detect
// this and return an empty result set.
// (Even if the query is "field is absent" or "field does not contain
// such-and-such", the interpretation is that these constraints are
// not meaningful when applied to an object which can never have the
// field.)
$functions = ipull($this->ferretTables, 'function');
$functions = mpull($functions, null, 'getFerretFunctionName');
foreach ($functions as $function) {
if (!$function->supportsObject($object)) {
throw new PhabricatorEmptyQueryException(
pht(
'This query uses a fulltext function which this document '.
'type does not support.'));
}
}
foreach ($this->ferretTables as $table) {
$alias = $table['alias'];
@ -2148,7 +2178,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$alias,
$alias,
$alias,
$table['key']);
$table['function']->getFerretFieldKey());
}
return $joins;