1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-26 08:42:41 +01:00

Add support for relevance-ranking Ferret engine results

Summary: Ref T12819. "Relevance" here just means "how many of your search terms are present in the title?" but that's about the best we can do anyway.

Test Plan: Indexed tasks "A B", "A Z", "Z B", and "Z Z" (all with "A B" in comments). Searched for "A B". Got results ranked in the listed order, with "A B" as the most relevant hit for query "A B".

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T12819

Differential Revision: https://secure.phabricator.com/D18539
This commit is contained in:
epriestley 2017-09-05 15:58:34 -07:00
parent af7c92f2c6
commit 64b7778f32

View file

@ -251,6 +251,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
} }
$select[] = $this->buildEdgeLogicSelectClause($conn); $select[] = $this->buildEdgeLogicSelectClause($conn);
$select[] = $this->buildFerretSelectClause($conn);
return $select; return $select;
} }
@ -769,6 +770,13 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
} }
} }
if ($this->supportsFerretEngine()) {
  // Offer a "relevance" ordering for queries which support the Ferret
  // engine. It sorts on the "rank" orderable column first and falls back
  // to "id" to break ties. The display name is user-facing, so it must be
  // spelled correctly ("Relevance", not "Relevence").
  $orders['relevance'] = array(
    'vector' => array('rank', 'id'),
    'name' => pht('Relevance'),
  );
}
return $orders; return $orders;
} }
@ -961,6 +969,14 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
} }
} }
if ($this->supportsFerretEngine()) {
$columns['rank'] = array(
'table' => null,
'column' => '_ft_rank',
'type' => 'int',
);
}
$cache->setKey($cache_key, $columns); $cache->setKey($cache_key, $columns);
return $columns; return $columns;
@ -1385,10 +1401,23 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
/* -( Ferret )------------------------------------------------------------- */ /* -( Ferret )------------------------------------------------------------- */
/**
 * Test whether this query can be constrained and ranked by the Ferret
 * fulltext engine.
 *
 * Support is determined by building a fresh result object and checking
 * whether it implements @{interface:PhabricatorFerretInterface}.
 *
 * @return bool True if result objects implement the Ferret interface.
 */
public function supportsFerretEngine() {
  return ($this->newResultObject() instanceof PhabricatorFerretInterface);
}
public function withFerretConstraint( public function withFerretConstraint(
PhabricatorFerretEngine $engine, PhabricatorFerretEngine $engine,
array $fulltext_tokens) { array $fulltext_tokens) {
if (!$this->supportsFerretEngine()) {
throw new Exception(
pht(
'Query ("%s") does not support the Ferret fulltext engine.',
get_class($this)));
}
if ($this->ferretEngine) { if ($this->ferretEngine) {
throw new Exception( throw new Exception(
pht( pht(
@ -1416,7 +1445,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$raw_field = $engine->getFieldForFunction($function); $raw_field = $engine->getFieldForFunction($function);
if (!isset($table_map[$function])) { if (!isset($table_map[$function])) {
$alias = 'ftfield'.$idx++; $alias = 'ftfield_'.$idx++;
$table_map[$function] = array( $table_map[$function] = array(
'alias' => $alias, 'alias' => $alias,
'key' => $raw_field, 'key' => $raw_field,
@ -1426,11 +1455,101 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$current_function = $function; $current_function = $function;
} }
// Join the title field separately so we can rank results.
$table_map['rank'] = array(
'alias' => 'ft_rank',
'key' => PhabricatorSearchDocumentFieldType::FIELD_TITLE,
);
$this->ferretTables = $table_map; $this->ferretTables = $table_map;
return $this; return $this;
} }
/**
 * Build the SELECT fragments which compute the Ferret relevance rank.
 *
 * The rank is exposed as a virtual `_ft_rank` column. It is the sum of one
 * score per search token, computed against the table aliased `ft_rank`:
 *
 *   - substring tokens score 2 when `rawCorpus` matches;
 *   - other tokens score 2 when `termCorpus` matches the term and, unless
 *     the token is quoted, 1 more when `normalCorpus` matches its stem;
 *   - "not" tokens are ignored.
 *
 * Queries which do not support Ferret select nothing. Queries which support
 * it but have no active Ferret constraint select a constant rank of 0.
 *
 * @param AphrontDatabaseConnection $conn Connection used to escape values.
 * @return list<string> Zero or one SELECT fragments.
 */
protected function buildFerretSelectClause(AphrontDatabaseConnection $conn) {
  $select = array();

  if (!$this->supportsFerretEngine()) {
    return $select;
  }

  if (!$this->ferretEngine) {
    $select[] = '0 _ft_rank';
    return $select;
  }

  $engine = $this->ferretEngine;
  $stemmer = $engine->newStemmer();

  $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
  $op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
  $table_alias = 'ft_rank';

  $parts = array();
  foreach ($this->ferretTokens as $fulltext_token) {
    $raw_token = $fulltext_token->getToken();
    $value = $raw_token->getValue();

    if ($raw_token->getOperator() == $op_not) {
      // Ignore "not" terms when ranking, since they aren't useful.
      continue;
    }

    if ($raw_token->getOperator() == $op_sub) {
      // Substring terms are matched against the raw corpus only.
      $parts[] = qsprintf(
        $conn,
        'IF(%T.rawCorpus LIKE %~, 2, 0)',
        $table_alias,
        $value);
      continue;
    }

    $term_value = $engine->newTermsCorpus($value);
    $parts[] = qsprintf(
      $conn,
      'IF(%T.termCorpus LIKE %~, 2, 0)',
      $table_alias,
      $term_value);

    // Quoted terms must match literally, so only unquoted terms earn the
    // additional (weaker) score for a stemmed match.
    if (!$raw_token->isQuoted()) {
      $stem_value = $stemmer->stemToken($value);
      $stem_value = $engine->newTermsCorpus($stem_value);

      $parts[] = qsprintf(
        $conn,
        'IF(%T.normalCorpus LIKE %~, 1, 0)',
        $table_alias,
        $stem_value);
    }
  }

  // Always include a constant term so the expression is valid SQL even when
  // no token contributed a score (for example, when every token is a "not"
  // term). Adding it once, outside the loop, leaves the sum unchanged.
  $parts[] = '0';

  $select[] = qsprintf(
    $conn,
    '%Q _ft_rank',
    implode(' + ', $parts));

  return $select;
}
protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) { protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) {
if (!$this->ferretEngine) { if (!$this->ferretEngine) {
return array(); return array();