mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-26 08:42:41 +01:00
Add support for relevance-ranking Ferret engine results
Summary: Ref T12819. "Relevance" here just means "how many of your search terms are present in the title?" but that's about the best we can do anyway. Test Plan: Indexed tasks "A B", "A Z", "Z B", and "Z Z" (all with "A B" in comments). Searched for "A B". Got results ranked in the listed order, with "A B" as the most relevant hit for query "A B". Reviewers: chad Reviewed By: chad Maniphest Tasks: T12819 Differential Revision: https://secure.phabricator.com/D18539
This commit is contained in:
parent
af7c92f2c6
commit
64b7778f32
1 changed file with 120 additions and 1 deletion
|
@ -251,6 +251,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
}
|
}
|
||||||
|
|
||||||
$select[] = $this->buildEdgeLogicSelectClause($conn);
|
$select[] = $this->buildEdgeLogicSelectClause($conn);
|
||||||
|
$select[] = $this->buildFerretSelectClause($conn);
|
||||||
|
|
||||||
return $select;
|
return $select;
|
||||||
}
|
}
|
||||||
|
@ -769,6 +770,13 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// When this query supports the Ferret engine, offer an additional
// "relevance" ordering whose order vector is ("rank", "id").
if ($this->supportsFerretEngine()) {
  $orders['relevance'] = array(
    'vector' => array('rank', 'id'),
    'name' => pht('Relevance'),
  );
}
|
||||||
|
|
||||||
return $orders;
|
return $orders;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -961,6 +969,14 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($this->supportsFerretEngine()) {
|
||||||
|
$columns['rank'] = array(
|
||||||
|
'table' => null,
|
||||||
|
'column' => '_ft_rank',
|
||||||
|
'type' => 'int',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
$cache->setKey($cache_key, $columns);
|
$cache->setKey($cache_key, $columns);
|
||||||
|
|
||||||
return $columns;
|
return $columns;
|
||||||
|
@ -1385,10 +1401,23 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
/* -( Ferret )------------------------------------------------------------- */
|
/* -( Ferret )------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Test whether this query can be constrained and ranked by the Ferret
 * fulltext engine.
 *
 * @return bool True if a new result object for this query implements
 *   @{interface:PhabricatorFerretInterface}.
 */
public function supportsFerretEngine() {
  return ($this->newResultObject() instanceof PhabricatorFerretInterface);
}
|
||||||
|
|
||||||
|
|
||||||
public function withFerretConstraint(
|
public function withFerretConstraint(
|
||||||
PhabricatorFerretEngine $engine,
|
PhabricatorFerretEngine $engine,
|
||||||
array $fulltext_tokens) {
|
array $fulltext_tokens) {
|
||||||
|
|
||||||
|
if (!$this->supportsFerretEngine()) {
|
||||||
|
throw new Exception(
|
||||||
|
pht(
|
||||||
|
'Query ("%s") does not support the Ferret fulltext engine.',
|
||||||
|
get_class($this)));
|
||||||
|
}
|
||||||
|
|
||||||
if ($this->ferretEngine) {
|
if ($this->ferretEngine) {
|
||||||
throw new Exception(
|
throw new Exception(
|
||||||
pht(
|
pht(
|
||||||
|
@ -1416,7 +1445,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
$raw_field = $engine->getFieldForFunction($function);
|
$raw_field = $engine->getFieldForFunction($function);
|
||||||
|
|
||||||
if (!isset($table_map[$function])) {
|
if (!isset($table_map[$function])) {
|
||||||
$alias = 'ftfield'.$idx++;
|
$alias = 'ftfield_'.$idx++;
|
||||||
$table_map[$function] = array(
|
$table_map[$function] = array(
|
||||||
'alias' => $alias,
|
'alias' => $alias,
|
||||||
'key' => $raw_field,
|
'key' => $raw_field,
|
||||||
|
@ -1426,11 +1455,101 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
$current_function = $function;
|
$current_function = $function;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Join the title field separately so we can rank results.
|
||||||
|
$table_map['rank'] = array(
|
||||||
|
'alias' => 'ft_rank',
|
||||||
|
'key' => PhabricatorSearchDocumentFieldType::FIELD_TITLE,
|
||||||
|
);
|
||||||
|
|
||||||
$this->ferretTables = $table_map;
|
$this->ferretTables = $table_map;
|
||||||
|
|
||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the SELECT fragment which computes the `_ft_rank` relevance column.
 *
 * Queries which do not support the Ferret engine select nothing. Queries
 * which support it but have no active Ferret engine select a constant rank
 * of 0. Otherwise, every search token which is not a "not" token adds
 * points when it matches the table aliased `ft_rank`: 2 for a substring
 * match against `rawCorpus`, 2 for a term match against `termCorpus`, and
 * 1 more for a stemmed match against `normalCorpus` (unquoted tokens only).
 *
 * @param AphrontDatabaseConnection $conn Connection used to build the
 *   escaped SQL fragments.
 * @return list<string> Zero or one SELECT clause fragments.
 */
protected function buildFerretSelectClause(AphrontDatabaseConnection $conn) {
  $select = array();

  if (!$this->supportsFerretEngine()) {
    return $select;
  }

  if (!$this->ferretEngine) {
    $select[] = '0 _ft_rank';
    return $select;
  }

  $engine = $this->ferretEngine;
  $stemmer = $engine->newStemmer();

  $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
  $op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
  $table_alias = 'ft_rank';

  // Seed the sum with a constant term. Without it, a query made up only of
  // "not" tokens would produce an empty expression and the resulting
  // " _ft_rank" select fragment would not be valid SQL.
  $parts = array('0');

  foreach ($this->ferretTokens as $fulltext_token) {
    $raw_token = $fulltext_token->getToken();
    $value = $raw_token->getValue();
    $operator = $raw_token->getOperator();

    if ($operator == $op_not) {
      // Ignore "not" terms when ranking, since they aren't useful.
      continue;
    }

    if ($operator == $op_sub) {
      // Substring tokens are matched against the raw text only.
      $parts[] = qsprintf(
        $conn,
        'IF(%T.rawCorpus LIKE %~, 2, 0)',
        $table_alias,
        $value);
      continue;
    }

    // Quoted tokens must match as written; only unquoted tokens also get
    // credit for a stemmed match.
    $is_stemmed = !$raw_token->isQuoted();

    $term_value = $engine->newTermsCorpus($value);

    $parts[] = qsprintf(
      $conn,
      'IF(%T.termCorpus LIKE %~, 2, 0)',
      $table_alias,
      $term_value);

    if ($is_stemmed) {
      $stem_value = $stemmer->stemToken($value);
      $stem_value = $engine->newTermsCorpus($stem_value);

      $parts[] = qsprintf(
        $conn,
        'IF(%T.normalCorpus LIKE %~, 1, 0)',
        $table_alias,
        $stem_value);
    }
  }

  $select[] = qsprintf(
    $conn,
    '%Q _ft_rank',
    implode(' + ', $parts));

  return $select;
}
|
||||||
|
|
||||||
protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) {
|
protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) {
|
||||||
if (!$this->ferretEngine) {
|
if (!$this->ferretEngine) {
|
||||||
return array();
|
return array();
|
||||||
|
|
Loading…
Reference in a new issue