From 64b7778f3257ca49f40fe05a5ccb43788baa3e81 Mon Sep 17 00:00:00 2001
From: epriestley
Date: Tue, 5 Sep 2017 15:58:34 -0700
Subject: [PATCH] Add support for relevance-ranking Ferret engine results

Summary: Ref T12819. "Relevance" here just means "how many of your search terms are present in the title?" but that's about the best we can do anyway.

Test Plan: Indexed tasks "A B", "A Z", "Z B", and "Z Z" (all with "A B" in comments). Searched for "A B". Got results ranked in the listed order, with "A B" as the most relevant hit for query "A B".

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T12819

Differential Revision: https://secure.phabricator.com/D18539
---
 ...PhabricatorCursorPagedPolicyAwareQuery.php | 121 +++++++++++++++++-
 1 file changed, 120 insertions(+), 1 deletion(-)

diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
index 6aa493f1e0..04fafa7cd2 100644
--- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
+++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
@@ -251,6 +251,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
     }
 
     $select[] = $this->buildEdgeLogicSelectClause($conn);
+    $select[] = $this->buildFerretSelectClause($conn);
 
     return $select;
   }
@@ -769,6 +770,13 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
       }
     }
 
+    if ($this->supportsFerretEngine()) {
+      $orders['relevance'] = array(
+        'vector' => array('rank', 'id'),
+        'name' => pht('Relevance'),
+      );
+    }
+
     return $orders;
   }
 
@@ -961,6 +969,14 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
       }
     }
 
+    if ($this->supportsFerretEngine()) {
+      $columns['rank'] = array(
+        'table' => null,
+        'column' => '_ft_rank',
+        'type' => 'int',
+      );
+    }
+
     $cache->setKey($cache_key, $columns);
 
     return $columns;
@@ -1385,10 +1401,23 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
 /* -(  Ferret  )------------------------------------------------------------- */
 
 
+  public function supportsFerretEngine() {
+    $object = $this->newResultObject();
+    return ($object instanceof PhabricatorFerretInterface);
+  }
+
+
   public function withFerretConstraint(
     PhabricatorFerretEngine $engine,
     array $fulltext_tokens) {
 
+    if (!$this->supportsFerretEngine()) {
+      throw new Exception(
+        pht(
+          'Query ("%s") does not support the Ferret fulltext engine.',
+          get_class($this)));
+    }
+
     if ($this->ferretEngine) {
       throw new Exception(
         pht(
@@ -1416,7 +1445,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
       $raw_field = $engine->getFieldForFunction($function);
 
       if (!isset($table_map[$function])) {
-        $alias = 'ftfield'.$idx++;
+        $alias = 'ftfield_'.$idx++;
         $table_map[$function] = array(
           'alias' => $alias,
           'key' => $raw_field,
@@ -1426,11 +1455,101 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
       $current_function = $function;
     }
 
+    // Join the title field separately so we can rank results.
+    $table_map['rank'] = array(
+      'alias' => 'ft_rank',
+      'key' => PhabricatorSearchDocumentFieldType::FIELD_TITLE,
+    );
+
     $this->ferretTables = $table_map;
 
     return $this;
   }
 
+  /**
+   * Build the SELECT expression which computes the "_ft_rank" relevance
+   * score for Ferret engine results.
+   *
+   * The score is a sum of per-token "IF(...)" terms evaluated against the
+   * table aliased as "ft_rank". Queries which support Ferret but have no
+   * active constraint select a constant rank of 0.
+   *
+   * @param AphrontDatabaseConnection Connection used to escape the clause.
+   * @return list<string> Zero or one SELECT clause fragments.
+   */
+  protected function buildFerretSelectClause(AphrontDatabaseConnection $conn) {
+    $select = array();
+
+    if (!$this->supportsFerretEngine()) {
+      return $select;
+    }
+
+    if (!$this->ferretEngine) {
+      $select[] = '0 _ft_rank';
+      return $select;
+    }
+
+    $engine = $this->ferretEngine;
+    $stemmer = $engine->newStemmer();
+
+    $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
+    $op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
+    $table_alias = 'ft_rank';
+
+    $parts = array();
+    foreach ($this->ferretTokens as $fulltext_token) {
+      $raw_token = $fulltext_token->getToken();
+      $value = $raw_token->getValue();
+      $operator = $raw_token->getOperator();
+
+      if ($operator == $op_not) {
+        // Ignore "not" terms when ranking, since they aren't useful.
+        continue;
+      }
+
+      if ($operator == $op_sub) {
+        $parts[] = qsprintf(
+          $conn,
+          'IF(%T.rawCorpus LIKE %~, 2, 0)',
+          $table_alias,
+          $value);
+        continue;
+      }
+
+      $term_value = $engine->newTermsCorpus($value);
+
+      $parts[] = qsprintf(
+        $conn,
+        'IF(%T.termCorpus LIKE %~, 2, 0)',
+        $table_alias,
+        $term_value);
+
+      // Quoted terms are not stemmed, so only unquoted terms can earn the
+      // smaller bonus for a stemmed match.
+      if (!$raw_token->isQuoted()) {
+        $stem_value = $stemmer->stemToken($value);
+        $stem_value = $engine->newTermsCorpus($stem_value);
+
+        $parts[] = qsprintf(
+          $conn,
+          'IF(%T.normalCorpus LIKE %~, 1, 0)',
+          $table_alias,
+          $stem_value);
+      }
+    }
+
+    // Always add a constant term: if every token was skipped (a query with
+    // only "not" terms), the sum would otherwise be empty and invalid SQL.
+    $parts[] = '0';
+
+    $select[] = qsprintf(
+      $conn,
+      '%Q _ft_rank',
+      implode(' + ', $parts));
+
+    return $select;
+  }
+
   protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) {
     if (!$this->ferretEngine) {
       return array();