From a2a2b3f7f4b590ce211f8e02f8a76060313729cb Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 7 Sep 2017 09:09:39 -0700 Subject: [PATCH] Sort global fulltext results by overall relevance Summary: Ref T12819. Currently, under the Ferret engine, we query each application's index separately and then aggregate the results. At the moment, results are aggregated by type first, then by actual rank. For example, all the revisions appear first, then all the tasks. Instead, surface the internal ranking data from the underlying query and sort by it. Test Plan: Searched for "A B" with a task named "A B" and a revision named "A". Saw task first. Broadly, saw mixed task and revision order in result sets. Reviewers: chad Reviewed By: chad Maniphest Tasks: T12819 Differential Revision: https://secure.phabricator.com/D18551 --- src/__phutil_library_map__.php | 2 + .../query/DifferentialRevisionQuery.php | 2 +- .../maniphest/query/ManiphestTaskQuery.php | 1 + .../ferret/PhabricatorFerretMetadata.php | 41 +++++++++++++++++++ ...PhabricatorFerretFulltextStorageEngine.php | 13 ++++++ ...PhabricatorCursorPagedPolicyAwareQuery.php | 35 ++++++++++++++++ 6 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 src/applications/search/ferret/PhabricatorFerretMetadata.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index e81239f9c0..ce3a86b98c 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -2840,6 +2840,7 @@ phutil_register_library_map(array( 'PhabricatorFerretFulltextEngineExtension' => 'applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php', 'PhabricatorFerretFulltextStorageEngine' => 'applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php', 'PhabricatorFerretInterface' => 'applications/search/ferret/PhabricatorFerretInterface.php', + 'PhabricatorFerretMetadata' => 'applications/search/ferret/PhabricatorFerretMetadata.php', 'PhabricatorFerretNgrams' => 'applications/search/ferret/PhabricatorFerretNgrams.php', 'PhabricatorFerretSearchEngineExtension' => 'applications/search/engineextension/PhabricatorFerretSearchEngineExtension.php', 'PhabricatorFile' => 'applications/files/storage/PhabricatorFile.php', @@ -8175,6 +8176,7 @@ phutil_register_library_map(array( 'PhabricatorFerretField' => 'PhabricatorSearchDAO', 'PhabricatorFerretFulltextEngineExtension' => 'PhabricatorFulltextEngineExtension', 'PhabricatorFerretFulltextStorageEngine' => 'PhabricatorFulltextStorageEngine', + 'PhabricatorFerretMetadata' => 'Phobject', 'PhabricatorFerretNgrams' => 'PhabricatorSearchDAO', 'PhabricatorFerretSearchEngineExtension' => 'PhabricatorSearchEngineExtension', 'PhabricatorFile' => array( diff --git a/src/applications/differential/query/DifferentialRevisionQuery.php b/src/applications/differential/query/DifferentialRevisionQuery.php index 622eba564a..8fd91cbd7e 100644 --- a/src/applications/differential/query/DifferentialRevisionQuery.php +++ b/src/applications/differential/query/DifferentialRevisionQuery.php @@ -320,7 +320,7 @@ final class DifferentialRevisionQuery */ protected function loadPage() { $data = $this->loadData(); - + $data = $this->didLoadRawRows($data); $table = $this->newResultObject(); return $table->loadAllFromArray($data); } diff --git a/src/applications/maniphest/query/ManiphestTaskQuery.php b/src/applications/maniphest/query/ManiphestTaskQuery.php index 704a67f548..f009e5c2ef 100644 --- a/src/applications/maniphest/query/ManiphestTaskQuery.php +++ b/src/applications/maniphest/query/ManiphestTaskQuery.php @@ -247,6 +247,7 @@ final class ManiphestTaskQuery extends PhabricatorCursorPagedPolicyAwareQuery { break; } + $data = $this->didLoadRawRows($data); $tasks = $task_dao->loadAllFromArray($data); switch ($this->groupBy) { diff --git a/src/applications/search/ferret/PhabricatorFerretMetadata.php b/src/applications/search/ferret/PhabricatorFerretMetadata.php new file mode 100644 index 0000000000..8ab5e37e45 --- /dev/null +++ b/src/applications/search/ferret/PhabricatorFerretMetadata.php @@ -0,0 +1,41 @@ +engine = $engine; + return $this; + } + + public function getEngine() { + return $this->engine; + } + + public function setPHID($phid) { + $this->phid = $phid; + return $this; + } + + public function getPHID() { + return $this->phid; + } + + public function setRelevance($relevance) { + $this->relevance = $relevance; + return $this; + } + + public function getRelevance() { + return $this->relevance; + } + + public function getRelevanceSortVector() { + return id(new PhutilSortVector()) + ->addInt(-$this->getRelevance()); + } + +} diff --git a/src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php index b67cd7d43c..74e8771de7 100644 --- a/src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php +++ b/src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php @@ -52,6 +52,7 @@ final class PhabricatorFerretFulltextStorageEngine $viewer = PhabricatorUser::getOmnipotentUser(); $type_results = array(); + $metadata = array(); foreach ($type_map as $type => $spec) { $engine = $spec['engine']; $object = $spec['object']; @@ -83,6 +84,8 @@ final class PhabricatorFerretFulltextStorageEngine $results = $engine_query->execute(); $results = mpull($results, null, 'getPHID'); $type_results[$type] = $results; + + $metadata += $engine_query->getFerretMetadata(); } $list = array(); @@ -90,6 +93,16 @@ final class PhabricatorFerretFulltextStorageEngine $list += $results; } + // Currently, the list is grouped by object type. For example, all the + // tasks might be first, then all the revisions, and so on. In each group, + // the results are ordered properly. + + // Reorder the results so that the highest-ranking results come first, + // no matter which object types they belong to. + + $metadata = msort($metadata, 'getRelevanceSortVector'); + $list = array_select_keys($list, array_keys($metadata)) + $list; + $result_slice = array_slice($list, $offset, $limit, true); return array_keys($result_slice); } diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php index 569f79d824..42e6ed7b4e 100644 --- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php +++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php @@ -31,6 +31,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery private $ferretTokens = array(); private $ferretTables = array(); private $ferretQuery; + private $ferretMetadata = array(); protected function getPageCursors(array $page) { return array( @@ -82,6 +83,18 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery return $this->beforeID; } + final public function getFerretMetadata() { + if (!$this->supportsFerretEngine()) { + throw new Exception( + pht( + 'Unable to retrieve Ferret engine metadata, this class ("%s") does '. + 'not support the Ferret engine.', + get_class($this))); + } + + return $this->ferretMetadata; + } + protected function loadStandardPage(PhabricatorLiskDAO $table) { $rows = $this->loadStandardPageRows($table); return $table->loadAllFromArray($rows); @@ -111,6 +124,27 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $this->buildOrderClause($conn), $this->buildLimitClause($conn)); + $rows = $this->didLoadRawRows($rows); + + return $rows; + } + + protected function didLoadRawRows(array $rows) { + if ($this->ferretEngine) { + foreach ($rows as $row) { + $phid = $row['phid']; + + $metadata = id(new PhabricatorFerretMetadata()) + ->setPHID($phid) + ->setEngine($this->ferretEngine) + ->setRelevance(idx($row, '_ft_rank')); + + $this->ferretMetadata[$phid] = $metadata; + + unset($row['_ft_rank']); + } + } + return $rows; } @@ -172,6 +206,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery if ($this->beforeID) { $results = array_reverse($results, $preserve_keys = true); } + return $results; }