mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-18 11:30:55 +01:00
Separate sever-side typeahead queries into "prefix" and "content" phases
Summary: Ref T8510. When users type "platypus" into a typeahead, they want "Platypus Playground" to be a higher-ranked match than "AAA Platypus", even though the latter is alphabetically first. Specifically, the rule is: results which match the query as a prefix of the result text should rank above results which do not. I believe we now always get this right on the client side. However, WMF has at least one case (described in T8510) where we do not get it right on the server side, and thus the user sees the wrong result. The remaining issue is that if "platypus" matches more than 100 results, the result "Platypus Playground" may not appear in the result set at all, beacuse there are 100 copies of "AAA Platypus 1", "AAA Platypus 2", etc., first. So even though the client will apply the correct sort, it doesn't have the result the user wants and can't show it to them. To fix this, split the server-side query into two phases: - In the first phase, the "prefix" phase, we find results that **start with** "platypus". - In the second phase, the "content" phase, we find results that contain "platypus" anywhere. We skip the "prefix" phase if the user has not typed a query (for example, in the browse view). Test Plan: This is a lot of stuff, but the new ranking here puts projects which start with "w" at the top of the list. Lower down the list, you can see some projects which contain "w" but do not appear at the top (like "Serious Work"). {F1913931} Reviewers: chad Reviewed By: chad Maniphest Tasks: T8510 Differential Revision: https://secure.phabricator.com/D16838
This commit is contained in:
parent
6dc94fc10a
commit
9a1d59ad5b
8 changed files with 219 additions and 11 deletions
|
@ -17,6 +17,7 @@ final class PhabricatorPeopleQuery
|
|||
private $isApproved;
|
||||
private $nameLike;
|
||||
private $nameTokens;
|
||||
private $namePrefixes;
|
||||
|
||||
private $needPrimaryEmail;
|
||||
private $needProfile;
|
||||
|
@ -95,6 +96,11 @@ final class PhabricatorPeopleQuery
|
|||
return $this;
|
||||
}
|
||||
|
||||
public function withNamePrefixes(array $prefixes) {
|
||||
$this->namePrefixes = $prefixes;
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function needPrimaryEmail($need) {
|
||||
$this->needPrimaryEmail = $need;
|
||||
return $this;
|
||||
|
@ -256,6 +262,17 @@ final class PhabricatorPeopleQuery
|
|||
$this->usernames);
|
||||
}
|
||||
|
||||
if ($this->namePrefixes) {
|
||||
$parts = array();
|
||||
foreach ($this->namePrefixes as $name_prefix) {
|
||||
$parts[] = qsprintf(
|
||||
$conn,
|
||||
'user.username LIKE %>',
|
||||
$name_prefix);
|
||||
}
|
||||
$where[] = '('.implode(' OR ', $parts).')';
|
||||
}
|
||||
|
||||
if ($this->emails !== null) {
|
||||
$where[] = qsprintf(
|
||||
$conn,
|
||||
|
|
|
@ -17,13 +17,18 @@ final class PhabricatorPeopleDatasource
|
|||
|
||||
public function loadResults() {
|
||||
$viewer = $this->getViewer();
|
||||
$tokens = $this->getTokens();
|
||||
|
||||
$query = id(new PhabricatorPeopleQuery())
|
||||
->setOrderVector(array('username'));
|
||||
|
||||
if ($tokens) {
|
||||
$query->withNameTokens($tokens);
|
||||
if ($this->getPhase() == self::PHASE_PREFIX) {
|
||||
$prefix = $this->getPrefixQuery();
|
||||
$query->withNamePrefixes(array($prefix));
|
||||
} else {
|
||||
$tokens = $this->getTokens();
|
||||
if ($tokens) {
|
||||
$query->withNameTokens($tokens);
|
||||
}
|
||||
}
|
||||
|
||||
$users = $this->executeQuery($query);
|
||||
|
|
|
@ -12,6 +12,7 @@ final class PhabricatorProjectQuery
|
|||
private $slugMap;
|
||||
private $allSlugs;
|
||||
private $names;
|
||||
private $namePrefixes;
|
||||
private $nameTokens;
|
||||
private $icons;
|
||||
private $colors;
|
||||
|
@ -78,6 +79,11 @@ final class PhabricatorProjectQuery
|
|||
return $this;
|
||||
}
|
||||
|
||||
public function withNamePrefixes(array $prefixes) {
|
||||
$this->namePrefixes = $prefixes;
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function withNameTokens(array $tokens) {
|
||||
$this->nameTokens = array_values($tokens);
|
||||
return $this;
|
||||
|
@ -464,6 +470,17 @@ final class PhabricatorProjectQuery
|
|||
$this->names);
|
||||
}
|
||||
|
||||
if ($this->namePrefixes) {
|
||||
$parts = array();
|
||||
foreach ($this->namePrefixes as $name_prefix) {
|
||||
$parts[] = qsprintf(
|
||||
$conn,
|
||||
'name LIKE %>',
|
||||
$name_prefix);
|
||||
}
|
||||
$where[] = '('.implode(' OR ', $parts).')';
|
||||
}
|
||||
|
||||
if ($this->icons !== null) {
|
||||
$where[] = qsprintf(
|
||||
$conn,
|
||||
|
|
|
@ -28,7 +28,10 @@ final class PhabricatorProjectDatasource
|
|||
->needImages(true)
|
||||
->needSlugs(true);
|
||||
|
||||
if ($tokens) {
|
||||
if ($this->getPhase() == self::PHASE_PREFIX) {
|
||||
$prefix = $this->getPrefixQuery();
|
||||
$query->withNamePrefixes(array($prefix));
|
||||
} else if ($tokens) {
|
||||
$query->withNameTokens($tokens);
|
||||
}
|
||||
|
||||
|
|
|
@ -325,6 +325,11 @@ final class PhabricatorTypeaheadModularDatasourceController
|
|||
pht('Icon'),
|
||||
pht('Closed'),
|
||||
pht('Sprite'),
|
||||
pht('Color'),
|
||||
pht('Type'),
|
||||
pht('Unique'),
|
||||
pht('Auto'),
|
||||
pht('Phase'),
|
||||
));
|
||||
|
||||
$result_box = id(new PHUIObjectBoxView())
|
||||
|
|
|
@ -4,6 +4,8 @@ abstract class PhabricatorTypeaheadCompositeDatasource
|
|||
extends PhabricatorTypeaheadDatasource {
|
||||
|
||||
private $usable;
|
||||
private $prefixString;
|
||||
private $prefixLength;
|
||||
|
||||
abstract public function getComponentDatasources();
|
||||
|
||||
|
@ -22,9 +24,51 @@ abstract class PhabricatorTypeaheadCompositeDatasource
|
|||
}
|
||||
|
||||
public function loadResults() {
|
||||
$phases = array();
|
||||
|
||||
// We only need to do a prefix phase query if there's an actual query
|
||||
// string. If the user didn't type anything, nothing can possibly match it.
|
||||
if (strlen($this->getRawQuery())) {
|
||||
$phases[] = self::PHASE_PREFIX;
|
||||
}
|
||||
|
||||
$phases[] = self::PHASE_CONTENT;
|
||||
|
||||
$offset = $this->getOffset();
|
||||
$limit = $this->getLimit();
|
||||
|
||||
$results = array();
|
||||
foreach ($phases as $phase) {
|
||||
if ($limit) {
|
||||
$phase_limit = ($offset + $limit) - count($results);
|
||||
} else {
|
||||
$phase_limit = 0;
|
||||
}
|
||||
|
||||
$phase_results = $this->loadResultsForPhase(
|
||||
$phase,
|
||||
$phase_limit);
|
||||
|
||||
foreach ($phase_results as $result) {
|
||||
$results[] = $result;
|
||||
}
|
||||
|
||||
if ($limit) {
|
||||
if (count($results) >= $offset + $limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
protected function loadResultsForPhase($phase, $limit) {
|
||||
if ($phase == self::PHASE_PREFIX) {
|
||||
$this->prefixString = $this->getPrefixQuery();
|
||||
$this->prefixLength = strlen($this->prefixString);
|
||||
}
|
||||
|
||||
// If the input query is a function like `members(platy`, and we can
|
||||
// parse the function, we strip the function off and hand the stripped
|
||||
// query to child sources. This makes it easier to implement function
|
||||
|
@ -62,28 +106,110 @@ abstract class PhabricatorTypeaheadCompositeDatasource
|
|||
}
|
||||
|
||||
$source
|
||||
->setPhase($phase)
|
||||
->setFunctionStack($source_stack)
|
||||
->setRawQuery($source_query)
|
||||
->setQuery($this->getQuery())
|
||||
->setViewer($this->getViewer());
|
||||
|
||||
if ($limit) {
|
||||
$source->setLimit($offset + $limit);
|
||||
}
|
||||
|
||||
if ($is_browse) {
|
||||
$source->setIsBrowse(true);
|
||||
}
|
||||
|
||||
$source_results = $source->loadResults();
|
||||
$source_results = $source->didLoadResults($source_results);
|
||||
if ($limit) {
|
||||
// If we are loading results from a source with a limit, it may return
|
||||
// some results which belong to the wrong phase. We need an entire page
|
||||
// of valid results in the correct phase AFTER any results for the
|
||||
// wrong phase are filtered for pagination to work correctly.
|
||||
|
||||
// To make sure we can get there, we fetch more and more results until
|
||||
// enough of them survive filtering to generate a full page.
|
||||
|
||||
// We start by fetching 150% of the results than we think we need, and
|
||||
// double the amount we overfetch by each time.
|
||||
$factor = 1.5;
|
||||
while (true) {
|
||||
$query_source = clone $source;
|
||||
$total = (int)ceil($limit * $factor) + 1;
|
||||
$query_source->setLimit($total);
|
||||
|
||||
$source_results = $query_source->loadResultsForPhase(
|
||||
$phase,
|
||||
$limit);
|
||||
|
||||
// If there are fewer unfiltered results than we asked for, we know
|
||||
// this is the entire result set and we don't need to keep going.
|
||||
if (count($source_results) < $total) {
|
||||
$source_results = $query_source->didLoadResults($source_results);
|
||||
$source_results = $this->filterPhaseResults(
|
||||
$phase,
|
||||
$source_results);
|
||||
break;
|
||||
}
|
||||
|
||||
// Otherwise, this result set have everything we need, or may not.
|
||||
// Filter the results that are part of the wrong phase out first...
|
||||
$source_results = $query_source->didLoadResults($source_results);
|
||||
$source_results = $this->filterPhaseResults($phase, $source_results);
|
||||
|
||||
// Now check if we have enough results left. If we do, we're all set.
|
||||
if (count($source_results) >= $total) {
|
||||
break;
|
||||
}
|
||||
|
||||
// We filtered out too many results to have a full page left, so we
|
||||
// need to run the query again, asking for even more results. We'll
|
||||
// keep doing this until we get a full page or get all of the
|
||||
// results.
|
||||
$factor = $factor * 2;
|
||||
}
|
||||
} else {
|
||||
$source_results = $source->loadResults();
|
||||
$source_results = $source->didLoadResults($source_results);
|
||||
$source_results = $this->filterPhaseResults($phase, $source_results);
|
||||
}
|
||||
|
||||
$results[] = $source_results;
|
||||
}
|
||||
|
||||
$results = array_mergev($results);
|
||||
$results = msort($results, 'getSortKey');
|
||||
|
||||
$count = count($results);
|
||||
$results = $this->sliceResults($results);
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
private function filterPhaseResults($phase, $source_results) {
|
||||
foreach ($source_results as $key => $source_result) {
|
||||
$result_phase = $this->getResultPhase($source_result);
|
||||
|
||||
if ($result_phase != $phase) {
|
||||
unset($source_results[$key]);
|
||||
continue;
|
||||
}
|
||||
|
||||
$source_result->setPhase($result_phase);
|
||||
}
|
||||
|
||||
return $source_results;
|
||||
}
|
||||
|
||||
private function getResultPhase(PhabricatorTypeaheadResult $result) {
|
||||
if ($this->prefixLength) {
|
||||
$result_name = phutil_utf8_strtolower($result->getName());
|
||||
if (!strncmp($result_name, $this->prefixString, $this->prefixLength)) {
|
||||
return self::PHASE_PREFIX;
|
||||
}
|
||||
}
|
||||
|
||||
return self::PHASE_CONTENT;
|
||||
}
|
||||
|
||||
protected function sliceResults(array $results) {
|
||||
$offset = $this->getOffset();
|
||||
$limit = $this->getLimit();
|
||||
|
||||
if ($offset || $limit) {
|
||||
if (!$limit) {
|
||||
$limit = count($results);
|
||||
|
|
|
@ -13,6 +13,10 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject {
|
|||
private $parameters = array();
|
||||
private $functionStack = array();
|
||||
private $isBrowse;
|
||||
private $phase = self::PHASE_CONTENT;
|
||||
|
||||
const PHASE_PREFIX = 'prefix';
|
||||
const PHASE_CONTENT = 'content';
|
||||
|
||||
public function setLimit($limit) {
|
||||
$this->limit = $limit;
|
||||
|
@ -46,6 +50,10 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject {
|
|||
return $this;
|
||||
}
|
||||
|
||||
public function getPrefixQuery() {
|
||||
return phutil_utf8_strtolower($this->getRawQuery());
|
||||
}
|
||||
|
||||
public function getRawQuery() {
|
||||
return $this->rawQuery;
|
||||
}
|
||||
|
@ -81,6 +89,15 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject {
|
|||
return $this->isBrowse;
|
||||
}
|
||||
|
||||
public function setPhase($phase) {
|
||||
$this->phase = $phase;
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function getPhase() {
|
||||
return $this->phase;
|
||||
}
|
||||
|
||||
public function getDatasourceURI() {
|
||||
$uri = new PhutilURI('/typeahead/class/'.get_class($this).'/');
|
||||
$uri->setQueryParams($this->parameters);
|
||||
|
@ -106,6 +123,13 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject {
|
|||
abstract public function getDatasourceApplicationClass();
|
||||
abstract public function loadResults();
|
||||
|
||||
protected function loadResultsForPhase($phase, $limit) {
|
||||
// By default, sources just load all of their results in every phase and
|
||||
// rely on filtering at a higher level to sequence phases correctly.
|
||||
$this->setLimit($limit);
|
||||
return $this->loadResults();
|
||||
}
|
||||
|
||||
protected function didLoadResults(array $results) {
|
||||
return $results;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ final class PhabricatorTypeaheadResult extends Phobject {
|
|||
private $unique;
|
||||
private $autocomplete;
|
||||
private $attributes = array();
|
||||
private $phase;
|
||||
|
||||
public function setIcon($icon) {
|
||||
$this->icon = $icon;
|
||||
|
@ -154,6 +155,7 @@ final class PhabricatorTypeaheadResult extends Phobject {
|
|||
$this->tokenType,
|
||||
$this->unique ? 1 : null,
|
||||
$this->autocomplete,
|
||||
$this->phase,
|
||||
);
|
||||
while (end($data) === null) {
|
||||
array_pop($data);
|
||||
|
@ -211,4 +213,13 @@ final class PhabricatorTypeaheadResult extends Phobject {
|
|||
return $this;
|
||||
}
|
||||
|
||||
public function setPhase($phase) {
|
||||
$this->phase = $phase;
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function getPhase() {
|
||||
return $this->phase;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue