mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-10 00:42:41 +01:00
Execute fulltext queries using a subquery instead of by ordering the entire result set
Summary:
Ref T6740. Currently, we issue fulltext queries with an "ORDER BY <score>" on the entire result set. For very large result sets, this can require MySQL to do a lot of work.

However, this work is generally useless: if you search for some common word like "diff" or "internet" or whatever and match 4,000 documents, the chance that we can score the thing you were actually looking for at the top of the result set is nearly nothing. It's more useful to return quickly, and let the user see that they need to narrow their query to get useful results.

Instead of doing all that work, let MySQL find up to 1,000 results, then pick the best ones out of those.

This actual change is a little flimsy, since our index isn't really big enough to suffer indexing issues. However, the time to search for common terms on my local install (where I have some large repositories imported and indexed) drops from ~40ms to ~10ms.

My hope is to improve downstream performance for queries like "translatewiki" here, particularly: <https://phabricator.wikimedia.org/T143863>

That query matches about 300 trillion documents, but there's a ~0% chance that the one the user wants is at the top. It takes a couple of seconds to execute, for me. Better to return quickly and let the user refine their results.

I think this will also make some other changes related to stemming easier.

This also removes the "list users first" ordering on the query, which made performance more complicated and seems irrelevant now that we have the typeahead.

Test Plan:
- Searched for some common terms like "code" locally, saw similar results with better performance.
- Searched for useful queries (e.g., ones with a small result set), got identical results.

Reviewers: chad
Reviewed By: chad
Maniphest Tasks: T6740
Differential Revision: https://secure.phabricator.com/D16944
This commit is contained in:
parent
48a34eced2
commit
54470a12d4
1 changed files with 85 additions and 64 deletions
|
@ -153,71 +153,105 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
}
|
||||
|
||||
public function executeSearch(PhabricatorSavedQuery $query) {
|
||||
$where = array();
|
||||
$table = new PhabricatorSearchDocument();
|
||||
$document_table = $table->getTableName();
|
||||
$conn = $table->establishConnection('r');
|
||||
|
||||
$subquery = $this->newFulltextSubquery($query, $conn);
|
||||
|
||||
$offset = (int)$query->getParameter('offset', 0);
|
||||
$limit = (int)$query->getParameter('limit', 25);
|
||||
|
||||
// NOTE: We must JOIN the subquery in order to apply a limit.
|
||||
$results = queryfx_all(
|
||||
$conn,
|
||||
'SELECT
|
||||
documentPHID,
|
||||
MAX(fieldScore) AS documentScore
|
||||
FROM (%Q) query
|
||||
JOIN %T root ON query.documentPHID = root.phid
|
||||
GROUP BY documentPHID
|
||||
ORDER BY documentScore DESC
|
||||
LIMIT %d, %d',
|
||||
$subquery,
|
||||
$document_table,
|
||||
$offset,
|
||||
$limit);
|
||||
|
||||
return ipull($results, 'documentPHID');
|
||||
}
|
||||
|
||||
private function newFulltextSubquery(
|
||||
PhabricatorSavedQuery $query,
|
||||
AphrontDatabaseConnection $conn) {
|
||||
|
||||
$field = new PhabricatorSearchDocumentField();
|
||||
$field_table = $field->getTableName();
|
||||
|
||||
$document = new PhabricatorSearchDocument();
|
||||
$document_table = $document->getTableName();
|
||||
|
||||
$select = array();
|
||||
$select[] = 'document.phid AS documentPHID';
|
||||
|
||||
$join = array();
|
||||
$order = 'ORDER BY documentCreated DESC';
|
||||
|
||||
$dao_doc = new PhabricatorSearchDocument();
|
||||
$dao_field = new PhabricatorSearchDocumentField();
|
||||
|
||||
$t_doc = $dao_doc->getTableName();
|
||||
$t_field = $dao_field->getTableName();
|
||||
|
||||
$conn_r = $dao_doc->establishConnection('r');
|
||||
$where = array();
|
||||
|
||||
$raw_query = $query->getParameter('query');
|
||||
$q = $this->compileQuery($raw_query);
|
||||
$compiled_query = $this->compileQuery($raw_query);
|
||||
if (strlen($compiled_query)) {
|
||||
$select[] = qsprintf(
|
||||
$conn,
|
||||
'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE) AS fieldScore',
|
||||
$compiled_query);
|
||||
|
||||
if (strlen($q)) {
|
||||
$join[] = qsprintf(
|
||||
$conn_r,
|
||||
$conn,
|
||||
'%T field ON field.phid = document.phid',
|
||||
$t_field);
|
||||
$field_table);
|
||||
|
||||
$where[] = qsprintf(
|
||||
$conn_r,
|
||||
$conn,
|
||||
'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE)',
|
||||
$q);
|
||||
$compiled_query);
|
||||
|
||||
// When searching for a string, promote user listings above other
|
||||
// listings.
|
||||
$order = qsprintf(
|
||||
$conn_r,
|
||||
'ORDER BY
|
||||
IF(documentType = %s, 0, 1) ASC,
|
||||
MAX(MATCH(corpus) AGAINST (%s)) DESC',
|
||||
'USER',
|
||||
$q);
|
||||
|
||||
$field = $query->getParameter('field');
|
||||
if ($field) {
|
||||
if ($query->getParameter('field')) {
|
||||
$where[] = qsprintf(
|
||||
$conn_r,
|
||||
$conn,
|
||||
'field.field = %s',
|
||||
$field);
|
||||
}
|
||||
} else {
|
||||
$select[] = qsprintf(
|
||||
$conn,
|
||||
'document.dateCreated AS fieldScore');
|
||||
}
|
||||
|
||||
$exclude = $query->getParameter('exclude');
|
||||
if ($exclude) {
|
||||
$where[] = qsprintf($conn_r, 'document.phid != %s', $exclude);
|
||||
$where[] = qsprintf(
|
||||
$conn,
|
||||
'document.phid != %s',
|
||||
$exclude);
|
||||
}
|
||||
|
||||
$types = $query->getParameter('types');
|
||||
if ($types) {
|
||||
if (strlen($q)) {
|
||||
if (strlen($compiled_query)) {
|
||||
$where[] = qsprintf(
|
||||
$conn_r,
|
||||
$conn,
|
||||
'field.phidType IN (%Ls)',
|
||||
$types);
|
||||
}
|
||||
|
||||
$where[] = qsprintf(
|
||||
$conn_r,
|
||||
$conn,
|
||||
'document.documentType IN (%Ls)',
|
||||
$types);
|
||||
}
|
||||
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'authorPHIDs',
|
||||
PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR);
|
||||
|
@ -231,14 +265,14 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
|
||||
if ($include_open && !$include_closed) {
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'statuses',
|
||||
$open_rel,
|
||||
true);
|
||||
} else if ($include_closed && !$include_open) {
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'statuses',
|
||||
$closed_rel,
|
||||
|
@ -247,46 +281,47 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
|
||||
if ($query->getParameter('withAnyOwner')) {
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'withAnyOwner',
|
||||
PhabricatorSearchRelationship::RELATIONSHIP_OWNER,
|
||||
true);
|
||||
} else if ($query->getParameter('withUnowned')) {
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'withUnowned',
|
||||
PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED,
|
||||
true);
|
||||
} else {
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'ownerPHIDs',
|
||||
PhabricatorSearchRelationship::RELATIONSHIP_OWNER);
|
||||
}
|
||||
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'subscriberPHIDs',
|
||||
PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER);
|
||||
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'projectPHIDs',
|
||||
PhabricatorSearchRelationship::RELATIONSHIP_PROJECT);
|
||||
|
||||
$join[] = $this->joinRelationship(
|
||||
$conn_r,
|
||||
$conn,
|
||||
$query,
|
||||
'repository',
|
||||
PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY);
|
||||
|
||||
$join = array_filter($join);
|
||||
$select = implode(', ', $select);
|
||||
|
||||
$join = array_filter($join);
|
||||
foreach ($join as $key => $clause) {
|
||||
$join[$key] = ' JOIN '.$clause;
|
||||
}
|
||||
|
@ -298,27 +333,13 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
$where = '';
|
||||
}
|
||||
|
||||
$offset = (int)$query->getParameter('offset', 0);
|
||||
$limit = (int)$query->getParameter('limit', 25);
|
||||
|
||||
$hits = queryfx_all(
|
||||
$conn_r,
|
||||
'SELECT
|
||||
document.phid
|
||||
FROM %T document
|
||||
%Q
|
||||
%Q
|
||||
GROUP BY document.phid
|
||||
%Q
|
||||
LIMIT %d, %d',
|
||||
$t_doc,
|
||||
return qsprintf(
|
||||
$conn,
|
||||
'SELECT %Q FROM %T document %Q %Q LIMIT 1000',
|
||||
$select,
|
||||
$document_table,
|
||||
$join,
|
||||
$where,
|
||||
$order,
|
||||
$offset,
|
||||
$limit);
|
||||
|
||||
return ipull($hits, 'phid');
|
||||
$where);
|
||||
}
|
||||
|
||||
protected function joinRelationship(
|
||||
|
|
Loading…
Reference in a new issue