mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-11 07:11:04 +01:00
Remove broken and unfixable "prefix" ngram behavior
Summary: Ref T13501. The older ngram code has some "prefix" behavior that tries to handle cases where a user issues a very short (one or two character) query. This code doesn't work, presumably never worked, and cannot be made to work (or, at least, I don't see a way, and am fairly sure one does not exist). If the user searches for "xy", we can find trigrams in the form "xy*" using the index, but not in the form "*xy". The code makes a misguided effort to look for " xy", but this will only find "xy" in words that begin with "xy", like "xylophone". For example, searching Files for "om" does not currently find "random.txt". Remove this behavior. Without engaging the trigram index, these queries fall back to an unindexed "LIKE" table scan, but that's about the best we can do. Test Plan: Searched for "om", hit "random.txt". Maniphest Tasks: T13501 Differential Revision: https://secure.phabricator.com/D21127
This commit is contained in:
parent
b1b9c844ac
commit
fb3f423279
2 changed files with 20 additions and 37 deletions
|
@ -63,9 +63,6 @@ abstract class PhabricatorSearchNgrams
|
|||
case 'index':
|
||||
$token = ' '.$token.' ';
|
||||
break;
|
||||
case 'prefix':
|
||||
$token = ' '.$token;
|
||||
break;
|
||||
}
|
||||
|
||||
$len = (strlen($token) - 2);
|
||||
|
|
|
@ -2411,30 +2411,29 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
protected function buildNgramsJoinClause(AphrontDatabaseConnection $conn) {
|
||||
$flat = array();
|
||||
foreach ($this->ngrams as $spec) {
|
||||
$index = $spec['index'];
|
||||
$value = $spec['value'];
|
||||
$length = $spec['length'];
|
||||
|
||||
if ($length >= 3) {
|
||||
$ngrams = $index->getNgramsFromString($value, 'query');
|
||||
$prefix = false;
|
||||
} else if ($length == 2) {
|
||||
$ngrams = $index->getNgramsFromString($value, 'prefix');
|
||||
$prefix = false;
|
||||
} else {
|
||||
$ngrams = array(' '.$value);
|
||||
$prefix = true;
|
||||
if ($length < 3) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$index = $spec['index'];
|
||||
$value = $spec['value'];
|
||||
|
||||
$ngrams = $index->getNgramsFromString($value, 'query');
|
||||
|
||||
foreach ($ngrams as $ngram) {
|
||||
$flat[] = array(
|
||||
'table' => $index->getTableName(),
|
||||
'ngram' => $ngram,
|
||||
'prefix' => $prefix,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (!$flat) {
|
||||
return array();
|
||||
}
|
||||
|
||||
// MySQL only allows us to join a maximum of 61 tables per query. Each
|
||||
// ngram is going to cost us a join toward that limit, so if the user
|
||||
// specified a very long query string, just pick 16 of the ngrams
|
||||
|
@ -2456,31 +2455,18 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
foreach ($flat as $spec) {
|
||||
$table = $spec['table'];
|
||||
$ngram = $spec['ngram'];
|
||||
$prefix = $spec['prefix'];
|
||||
|
||||
$alias = 'ngm'.$idx++;
|
||||
|
||||
if ($prefix) {
|
||||
$joins[] = qsprintf(
|
||||
$conn,
|
||||
'JOIN %T %T ON %T.objectID = %Q AND %T.ngram LIKE %>',
|
||||
$table,
|
||||
$alias,
|
||||
$alias,
|
||||
$id_column,
|
||||
$alias,
|
||||
$ngram);
|
||||
} else {
|
||||
$joins[] = qsprintf(
|
||||
$conn,
|
||||
'JOIN %T %T ON %T.objectID = %Q AND %T.ngram = %s',
|
||||
$table,
|
||||
$alias,
|
||||
$alias,
|
||||
$id_column,
|
||||
$alias,
|
||||
$ngram);
|
||||
}
|
||||
$joins[] = qsprintf(
|
||||
$conn,
|
||||
'JOIN %T %T ON %T.objectID = %Q AND %T.ngram = %s',
|
||||
$table,
|
||||
$alias,
|
||||
$alias,
|
||||
$id_column,
|
||||
$alias,
|
||||
$ngram);
|
||||
}
|
||||
|
||||
return $joins;
|
||||
|
|
Loading…
Reference in a new issue