mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-18 11:30:55 +01:00
Use stemming in the MySQL fulltext search engine
Summary: Ref T6740. When we index a document, also save a copy of the stemmed version. When querying, search the combined corpus for the terms. (We may need to tune this a bit later since it's possible for literal, quoted terms to match in the stemmed section, but I think this will rarely cause issues in practice.) A downside here is that search sort of breaks if you upgrade into this and don't reindex. I wasn't able to find a way to issue the query that remained compatible with older indexes and didn't have awful performance, so my plan is: - Put this on `secure`. - Rebuild the index. - If things look good after a couple of days, add a way that we can tell people they need to rebuild the search index with a setup warning. We might get some reports between now and then, but if this is super awful we should know by the end of the weekend. Test Plan: WOW AMAZING {F2021466} Reviewers: chad Reviewed By: chad Maniphest Tasks: T6740 Differential Revision: https://secure.phabricator.com/D16947
This commit is contained in:
parent
d54c14c644
commit
7c5b5327c8
3 changed files with 29 additions and 11 deletions
2
resources/sql/autopatches/20161125.search.01.stemmed.sql
Normal file
2
resources/sql/autopatches/20161125.search.01.stemmed.sql
Normal file
|
@ -0,0 +1,2 @@
|
|||
ALTER TABLE {$NAMESPACE}_search.search_documentfield
|
||||
ADD stemmedCorpus LONGTEXT COLLATE {$COLLATE_FULLTEXT};
|
|
@ -33,6 +33,8 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
|
||||
$conn_w = $store->establishConnection('w');
|
||||
|
||||
$stemmer = new PhutilSearchStemmer();
|
||||
|
||||
$field_dao = new PhabricatorSearchDocumentField();
|
||||
queryfx(
|
||||
$conn_w,
|
||||
|
@ -41,16 +43,21 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
$phid);
|
||||
foreach ($doc->getFieldData() as $field) {
|
||||
list($ftype, $corpus, $aux_phid) = $field;
|
||||
|
||||
$stemmed_corpus = $stemmer->stemCorpus($corpus);
|
||||
|
||||
queryfx(
|
||||
$conn_w,
|
||||
'INSERT INTO %T (phid, phidType, field, auxPHID, corpus) '.
|
||||
'VALUES (%s, %s, %s, %ns, %s)',
|
||||
'INSERT INTO %T
|
||||
(phid, phidType, field, auxPHID, corpus, stemmedCorpus) '.
|
||||
'VALUES (%s, %s, %s, %ns, %s, %s)',
|
||||
$field_dao->getTableName(),
|
||||
$phid,
|
||||
$doc->getDocumentType(),
|
||||
$ftype,
|
||||
$aux_phid,
|
||||
$corpus);
|
||||
$corpus,
|
||||
$stemmed_corpus);
|
||||
}
|
||||
|
||||
|
||||
|
@ -205,8 +212,9 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
if (strlen($compiled_query)) {
|
||||
$select[] = qsprintf(
|
||||
$conn,
|
||||
'IF(field.field = %s, %d, 0) + '.
|
||||
'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE) AS fieldScore',
|
||||
'IF(field.field = %s, %d, 0) +
|
||||
MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE)
|
||||
AS fieldScore',
|
||||
$title_field,
|
||||
$title_boost,
|
||||
$compiled_query);
|
||||
|
@ -218,7 +226,7 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
|
||||
$where[] = qsprintf(
|
||||
$conn,
|
||||
'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE)',
|
||||
'MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE)',
|
||||
$compiled_query);
|
||||
|
||||
if ($query->getParameter('field')) {
|
||||
|
@ -380,11 +388,17 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
}
|
||||
|
||||
private function compileQuery($raw_query) {
|
||||
$compiler = PhabricatorSearchDocument::newQueryCompiler();
|
||||
$stemmer = new PhutilSearchStemmer();
|
||||
|
||||
return $compiler
|
||||
$compiler = PhabricatorSearchDocument::newQueryCompiler()
|
||||
->setQuery($raw_query)
|
||||
->compileQuery();
|
||||
->setStemmer($stemmer);
|
||||
|
||||
$queries = array();
|
||||
$queries[] = $compiler->compileLiteralQuery();
|
||||
$queries[] = $compiler->compileStemmedQuery();
|
||||
|
||||
return implode(' ', array_filter($queries));
|
||||
}
|
||||
|
||||
public function indexExists() {
|
||||
|
|
|
@ -6,6 +6,7 @@ final class PhabricatorSearchDocumentField extends PhabricatorSearchDAO {
|
|||
protected $field;
|
||||
protected $auxPHID;
|
||||
protected $corpus;
|
||||
protected $stemmedCorpus;
|
||||
|
||||
protected function getConfiguration() {
|
||||
return array(
|
||||
|
@ -16,14 +17,15 @@ final class PhabricatorSearchDocumentField extends PhabricatorSearchDAO {
|
|||
'field' => 'text4',
|
||||
'auxPHID' => 'phid?',
|
||||
'corpus' => 'fulltext?',
|
||||
'stemmedCorpus' => 'fulltext?',
|
||||
),
|
||||
self::CONFIG_KEY_SCHEMA => array(
|
||||
'key_phid' => null,
|
||||
'phid' => array(
|
||||
'columns' => array('phid'),
|
||||
),
|
||||
'corpus' => array(
|
||||
'columns' => array('corpus'),
|
||||
'key_corpus' => array(
|
||||
'columns' => array('corpus', 'stemmedCorpus'),
|
||||
'type' => 'FULLTEXT',
|
||||
),
|
||||
),
|
||||
|
|
Loading…
Reference in a new issue