mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-26 00:32:42 +01:00
Add a garbage collector for common ngrams
Summary: Ref T13000. After an ngram is marked as "common", we can delete it from the storage table. Currently, the only way to get ngrams marked as "common" is to manually run `bin/search ngrams`, so this has no impact on normal installs. Test Plan: Ran `bin/garbage collect`, saw it start chewing through my local Maniphest ngrams table and removing common ngrams. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13000 Differential Revision: https://secure.phabricator.com/D18687
This commit is contained in:
parent
c767c971ca
commit
66df5b1493
3 changed files with 61 additions and 1 deletions
|
@ -3941,6 +3941,7 @@ phutil_register_library_map(array(
|
|||
'PhabricatorSearchEngineAttachment' => 'applications/search/engineextension/PhabricatorSearchEngineAttachment.php',
|
||||
'PhabricatorSearchEngineExtension' => 'applications/search/engineextension/PhabricatorSearchEngineExtension.php',
|
||||
'PhabricatorSearchEngineExtensionModule' => 'applications/search/engineextension/PhabricatorSearchEngineExtensionModule.php',
|
||||
'PhabricatorSearchFerretNgramGarbageCollector' => 'applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php',
|
||||
'PhabricatorSearchField' => 'applications/search/field/PhabricatorSearchField.php',
|
||||
'PhabricatorSearchHost' => 'infrastructure/cluster/search/PhabricatorSearchHost.php',
|
||||
'PhabricatorSearchHovercardController' => 'applications/search/controller/PhabricatorSearchHovercardController.php',
|
||||
|
@ -9522,6 +9523,7 @@ phutil_register_library_map(array(
|
|||
'PhabricatorSearchEngineAttachment' => 'Phobject',
|
||||
'PhabricatorSearchEngineExtension' => 'Phobject',
|
||||
'PhabricatorSearchEngineExtensionModule' => 'PhabricatorConfigModule',
|
||||
'PhabricatorSearchFerretNgramGarbageCollector' => 'PhabricatorGarbageCollector',
|
||||
'PhabricatorSearchField' => 'Phobject',
|
||||
'PhabricatorSearchHost' => 'Phobject',
|
||||
'PhabricatorSearchHovercardController' => 'PhabricatorSearchBaseController',
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
<?php
|
||||
|
||||
/**
 * Garbage collector that removes stored Ferret engine ngram rows for ngrams
 * which have been flagged in the "common ngrams" table as needing collection.
 *
 * Each call to collectGarbage() processes at most one ngram, taken from the
 * first Ferret-capable object whose common ngrams table has a pending row.
 */
final class PhabricatorSearchFerretNgramGarbageCollector
  extends PhabricatorGarbageCollector {

  const COLLECTORCONST = 'search.ferret.ngram';

  /**
   * Human-readable name of this collector.
   */
  public function getCollectorName() {
    return pht('Ferret Engine Ngrams');
  }

  /**
   * Always reports true.
   *
   * NOTE(review): presumably this tells the garbage collection framework that
   * no retention period needs to be configured for this collector -- confirm
   * against PhabricatorGarbageCollector.
   */
  public function hasAutomaticPolicy() {
    return true;
  }

  /**
   * Collect a single pending common ngram, if any exists.
   *
   * Walks every class discovered under the 'PhabricatorFerretInterface'
   * ancestor. For the first one whose common ngrams table has a row with
   * needsCollection = 1, deletes all rows for that ngram from the engine's
   * ngrams table and then clears the needsCollection flag.
   *
   * @return bool True if an ngram was collected on this call, false if no
   *   engine had a pending ngram.
   */
  protected function collectGarbage() {
    $class_query = id(new PhutilClassMapQuery())
      ->setAncestorClass('PhabricatorFerretInterface');
    $ferret_objects = $class_query->execute();

    foreach ($ferret_objects as $ferret_object) {
      $engine = $ferret_object->newFerretEngine();
      $conn_w = $ferret_object->establishConnection('w');

      // Look for one common ngram which still has to be purged.
      $pending = queryfx_one(
        $conn_w,
        'SELECT ngram FROM %T WHERE needsCollection = 1 LIMIT 1',
        $engine->getCommonNgramsTableName());

      if ($pending) {
        $common_ngram = $pending['ngram'];

        // Remove the stored rows first, and only then clear the flag: if we
        // are interrupted between the two queries, the ngram remains flagged
        // and is simply picked up again on a later pass.
        queryfx(
          $conn_w,
          'DELETE FROM %T WHERE ngram = %s',
          $engine->getNgramsTableName(),
          $common_ngram);

        queryfx(
          $conn_w,
          'UPDATE %T SET needsCollection = 0 WHERE ngram = %s',
          $engine->getCommonNgramsTableName(),
          $common_ngram);

        // Only one ngram is handled per call.
        return true;
      }
    }

    // No engine had anything left to collect.
    return false;
  }

}
|
|
@ -6,7 +6,10 @@ final class PhabricatorSearchManagementNgramsWorkflow
|
|||
protected function didConstruct() {
|
||||
$this
|
||||
->setName('ngrams')
|
||||
->setSynopsis(pht('Recompute common ngrams.'))
|
||||
->setSynopsis(
|
||||
pht(
|
||||
'Recompute common ngrams. This is an advanced workflow that '.
|
||||
'can harm search quality if used improperly.'))
|
||||
->setArguments(
|
||||
array(
|
||||
array(
|
||||
|
|
Loading…
Reference in a new issue