1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-10 14:51:06 +01:00

Add a garbage collector for common ngrams

Summary:
Ref T13000. After an ngram is marked as "common", we can delete it from the storage table.

Currently, the only way to get ngrams marked as "common" is to manually run `bin/search ngrams`, so this has no impact on normal installs.

Test Plan: Ran `bin/garbage collect`, saw it start chewing through my local Maniphest ngrams table and removing common ngrams.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13000

Differential Revision: https://secure.phabricator.com/D18687
This commit is contained in:
epriestley 2017-10-04 17:19:27 -07:00
parent c767c971ca
commit 66df5b1493
3 changed files with 61 additions and 1 deletions

View file

@ -3941,6 +3941,7 @@ phutil_register_library_map(array(
'PhabricatorSearchEngineAttachment' => 'applications/search/engineextension/PhabricatorSearchEngineAttachment.php',
'PhabricatorSearchEngineExtension' => 'applications/search/engineextension/PhabricatorSearchEngineExtension.php',
'PhabricatorSearchEngineExtensionModule' => 'applications/search/engineextension/PhabricatorSearchEngineExtensionModule.php',
'PhabricatorSearchFerretNgramGarbageCollector' => 'applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php',
'PhabricatorSearchField' => 'applications/search/field/PhabricatorSearchField.php',
'PhabricatorSearchHost' => 'infrastructure/cluster/search/PhabricatorSearchHost.php',
'PhabricatorSearchHovercardController' => 'applications/search/controller/PhabricatorSearchHovercardController.php',
@ -9522,6 +9523,7 @@ phutil_register_library_map(array(
'PhabricatorSearchEngineAttachment' => 'Phobject',
'PhabricatorSearchEngineExtension' => 'Phobject',
'PhabricatorSearchEngineExtensionModule' => 'PhabricatorConfigModule',
'PhabricatorSearchFerretNgramGarbageCollector' => 'PhabricatorGarbageCollector',
'PhabricatorSearchField' => 'Phobject',
'PhabricatorSearchHost' => 'Phobject',
'PhabricatorSearchHovercardController' => 'PhabricatorSearchBaseController',

View file

@ -0,0 +1,55 @@
<?php
/**
 * Garbage collector which removes rows for "common" ngrams from Ferret
 * engine ngram tables.
 *
 * Each pass handles at most one ngram: it looks for a row flagged with
 * `needsCollection = 1` in a common ngrams table, deletes every row for that
 * ngram from the matching ngrams table, then clears the flag.
 */
final class PhabricatorSearchFerretNgramGarbageCollector
  extends PhabricatorGarbageCollector {

  const COLLECTORCONST = 'search.ferret.ngram';

  /**
   * Get the translated display name of this collector.
   *
   * @return string Collector name.
   */
  public function getCollectorName() {
    return pht('Ferret Engine Ngrams');
  }

  /**
   * Report that this collector uses an automatic policy.
   *
   * NOTE(review): the exact meaning of "automatic policy" is defined by the
   * parent class, which is not visible here; presumably it means there is no
   * configurable retention period. Confirm against PhabricatorGarbageCollector.
   *
   * @return bool Always true.
   */
  public function hasAutomaticPolicy() {
    return true;
  }

  /**
   * Collect a single pending common ngram, if any object type has one.
   *
   * Object types are examined in the order the class map query returns them.
   * The first type with a pending ngram is processed and the pass ends
   * immediately; types with nothing pending are skipped.
   *
   * @return bool True if an ngram was collected, false if no object type had
   *   a pending ngram. Presumably the caller uses a true result to decide
   *   whether to run another pass; verify against the parent class.
   */
  protected function collectGarbage() {
    $ferret_objects = id(new PhutilClassMapQuery())
      ->setAncestorClass('PhabricatorFerretInterface')
      ->execute();

    foreach ($ferret_objects as $ferret_object) {
      $ferret_engine = $ferret_object->newFerretEngine();
      $conn_w = $ferret_object->establishConnection('w');

      // Pick any one ngram which has been flagged for collection.
      $pending = queryfx_one(
        $conn_w,
        'SELECT ngram FROM %T WHERE needsCollection = 1 LIMIT 1',
        $ferret_engine->getCommonNgramsTableName());

      if ($pending) {
        $common_ngram = $pending['ngram'];

        // Drop all stored rows for the ngram first, and only then clear the
        // flag, so a failed delete leaves the ngram flagged.
        queryfx(
          $conn_w,
          'DELETE FROM %T WHERE ngram = %s',
          $ferret_engine->getNgramsTableName(),
          $common_ngram);

        queryfx(
          $conn_w,
          'UPDATE %T SET needsCollection = 0 WHERE ngram = %s',
          $ferret_engine->getCommonNgramsTableName(),
          $common_ngram);

        // One ngram per pass: stop after the first successful collection.
        return true;
      }
    }

    return false;
  }

}

View file

@ -6,7 +6,10 @@ final class PhabricatorSearchManagementNgramsWorkflow
protected function didConstruct() {
$this
->setName('ngrams')
->setSynopsis(pht('Recompute common ngrams.'))
->setSynopsis(
pht(
'Recompute common ngrams. This is an advanced workflow that '.
'can harm search quality if used improperly.'))
->setArguments(
array(
array(