From 66df5b149309ae261e335022429c458aac323570 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 4 Oct 2017 17:19:27 -0700 Subject: [PATCH] Add a garbage collector for common ngrams Summary: Ref T13000. After an ngram is marked as "common", we can delete it from the storage table. Currently, the only way to get ngrams marked as "common" is to manually run `bin/search ngrams`, so this has no impact on normal installs. Test Plan: Ran `bin/garbage collect`, saw it start chewing through my local Maniphest ngrams table and removing common ngrams. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13000 Differential Revision: https://secure.phabricator.com/D18687 --- src/__phutil_library_map__.php | 2 + ...catorSearchFerretNgramGarbageCollector.php | 55 +++++++++++++++++++ ...bricatorSearchManagementNgramsWorkflow.php | 5 +- 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 src/applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index dd2bcd8dfe..aca6e09fdb 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -3941,6 +3941,7 @@ phutil_register_library_map(array( 'PhabricatorSearchEngineAttachment' => 'applications/search/engineextension/PhabricatorSearchEngineAttachment.php', 'PhabricatorSearchEngineExtension' => 'applications/search/engineextension/PhabricatorSearchEngineExtension.php', 'PhabricatorSearchEngineExtensionModule' => 'applications/search/engineextension/PhabricatorSearchEngineExtensionModule.php', + 'PhabricatorSearchFerretNgramGarbageCollector' => 'applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php', 'PhabricatorSearchField' => 'applications/search/field/PhabricatorSearchField.php', 'PhabricatorSearchHost' => 'infrastructure/cluster/search/PhabricatorSearchHost.php', 'PhabricatorSearchHovercardController' => 'applications/search/controller/PhabricatorSearchHovercardController.php', @@ -9522,6 +9523,7 @@ phutil_register_library_map(array( 'PhabricatorSearchEngineAttachment' => 'Phobject', 'PhabricatorSearchEngineExtension' => 'Phobject', 'PhabricatorSearchEngineExtensionModule' => 'PhabricatorConfigModule', + 'PhabricatorSearchFerretNgramGarbageCollector' => 'PhabricatorGarbageCollector', 'PhabricatorSearchField' => 'Phobject', 'PhabricatorSearchHost' => 'Phobject', 'PhabricatorSearchHovercardController' => 'PhabricatorSearchBaseController', diff --git a/src/applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php b/src/applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php new file mode 100644 index 0000000000..f2c43743f6 --- /dev/null +++ b/src/applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php @@ -0,0 +1,55 @@ +setAncestorClass('PhabricatorFerretInterface') + ->execute(); + + $did_collect = false; + foreach ($all_objects as $object) { + $engine = $object->newFerretEngine(); + $conn = $object->establishConnection('w'); + + $ngram_row = queryfx_one( + $conn, + 'SELECT ngram FROM %T WHERE needsCollection = 1 LIMIT 1', + $engine->getCommonNgramsTableName()); + if (!$ngram_row) { + continue; + } + + $ngram = $ngram_row['ngram']; + + queryfx( + $conn, + 'DELETE FROM %T WHERE ngram = %s', + $engine->getNgramsTableName(), + $ngram); + + queryfx( + $conn, + 'UPDATE %T SET needsCollection = 0 WHERE ngram = %s', + $engine->getCommonNgramsTableName(), + $ngram); + + $did_collect = true; + break; + } + + return $did_collect; + } + +} diff --git a/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php b/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php index d1a6e0bdff..4b983fe0f9 100644 --- a/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php +++ b/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php @@ -6,7 +6,10 @@ final class PhabricatorSearchManagementNgramsWorkflow protected function didConstruct() { $this ->setName('ngrams') - ->setSynopsis(pht('Recompute common ngrams.')) + ->setSynopsis( + pht( + 'Recompute common ngrams. This is an advanced workflow that '. + 'can harm search quality if used improperly.')) ->setArguments( array( array(