mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-30 02:32:42 +01:00
Parameterize the common ngrams threshold
Summary: Ref T13000. Since other changes have generally made the ngrams table manageable, I'm not planning to enable common ngrams by default at this time. Instead, make the threshold configurable with "--threshold" so we can guide installs through tuning this if they want (e.g. PHI110), and tune hosted instances. (This might eventually become automatic, but just smoothing this bit off for now feels reasonable to me.)
Test Plan: Ran with `--reset`, and with various invalid and valid `--threshold` arguments.
Reviewers: amckinley
Reviewed By: amckinley
Maniphest Tasks: T13000
Differential Revision: https://secure.phabricator.com/D18710
This commit is contained in:
parent
819b833607
commit
63d1230ade
1 changed file with 33 additions and 3 deletions
|
@ -16,19 +16,49 @@ final class PhabricatorSearchManagementNgramsWorkflow
|
||||||
'name' => 'reset',
|
'name' => 'reset',
|
||||||
'help' => pht('Reset all common ngram records.'),
|
'help' => pht('Reset all common ngram records.'),
|
||||||
),
|
),
|
||||||
|
array(
|
||||||
|
'name' => 'threshold',
|
||||||
|
'param' => 'threshold',
|
||||||
|
'help' => pht(
|
||||||
|
'Prune ngrams present in more than this fraction of '.
|
||||||
|
'documents.'),
|
||||||
|
),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function execute(PhutilArgumentParser $args) {
|
public function execute(PhutilArgumentParser $args) {
|
||||||
|
$min_documents = 4096;
|
||||||
|
|
||||||
$is_reset = $args->getArg('reset');
|
$is_reset = $args->getArg('reset');
|
||||||
|
$threshold = $args->getArg('threshold');
|
||||||
|
|
||||||
|
if ($is_reset && $threshold !== null) {
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht('Specify either --reset or --threshold, not both.'));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$is_reset && $threshold === null) {
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht('Specify either --reset or --threshold.'));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$is_reset) {
|
||||||
|
if (!is_numeric($threshold)) {
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht('Specify a numeric threshold between 0 and 1.'));
|
||||||
|
}
|
||||||
|
|
||||||
|
$threshold = (double)$threshold;
|
||||||
|
if ($threshold <= 0 || $threshold >= 1) {
|
||||||
|
throw new PhutilArgumentUsageException(
|
||||||
|
pht('Threshold must be greater than 0.0 and less than 1.0.'));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$all_objects = id(new PhutilClassMapQuery())
|
$all_objects = id(new PhutilClassMapQuery())
|
||||||
->setAncestorClass('PhabricatorFerretInterface')
|
->setAncestorClass('PhabricatorFerretInterface')
|
||||||
->execute();
|
->execute();
|
||||||
|
|
||||||
$min_documents = 4096;
|
|
||||||
$threshold = 0.15;
|
|
||||||
|
|
||||||
foreach ($all_objects as $object) {
|
foreach ($all_objects as $object) {
|
||||||
$engine = $object->newFerretEngine();
|
$engine = $object->newFerretEngine();
|
||||||
$conn = $object->establishConnection('w');
|
$conn = $object->establishConnection('w');
|
||||||
|
|
Loading…
Reference in a new issue