mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-18 19:40:55 +01:00
Implement basic ngram search for Owners Package names
Summary: Ref T9979. This uses ngrams (specifically, trigrams) to build a reasonably efficient index for substring matching. Specifically, for a package like "Example", with ID 123, we store rows like this: ``` < ex, 123> <exa, 123> <xam, 123> <amp, 123> <mpl, 123> <ple, 123> <le , 123> ``` When the user searches for `exam`, we join this table for packages with tokens `exa` and `xam`. MySQL can do this a lot more efficiently than it can process a `LIKE "%exam%"` query against a huge table. When the user searches for a one-letter or two-letter string, we only search the beginnings of words. This is probably what they want, the only thing we can do quickly, and a reasonable/expected behavior for typeaheads. Test Plan: - Ran storage upgrades and search indexer. - Searched for stuff with "name contains". - Used typeahead and got sensible results. - Searched for `aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz` and saw only 16 joins. Reviewers: chad Reviewed By: chad Maniphest Tasks: T9979 Differential Revision: https://secure.phabricator.com/D14846
This commit is contained in:
parent
5c8025c41d
commit
96fe8c0b83
18 changed files with 457 additions and 28 deletions
|
@ -0,0 +1,7 @@
|
||||||
|
CREATE TABLE {$NAMESPACE}_owners.owners_name_ngrams (
  id INT UNSIGNED NOT NULL AUTO_INCREMENT,
  objectID INT UNSIGNED NOT NULL,
  ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT},
  PRIMARY KEY (id),
  KEY `key_object` (objectID),
  KEY `key_ngram` (ngram, objectID)
) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};
|
11
resources/sql/autopatches/20151221.search.3.reindex.php
Normal file
11
resources/sql/autopatches/20151221.search.3.reindex.php
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
// Walk every stored Owners package and queue it for indexing. The 'force'
// flag is passed through to the indexing worker for each queued document.
$packages = new LiskMigrationIterator(new PhabricatorOwnersPackage());

foreach ($packages as $package) {
  $parameters = array(
    'force' => true,
  );

  PhabricatorSearchWorker::queueDocumentForIndexing(
    $package->getPHID(),
    $parameters);
}
|
|
@ -2548,6 +2548,8 @@ phutil_register_library_map(array(
|
||||||
'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php',
|
'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php',
|
||||||
'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php',
|
'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php',
|
||||||
'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php',
|
'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php',
|
||||||
|
'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php',
|
||||||
|
'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php',
|
||||||
'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php',
|
'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php',
|
||||||
'PhabricatorNotificationClearController' => 'applications/notification/controller/PhabricatorNotificationClearController.php',
|
'PhabricatorNotificationClearController' => 'applications/notification/controller/PhabricatorNotificationClearController.php',
|
||||||
'PhabricatorNotificationClient' => 'applications/notification/client/PhabricatorNotificationClient.php',
|
'PhabricatorNotificationClient' => 'applications/notification/client/PhabricatorNotificationClient.php',
|
||||||
|
@ -2636,7 +2638,9 @@ phutil_register_library_map(array(
|
||||||
'PhabricatorOwnersPackage' => 'applications/owners/storage/PhabricatorOwnersPackage.php',
|
'PhabricatorOwnersPackage' => 'applications/owners/storage/PhabricatorOwnersPackage.php',
|
||||||
'PhabricatorOwnersPackageDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php',
|
'PhabricatorOwnersPackageDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php',
|
||||||
'PhabricatorOwnersPackageEditEngine' => 'applications/owners/editor/PhabricatorOwnersPackageEditEngine.php',
|
'PhabricatorOwnersPackageEditEngine' => 'applications/owners/editor/PhabricatorOwnersPackageEditEngine.php',
|
||||||
|
'PhabricatorOwnersPackageFulltextEngine' => 'applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php',
|
||||||
'PhabricatorOwnersPackageFunctionDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageFunctionDatasource.php',
|
'PhabricatorOwnersPackageFunctionDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageFunctionDatasource.php',
|
||||||
|
'PhabricatorOwnersPackageNameNgrams' => 'applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php',
|
||||||
'PhabricatorOwnersPackageOwnerDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php',
|
'PhabricatorOwnersPackageOwnerDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php',
|
||||||
'PhabricatorOwnersPackagePHIDType' => 'applications/owners/phid/PhabricatorOwnersPackagePHIDType.php',
|
'PhabricatorOwnersPackagePHIDType' => 'applications/owners/phid/PhabricatorOwnersPackagePHIDType.php',
|
||||||
'PhabricatorOwnersPackageQuery' => 'applications/owners/query/PhabricatorOwnersPackageQuery.php',
|
'PhabricatorOwnersPackageQuery' => 'applications/owners/query/PhabricatorOwnersPackageQuery.php',
|
||||||
|
@ -3047,6 +3051,8 @@ phutil_register_library_map(array(
|
||||||
'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php',
|
'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php',
|
||||||
'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php',
|
'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php',
|
||||||
'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php',
|
'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php',
|
||||||
|
'PhabricatorSearchNgrams' => 'applications/search/ngrams/PhabricatorSearchNgrams.php',
|
||||||
|
'PhabricatorSearchNgramsDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php',
|
||||||
'PhabricatorSearchOrderController' => 'applications/search/controller/PhabricatorSearchOrderController.php',
|
'PhabricatorSearchOrderController' => 'applications/search/controller/PhabricatorSearchOrderController.php',
|
||||||
'PhabricatorSearchOrderField' => 'applications/search/field/PhabricatorSearchOrderField.php',
|
'PhabricatorSearchOrderField' => 'applications/search/field/PhabricatorSearchOrderField.php',
|
||||||
'PhabricatorSearchPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorSearchPreferencesSettingsPanel.php',
|
'PhabricatorSearchPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorSearchPreferencesSettingsPanel.php',
|
||||||
|
@ -6802,6 +6808,7 @@ phutil_register_library_map(array(
|
||||||
'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
|
'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
|
||||||
'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule',
|
'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule',
|
||||||
'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock',
|
'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock',
|
||||||
|
'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension',
|
||||||
'PhabricatorNotificationBuilder' => 'Phobject',
|
'PhabricatorNotificationBuilder' => 'Phobject',
|
||||||
'PhabricatorNotificationClearController' => 'PhabricatorNotificationController',
|
'PhabricatorNotificationClearController' => 'PhabricatorNotificationController',
|
||||||
'PhabricatorNotificationClient' => 'Phobject',
|
'PhabricatorNotificationClient' => 'Phobject',
|
||||||
|
@ -6907,10 +6914,14 @@ phutil_register_library_map(array(
|
||||||
'PhabricatorCustomFieldInterface',
|
'PhabricatorCustomFieldInterface',
|
||||||
'PhabricatorDestructibleInterface',
|
'PhabricatorDestructibleInterface',
|
||||||
'PhabricatorConduitResultInterface',
|
'PhabricatorConduitResultInterface',
|
||||||
|
'PhabricatorFulltextInterface',
|
||||||
|
'PhabricatorNgramsInterface',
|
||||||
),
|
),
|
||||||
'PhabricatorOwnersPackageDatasource' => 'PhabricatorTypeaheadDatasource',
|
'PhabricatorOwnersPackageDatasource' => 'PhabricatorTypeaheadDatasource',
|
||||||
'PhabricatorOwnersPackageEditEngine' => 'PhabricatorEditEngine',
|
'PhabricatorOwnersPackageEditEngine' => 'PhabricatorEditEngine',
|
||||||
|
'PhabricatorOwnersPackageFulltextEngine' => 'PhabricatorFulltextEngine',
|
||||||
'PhabricatorOwnersPackageFunctionDatasource' => 'PhabricatorTypeaheadCompositeDatasource',
|
'PhabricatorOwnersPackageFunctionDatasource' => 'PhabricatorTypeaheadCompositeDatasource',
|
||||||
|
'PhabricatorOwnersPackageNameNgrams' => 'PhabricatorSearchNgrams',
|
||||||
'PhabricatorOwnersPackageOwnerDatasource' => 'PhabricatorTypeaheadCompositeDatasource',
|
'PhabricatorOwnersPackageOwnerDatasource' => 'PhabricatorTypeaheadCompositeDatasource',
|
||||||
'PhabricatorOwnersPackagePHIDType' => 'PhabricatorPHIDType',
|
'PhabricatorOwnersPackagePHIDType' => 'PhabricatorPHIDType',
|
||||||
'PhabricatorOwnersPackageQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
|
'PhabricatorOwnersPackageQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
|
||||||
|
@ -7414,6 +7425,8 @@ phutil_register_library_map(array(
|
||||||
'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow',
|
'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow',
|
||||||
'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow',
|
'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow',
|
||||||
'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow',
|
'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow',
|
||||||
|
'PhabricatorSearchNgrams' => 'PhabricatorSearchDAO',
|
||||||
|
'PhabricatorSearchNgramsDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension',
|
||||||
'PhabricatorSearchOrderController' => 'PhabricatorSearchBaseController',
|
'PhabricatorSearchOrderController' => 'PhabricatorSearchBaseController',
|
||||||
'PhabricatorSearchOrderField' => 'PhabricatorSearchField',
|
'PhabricatorSearchOrderField' => 'PhabricatorSearchField',
|
||||||
'PhabricatorSearchPreferencesSettingsPanel' => 'PhabricatorSettingsPanel',
|
'PhabricatorSearchPreferencesSettingsPanel' => 'PhabricatorSettingsPanel',
|
||||||
|
|
|
@ -201,7 +201,8 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
|
||||||
|
|
||||||
$is_binary = ($this->getUTF8Charset() == 'binary');
|
$is_binary = ($this->getUTF8Charset() == 'binary');
|
||||||
$matches = null;
|
$matches = null;
|
||||||
if (preg_match('/^(fulltext|sort|text)(\d+)?\z/', $data_type, $matches)) {
|
$pattern = '/^(fulltext|sort|text|char)(\d+)?\z/';
|
||||||
|
if (preg_match($pattern, $data_type, $matches)) {
|
||||||
|
|
||||||
// Limit the permitted column lengths under the theory that it would
|
// Limit the permitted column lengths under the theory that it would
|
||||||
// be nice to eventually reduce this to a small set of standard lengths.
|
// be nice to eventually reduce this to a small set of standard lengths.
|
||||||
|
@ -220,6 +221,7 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
|
||||||
'text8' => true,
|
'text8' => true,
|
||||||
'text4' => true,
|
'text4' => true,
|
||||||
'text' => true,
|
'text' => true,
|
||||||
|
'char3' => true,
|
||||||
'sort255' => true,
|
'sort255' => true,
|
||||||
'sort128' => true,
|
'sort128' => true,
|
||||||
'sort64' => true,
|
'sort64' => true,
|
||||||
|
@ -266,10 +268,14 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
|
||||||
// the majority of cases.
|
// the majority of cases.
|
||||||
$column_type = 'longtext';
|
$column_type = 'longtext';
|
||||||
break;
|
break;
|
||||||
|
case 'char':
|
||||||
|
$column_type = 'char('.$size.')';
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch ($type) {
|
switch ($type) {
|
||||||
case 'text':
|
case 'text':
|
||||||
|
case 'char':
|
||||||
if ($is_binary) {
|
if ($is_binary) {
|
||||||
// We leave collation and character set unspecified in order to
|
// We leave collation and character set unspecified in order to
|
||||||
// generate valid SQL.
|
// generate valid SQL.
|
||||||
|
|
|
@ -334,4 +334,8 @@ final class PhabricatorOwnersPackageTransactionEditor
|
||||||
return $body;
|
return $body;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Opt this editor in to search support.
 *
 * NOTE(review): presumably the parent editor uses this to decide whether
 * edits queue the package for reindexing -- confirm against the base class.
 *
 * @return bool Always true.
 */
protected function supportsSearch() {
  return true;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Builds the fulltext search document for an Owners package.
 */
final class PhabricatorOwnersPackageFulltextEngine
  extends PhabricatorFulltextEngine {

  /**
   * Fill in the abstract search document for a package.
   *
   * Sets the document title from the package name, stamps placeholder
   * created/modified times, and records an open/closed relationship based
   * on whether the package is archived.
   *
   * @param PhabricatorSearchAbstractDocument $document Document to populate.
   * @param object $object Package being indexed; must provide getName(),
   *   isArchived() and getPHID().
   * @return void
   */
  protected function buildAbstractDocument(
    PhabricatorSearchAbstractDocument $document,
    $object) {

    $package = $object;

    // Read the clock exactly once so that every timestamp written to this
    // document agrees, even if indexing straddles a second boundary.
    $now = PhabricatorTime::getNow();

    $document->setDocumentTitle($package->getName());

    // TODO: These are bogus, but not currently stored on packages.
    $document->setDocumentCreated($now);
    $document->setDocumentModified($now);

    $document->addRelationship(
      $package->isArchived()
        ? PhabricatorSearchRelationship::RELATIONSHIP_CLOSED
        : PhabricatorSearchRelationship::RELATIONSHIP_OPEN,
      $package->getPHID(),
      PhabricatorOwnersPackagePHIDType::TYPECONST,
      $now);
  }

}
|
|
@ -9,7 +9,6 @@ final class PhabricatorOwnersPackageQuery
|
||||||
private $authorityPHIDs;
|
private $authorityPHIDs;
|
||||||
private $repositoryPHIDs;
|
private $repositoryPHIDs;
|
||||||
private $paths;
|
private $paths;
|
||||||
private $namePrefix;
|
|
||||||
private $statuses;
|
private $statuses;
|
||||||
|
|
||||||
private $controlMap = array();
|
private $controlMap = array();
|
||||||
|
@ -78,9 +77,10 @@ final class PhabricatorOwnersPackageQuery
|
||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function withNamePrefix($prefix) {
|
public function withNameNgrams($ngrams) {
|
||||||
$this->namePrefix = $prefix;
|
return $this->withNgramsConstraint(
|
||||||
return $this;
|
new PhabricatorOwnersPackageNameNgrams(),
|
||||||
|
$ngrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function needPaths($need_paths) {
|
public function needPaths($need_paths) {
|
||||||
|
@ -208,15 +208,6 @@ final class PhabricatorOwnersPackageQuery
|
||||||
$this->statuses);
|
$this->statuses);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strlen($this->namePrefix)) {
|
|
||||||
// NOTE: This is a hacky mess, but this column is currently case
|
|
||||||
// sensitive and unique.
|
|
||||||
$where[] = qsprintf(
|
|
||||||
$conn,
|
|
||||||
'LOWER(p.name) LIKE %>',
|
|
||||||
phutil_utf8_strtolower($this->namePrefix));
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($this->controlMap) {
|
if ($this->controlMap) {
|
||||||
$clauses = array();
|
$clauses = array();
|
||||||
foreach ($this->controlMap as $repository_phid => $paths) {
|
foreach ($this->controlMap as $repository_phid => $paths) {
|
||||||
|
|
|
@ -25,6 +25,10 @@ final class PhabricatorOwnersPackageSearchEngine
|
||||||
->setDescription(
|
->setDescription(
|
||||||
pht('Search for packages with specific owners.'))
|
pht('Search for packages with specific owners.'))
|
||||||
->setDatasource(new PhabricatorProjectOrUserDatasource()),
|
->setDatasource(new PhabricatorProjectOrUserDatasource()),
|
||||||
|
id(new PhabricatorSearchTextField())
|
||||||
|
->setLabel(pht('Name Contains'))
|
||||||
|
->setKey('name')
|
||||||
|
->setDescription(pht('Search for packages by name substrings.')),
|
||||||
id(new PhabricatorSearchDatasourceField())
|
id(new PhabricatorSearchDatasourceField())
|
||||||
->setLabel(pht('Repositories'))
|
->setLabel(pht('Repositories'))
|
||||||
->setKey('repositoryPHIDs')
|
->setKey('repositoryPHIDs')
|
||||||
|
@ -69,6 +73,10 @@ final class PhabricatorOwnersPackageSearchEngine
|
||||||
$query->withStatuses($map['statuses']);
|
$query->withStatuses($map['statuses']);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (strlen($map['name'])) {
|
||||||
|
$query->withNameNgrams($map['name']);
|
||||||
|
}
|
||||||
|
|
||||||
return $query;
|
return $query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,9 @@ final class PhabricatorOwnersPackage
|
||||||
PhabricatorApplicationTransactionInterface,
|
PhabricatorApplicationTransactionInterface,
|
||||||
PhabricatorCustomFieldInterface,
|
PhabricatorCustomFieldInterface,
|
||||||
PhabricatorDestructibleInterface,
|
PhabricatorDestructibleInterface,
|
||||||
PhabricatorConduitResultInterface {
|
PhabricatorConduitResultInterface,
|
||||||
|
PhabricatorFulltextInterface,
|
||||||
|
PhabricatorNgramsInterface {
|
||||||
|
|
||||||
protected $name;
|
protected $name;
|
||||||
protected $originalName;
|
protected $originalName;
|
||||||
|
@ -46,7 +48,7 @@ final class PhabricatorOwnersPackage
|
||||||
self::CONFIG_TIMESTAMPS => false,
|
self::CONFIG_TIMESTAMPS => false,
|
||||||
self::CONFIG_AUX_PHID => true,
|
self::CONFIG_AUX_PHID => true,
|
||||||
self::CONFIG_COLUMN_SCHEMA => array(
|
self::CONFIG_COLUMN_SCHEMA => array(
|
||||||
'name' => 'text128',
|
'name' => 'sort128',
|
||||||
'originalName' => 'text255',
|
'originalName' => 'text255',
|
||||||
'description' => 'text',
|
'description' => 'text',
|
||||||
'primaryOwnerPHID' => 'phid?',
|
'primaryOwnerPHID' => 'phid?',
|
||||||
|
@ -54,17 +56,6 @@ final class PhabricatorOwnersPackage
|
||||||
'mailKey' => 'bytes20',
|
'mailKey' => 'bytes20',
|
||||||
'status' => 'text32',
|
'status' => 'text32',
|
||||||
),
|
),
|
||||||
self::CONFIG_KEY_SCHEMA => array(
|
|
||||||
'key_phid' => null,
|
|
||||||
'phid' => array(
|
|
||||||
'columns' => array('phid'),
|
|
||||||
'unique' => true,
|
|
||||||
),
|
|
||||||
'name' => array(
|
|
||||||
'columns' => array('name'),
|
|
||||||
'unique' => true,
|
|
||||||
),
|
|
||||||
),
|
|
||||||
) + parent::getConfiguration();
|
) + parent::getConfiguration();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -433,4 +424,23 @@ final class PhabricatorOwnersPackage
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* -( PhabricatorFulltextInterface )--------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Create the engine which builds this package's fulltext document.
 *
 * @return PhabricatorOwnersPackageFulltextEngine New engine instance.
 */
public function newFulltextEngine() {
  $engine = new PhabricatorOwnersPackageFulltextEngine();
  return $engine;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* -( PhabricatorNgramsInterface )----------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Build the ngram indexes for this package.
 *
 * Currently a single index over the package name.
 *
 * @return list<PhabricatorSearchNgrams> Ngram objects with values set.
 */
public function newNgrams() {
  $name_ngrams = new PhabricatorOwnersPackageNameNgrams();
  $name_ngrams->setValue($this->getName());

  return array(
    $name_ngrams,
  );
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Ngram index over Owners package names.
 *
 * Combined with the base class table naming, the application name and ngram
 * key below resolve to the "owners_name_ngrams" table.
 */
final class PhabricatorOwnersPackageNameNgrams
  extends PhabricatorSearchNgrams {

  /**
   * @return string Key identifying this index; used in the table name.
   */
  public function getNgramKey() {
    return 'name';
  }

  /**
   * @return string Column on the package table holding the indexed text.
   */
  public function getColumnName() {
    return 'name';
  }

  /**
   * @return string Application name; used as the table name prefix.
   */
  public function getApplicationName() {
    return 'owners';
  }

}
|
|
@ -27,6 +27,14 @@ final class PhabricatorOwnersPackageTransaction
|
||||||
|
|
||||||
switch ($this->getTransactionType()) {
|
switch ($this->getTransactionType()) {
|
||||||
case self::TYPE_OWNERS:
|
case self::TYPE_OWNERS:
|
||||||
|
if (!is_array($old)) {
|
||||||
|
$old = array();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_array($new)) {
|
||||||
|
$new = array();
|
||||||
|
}
|
||||||
|
|
||||||
$add = array_diff($new, $old);
|
$add = array_diff($new, $old);
|
||||||
foreach ($add as $phid) {
|
foreach ($add as $phid) {
|
||||||
$phids[] = $phid;
|
$phids[] = $phid;
|
||||||
|
|
|
@ -22,7 +22,7 @@ final class PhabricatorOwnersPackageDatasource
|
||||||
$results = array();
|
$results = array();
|
||||||
|
|
||||||
$query = id(new PhabricatorOwnersPackageQuery())
|
$query = id(new PhabricatorOwnersPackageQuery())
|
||||||
->withNamePrefix($raw_query)
|
->withNameNgrams($raw_query)
|
||||||
->setOrder('name');
|
->setOrder('name');
|
||||||
|
|
||||||
$packages = $this->executeQuery($query);
|
$packages = $this->executeQuery($query);
|
||||||
|
|
|
@ -65,6 +65,9 @@ final class PhabricatorFulltextIndexEngineExtension
|
||||||
|
|
||||||
try {
|
try {
|
||||||
$comment = $xaction->getApplicationTransactionCommentObject();
|
$comment = $xaction->getApplicationTransactionCommentObject();
|
||||||
|
if (!$comment) {
|
||||||
|
return 'none';
|
||||||
|
}
|
||||||
} catch (Exception $ex) {
|
} catch (Exception $ex) {
|
||||||
return 'none';
|
return 'none';
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Index extension which writes ngram rows for objects implementing
 * @{interface:PhabricatorNgramsInterface}.
 */
final class PhabricatorNgramsIndexEngineExtension
  extends PhabricatorIndexEngineExtension {

  const EXTENSIONKEY = 'ngrams';

  public function getExtensionName() {
    return pht('Ngrams Engine');
  }

  /**
   * Compute a version digest from the object's current ngram values.
   *
   * The digest covers a key-sorted map of ngram key to indexed value, so it
   * changes whenever any indexed value changes.
   *
   * @param object $object Object implementing PhabricatorNgramsInterface.
   * @return string Digest of the serialized value map.
   */
  public function getIndexVersion($object) {
    $values = array();
    foreach ($object->newNgrams() as $ngram) {
      $values[$ngram->getNgramKey()] = $ngram->getValue();
    }

    // Sort by key so the digest does not depend on declaration order.
    ksort($values);

    return PhabricatorHash::digestForIndex(serialize($values));
  }

  /**
   * Only objects which declare ngram indexes are handled here.
   */
  public function shouldIndexObject($object) {
    if ($object instanceof PhabricatorNgramsInterface) {
      return true;
    }

    return false;
  }

  /**
   * Rewrite the ngram rows for every index the object declares.
   *
   * @param PhabricatorIndexEngine $engine Active index engine (unused).
   * @param object $object Object implementing PhabricatorNgramsInterface.
   * @return void
   */
  public function indexObject(
    PhabricatorIndexEngine $engine,
    $object) {

    $indexes = $object->newNgrams();
    foreach ($indexes as $index) {
      $index->writeNgram($object->getID());
    }
  }

}
|
|
@ -0,0 +1,31 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Destruction extension which removes an object's rows from each of its
 * ngram index tables when the object is destroyed.
 */
final class PhabricatorSearchNgramsDestructionEngineExtension
  extends PhabricatorDestructionEngineExtension {

  const EXTENSIONKEY = 'search.ngrams';

  public function getExtensionName() {
    return pht('Search Ngram');
  }

  /**
   * Only objects which declare ngram indexes have rows to clean up.
   */
  public function canDestroyObject(
    PhabricatorDestructionEngine $engine,
    $object) {

    if ($object instanceof PhabricatorNgramsInterface) {
      return true;
    }

    return false;
  }

  /**
   * Delete all ngram rows which reference the destroyed object.
   *
   * @param PhabricatorDestructionEngine $engine Active engine (unused).
   * @param object $object Object implementing PhabricatorNgramsInterface.
   * @return void
   */
  public function destroyObject(
    PhabricatorDestructionEngine $engine,
    $object) {

    $indexes = $object->newNgrams();
    foreach ($indexes as $index) {
      $conn_w = $index->establishConnection('w');
      $table_name = $index->getTableName();

      queryfx(
        $conn_w,
        'DELETE FROM %T WHERE objectID = %d',
        $table_name,
        $object->getID());
    }
  }

}
|
|
@ -0,0 +1,7 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Implemented by objects which expose one or more ngram (substring search)
 * indexes.
 */
interface PhabricatorNgramsInterface {

  /**
   * Build the ngram index objects for this object.
   *
   * @return list<PhabricatorSearchNgrams> Index objects, each carrying the
   *   value to be indexed.
   */
  public function newNgrams();

}
|
113
src/applications/search/ngrams/PhabricatorSearchNgrams.php
Normal file
113
src/applications/search/ngrams/PhabricatorSearchNgrams.php
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Base storage class for a trigram ("ngram") substring index.
 *
 * Each concrete subclass describes one index (which application it belongs
 * to, which key names it, and which column on the indexed object holds the
 * text). Rows in the index table map a three-character ngram to the ID of
 * an object whose value contains it.
 */
abstract class PhabricatorSearchNgrams
  extends PhabricatorSearchDAO {

  protected $objectID;
  protected $ngram;

  // The text to index; held in memory only, never written as a column.
  private $value;

  /**
   * @return string Key naming this index; becomes part of the table name.
   */
  abstract public function getNgramKey();

  /**
   * @return string Column on the indexed object's table holding the text.
   */
  abstract public function getColumnName();

  /**
   * Set the text which @{method:writeNgram} will index.
   *
   * @param string $value Text to index.
   * @return this
   */
  final public function setValue($value) {
    $this->value = $value;
    return $this;
  }

  /**
   * @return string|null Text previously passed to @{method:setValue}.
   */
  final public function getValue() {
    return $this->value;
  }

  protected function getConfiguration() {
    return array(
      self::CONFIG_TIMESTAMPS => false,
      self::CONFIG_COLUMN_SCHEMA => array(
        'objectID' => 'uint32',
        'ngram' => 'char3',
      ),
      self::CONFIG_KEY_SCHEMA => array(
        'key_ngram' => array(
          'columns' => array('ngram', 'objectID'),
        ),
        'key_object' => array(
          'columns' => array('objectID'),
        ),
      ),
    ) + parent::getConfiguration();
  }

  /**
   * Build the index table name as "<application>_<key>_ngrams".
   *
   * @return string Table name.
   */
  public function getTableName() {
    $application = $this->getApplicationName();
    $key = $this->getNgramKey();
    return "{$application}_{$key}_ngrams";
  }

  /**
   * Split a string into tokens on runs of space characters.
   *
   * Only the ASCII space is treated as a separator. An empty or all-space
   * input yields a single empty token.
   *
   * @param string $value Raw text.
   * @return list<string> Tokens.
   */
  final public function tokenizeString($value) {
    $value = trim($value, ' ');
    $value = preg_split('/ +/', $value);
    return $value;
  }

  /**
   * Extract the distinct, sorted trigrams for a string.
   *
   * Each token is lowercased, then padded according to the mode:
   *
   *   - `query`: no padding, so the trigrams match anywhere in a word.
   *   - `index`: a space on both sides, so word starts and ends are indexed.
   *   - `prefix`: a leading space only, so the trigram matches a word start.
   *
   * Trigrams are built from UTF-8 characters rather than bytes, so multibyte
   * characters are never split and each trigram is exactly three characters.
   *
   * @param string $value Raw text.
   * @param string $mode One of `query`, `index` or `prefix`.
   * @return list<string> Distinct trigrams, sorted by key.
   */
  final public function getNgramsFromString($value, $mode) {
    $tokens = $this->tokenizeString($value);

    $ngrams = array();
    foreach ($tokens as $token) {
      $token = phutil_utf8_strtolower($token);

      switch ($mode) {
        case 'query':
          break;
        case 'index':
          $token = ' '.$token.' ';
          break;
        case 'prefix':
          $token = ' '.$token;
          break;
      }

      // Work on a vector of characters: byte offsets would cut multibyte
      // characters in half and produce invalid, unmatchable ngrams.
      $token_v = phutil_utf8v($token);
      $len = (count($token_v) - 2);
      for ($ii = 0; $ii < $len; $ii++) {
        $ngram = implode('', array_slice($token_v, $ii, 3));
        $ngrams[$ngram] = $ngram;
      }
    }

    ksort($ngrams);

    return array_keys($ngrams);
  }

  /**
   * Replace the stored ngram rows for an object with rows for the current
   * value.
   *
   * Existing rows for the object are always deleted; new rows are inserted
   * only if the value produces at least one ngram.
   *
   * @param int $object_id ID of the indexed object.
   * @return this
   */
  final public function writeNgram($object_id) {
    $ngrams = $this->getNgramsFromString($this->getValue(), 'index');
    $conn_w = $this->establishConnection('w');

    $sql = array();
    foreach ($ngrams as $ngram) {
      $sql[] = qsprintf(
        $conn_w,
        '(%d, %s)',
        $object_id,
        $ngram);
    }

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE objectID = %d',
      $this->getTableName(),
      $object_id);

    if ($sql) {
      queryfx(
        $conn_w,
        'INSERT INTO %T (objectID, ngram) VALUES %Q',
        $this->getTableName(),
        implode(', ', $sql));
    }

    return $this;
  }

}
|
|
@ -26,6 +26,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
private $edgeLogicConstraintsAreValid = false;
|
private $edgeLogicConstraintsAreValid = false;
|
||||||
private $spacePHIDs;
|
private $spacePHIDs;
|
||||||
private $spaceIsArchived;
|
private $spaceIsArchived;
|
||||||
|
private $ngrams = array();
|
||||||
|
|
||||||
protected function getPageCursors(array $page) {
|
protected function getPageCursors(array $page) {
|
||||||
return array(
|
return array(
|
||||||
|
@ -253,6 +254,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
$joins = array();
|
$joins = array();
|
||||||
$joins[] = $this->buildEdgeLogicJoinClause($conn);
|
$joins[] = $this->buildEdgeLogicJoinClause($conn);
|
||||||
$joins[] = $this->buildApplicationSearchJoinClause($conn);
|
$joins[] = $this->buildApplicationSearchJoinClause($conn);
|
||||||
|
$joins[] = $this->buildNgramsJoinClause($conn);
|
||||||
return $joins;
|
return $joins;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -274,6 +276,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
$where[] = $this->buildPagingClause($conn);
|
$where[] = $this->buildPagingClause($conn);
|
||||||
$where[] = $this->buildEdgeLogicWhereClause($conn);
|
$where[] = $this->buildEdgeLogicWhereClause($conn);
|
||||||
$where[] = $this->buildSpacesWhereClause($conn);
|
$where[] = $this->buildSpacesWhereClause($conn);
|
||||||
|
$where[] = $this->buildNgramsWhereClause($conn);
|
||||||
return $where;
|
return $where;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -324,6 +327,10 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($this->shouldGroupNgramResultRows()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1345,6 +1352,138 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* -( Ngrams )------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Constrain results to objects whose indexed text contains a value.
 *
 * Empty values are ignored. The character (not byte) length of the value is
 * recorded alongside it for use when the join clause is built.
 *
 * @param PhabricatorSearchNgrams $index Index to search.
 * @param string $value Text the user searched for.
 * @return this
 */
protected function withNgramsConstraint(
  PhabricatorSearchNgrams $index,
  $value) {

  if (!strlen($value)) {
    return $this;
  }

  $characters = phutil_utf8v($value);

  $this->ngrams[] = array(
    'index' => $index,
    'value' => $value,
    'length' => count($characters),
  );

  return $this;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Build one JOIN against the ngram table per ngram being searched for.
 *
 * Queries of three or more characters join on every trigram of the query.
 * Two-character queries join on the single word-start trigram. One-character
 * queries join with a prefix LIKE on " <char>", which likewise only matches
 * the beginnings of words.
 *
 * @param AphrontDatabaseConnection $conn Connection used for escaping.
 * @return list<string> JOIN clause fragments; empty if there are no ngram
 *   constraints.
 */
protected function buildNgramsJoinClause(AphrontDatabaseConnection $conn) {
  $flat = array();
  foreach ($this->ngrams as $spec) {
    $index = $spec['index'];
    $value = $spec['value'];
    $length = $spec['length'];

    if ($length >= 3) {
      $ngrams = $index->getNgramsFromString($value, 'query');
      $prefix = false;
    } else if ($length == 2) {
      $ngrams = $index->getNgramsFromString($value, 'prefix');
      $prefix = false;
    } else {
      // The index only ever stores lowercased ngrams, so lowercase the
      // single character here too. Without this, a query like "A" can not
      // match when the ngram column uses a case-sensitive collation.
      $ngrams = array(' '.phutil_utf8_strtolower($value));
      $prefix = true;
    }

    foreach ($ngrams as $ngram) {
      $flat[] = array(
        'table' => $index->getTableName(),
        'ngram' => $ngram,
        'prefix' => $prefix,
      );
    }
  }

  // MySQL only allows us to join a maximum of 61 tables per query. Each
  // ngram is going to cost us a join toward that limit, so if the user
  // specified a very long query string, just pick 16 of the ngrams
  // at random.
  if (count($flat) > 16) {
    shuffle($flat);
    $flat = array_slice($flat, 0, 16);
  }

  $primary_alias = $this->getPrimaryTableAlias();
  if ($primary_alias) {
    $id_column = qsprintf($conn, '%T.%T', $primary_alias, 'id');
  } else {
    $id_column = qsprintf($conn, '%T', 'id');
  }

  $idx = 1;
  $joins = array();
  foreach ($flat as $spec) {
    $table = $spec['table'];
    $ngram = $spec['ngram'];
    $prefix = $spec['prefix'];

    // Every join needs a distinct alias: "ngm1", "ngm2", ...
    $alias = 'ngm'.$idx++;

    if ($prefix) {
      $joins[] = qsprintf(
        $conn,
        'JOIN %T %T ON %T.objectID = %Q AND %T.ngram LIKE %>',
        $table,
        $alias,
        $alias,
        $id_column,
        $alias,
        $ngram);
    } else {
      $joins[] = qsprintf(
        $conn,
        'JOIN %T %T ON %T.objectID = %Q AND %T.ngram = %s',
        $table,
        $alias,
        $alias,
        $id_column,
        $alias,
        $ngram);
    }
  }

  return $joins;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Build the WHERE clauses which verify ngram matches against the real text.
 *
 * For every token of every ngram constraint, this emits a substring LIKE
 * against the indexed column on the primary table.
 *
 * @param AphrontDatabaseConnection $conn Connection used for escaping.
 * @return list<string> WHERE clause fragments; empty if there are no ngram
 *   constraints.
 */
protected function buildNgramsWhereClause(AphrontDatabaseConnection $conn) {
  $clauses = array();

  foreach ($this->ngrams as $spec) {
    $index = $spec['index'];
    $value = $spec['value'];

    $column_name = $index->getColumnName();

    // Qualify the column with the primary table alias when there is one.
    $table_alias = $this->getPrimaryTableAlias();
    if ($table_alias) {
      $column_ref = qsprintf($conn, '%T.%T', $table_alias, $column_name);
    } else {
      $column_ref = qsprintf($conn, '%T', $column_name);
    }

    foreach ($index->tokenizeString($value) as $token) {
      $clauses[] = qsprintf(
        $conn,
        '%Q LIKE %~',
        $column_ref,
        $token);
    }
  }

  return $clauses;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Report whether result rows need grouping because ngram constraints are
 * present.
 *
 * @return bool True if at least one ngram constraint has been added.
 */
protected function shouldGroupNgramResultRows() {
  if ($this->ngrams) {
    return true;
  }

  return false;
}
|
||||||
|
|
||||||
|
|
||||||
/* -( Edge Logic )--------------------------------------------------------- */
|
/* -( Edge Logic )--------------------------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue