mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-10 00:42:41 +01:00
Implement basic ngram search for Owners Package names
Summary: Ref T9979. This uses ngrams (specifically, trigrams) to build a reasonably efficient index for substring matching. Specifically, for a package like "Example", with ID 123, we store rows like this: ``` < ex, 123> <exa, 123> <xam, 123> <amp, 123> <mpl, 123> <ple, 123> <le , 123> ``` When the user searches for `exam`, we join this table for packages with tokens `exa` and `xam`. MySQL can do this a lot more efficiently than it can process a `LIKE "%exam%"` query against a huge table. When the user searches for a one-letter or two-letter string, we only search the beginnings of words. This is probably what they want, the only thing we can do quickly, and a reasonable/expected behavior for typeaheads. Test Plan: - Ran storage upgrades and search indexer. - Searched for stuff with "name contains". - Used typeahead and got sensible results. - Searched for `aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz` and saw only 16 joins. Reviewers: chad Reviewed By: chad Maniphest Tasks: T9979 Differential Revision: https://secure.phabricator.com/D14846
This commit is contained in:
parent
5c8025c41d
commit
96fe8c0b83
18 changed files with 457 additions and 28 deletions
|
@ -0,0 +1,7 @@
|
|||
-- Ngram index over Owners package names. Each row pairs one three-character
-- ngram with the ID of a package whose name contains it.
CREATE TABLE {$NAMESPACE}_owners.owners_name_ngrams (
  id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  objectID INT UNSIGNED NOT NULL,
  ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT},
  KEY `key_object` (objectID),
  KEY `key_ngram` (ngram, objectID)
) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};
|
11
resources/sql/autopatches/20151221.search.3.reindex.php
Normal file
11
resources/sql/autopatches/20151221.search.3.reindex.php
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
|
||||
// Queue every existing Owners package for search indexing so the indexes
// added alongside this migration get populated. The 'force' flag is passed
// through to the indexing worker for each package.
$parameters = array(
  'force' => true,
);

$packages = new LiskMigrationIterator(new PhabricatorOwnersPackage());
foreach ($packages as $package) {
  $package_phid = $package->getPHID();

  PhabricatorSearchWorker::queueDocumentForIndexing(
    $package_phid,
    $parameters);
}
|
|
@ -2548,6 +2548,8 @@ phutil_register_library_map(array(
|
|||
'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php',
|
||||
'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php',
|
||||
'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php',
|
||||
'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php',
|
||||
'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php',
|
||||
'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php',
|
||||
'PhabricatorNotificationClearController' => 'applications/notification/controller/PhabricatorNotificationClearController.php',
|
||||
'PhabricatorNotificationClient' => 'applications/notification/client/PhabricatorNotificationClient.php',
|
||||
|
@ -2636,7 +2638,9 @@ phutil_register_library_map(array(
|
|||
'PhabricatorOwnersPackage' => 'applications/owners/storage/PhabricatorOwnersPackage.php',
|
||||
'PhabricatorOwnersPackageDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php',
|
||||
'PhabricatorOwnersPackageEditEngine' => 'applications/owners/editor/PhabricatorOwnersPackageEditEngine.php',
|
||||
'PhabricatorOwnersPackageFulltextEngine' => 'applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php',
|
||||
'PhabricatorOwnersPackageFunctionDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageFunctionDatasource.php',
|
||||
'PhabricatorOwnersPackageNameNgrams' => 'applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php',
|
||||
'PhabricatorOwnersPackageOwnerDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php',
|
||||
'PhabricatorOwnersPackagePHIDType' => 'applications/owners/phid/PhabricatorOwnersPackagePHIDType.php',
|
||||
'PhabricatorOwnersPackageQuery' => 'applications/owners/query/PhabricatorOwnersPackageQuery.php',
|
||||
|
@ -3047,6 +3051,8 @@ phutil_register_library_map(array(
|
|||
'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php',
|
||||
'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php',
|
||||
'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php',
|
||||
'PhabricatorSearchNgrams' => 'applications/search/ngrams/PhabricatorSearchNgrams.php',
|
||||
'PhabricatorSearchNgramsDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php',
|
||||
'PhabricatorSearchOrderController' => 'applications/search/controller/PhabricatorSearchOrderController.php',
|
||||
'PhabricatorSearchOrderField' => 'applications/search/field/PhabricatorSearchOrderField.php',
|
||||
'PhabricatorSearchPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorSearchPreferencesSettingsPanel.php',
|
||||
|
@ -6802,6 +6808,7 @@ phutil_register_library_map(array(
|
|||
'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
|
||||
'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule',
|
||||
'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock',
|
||||
'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension',
|
||||
'PhabricatorNotificationBuilder' => 'Phobject',
|
||||
'PhabricatorNotificationClearController' => 'PhabricatorNotificationController',
|
||||
'PhabricatorNotificationClient' => 'Phobject',
|
||||
|
@ -6907,10 +6914,14 @@ phutil_register_library_map(array(
|
|||
'PhabricatorCustomFieldInterface',
|
||||
'PhabricatorDestructibleInterface',
|
||||
'PhabricatorConduitResultInterface',
|
||||
'PhabricatorFulltextInterface',
|
||||
'PhabricatorNgramsInterface',
|
||||
),
|
||||
'PhabricatorOwnersPackageDatasource' => 'PhabricatorTypeaheadDatasource',
|
||||
'PhabricatorOwnersPackageEditEngine' => 'PhabricatorEditEngine',
|
||||
'PhabricatorOwnersPackageFulltextEngine' => 'PhabricatorFulltextEngine',
|
||||
'PhabricatorOwnersPackageFunctionDatasource' => 'PhabricatorTypeaheadCompositeDatasource',
|
||||
'PhabricatorOwnersPackageNameNgrams' => 'PhabricatorSearchNgrams',
|
||||
'PhabricatorOwnersPackageOwnerDatasource' => 'PhabricatorTypeaheadCompositeDatasource',
|
||||
'PhabricatorOwnersPackagePHIDType' => 'PhabricatorPHIDType',
|
||||
'PhabricatorOwnersPackageQuery' => 'PhabricatorCursorPagedPolicyAwareQuery',
|
||||
|
@ -7414,6 +7425,8 @@ phutil_register_library_map(array(
|
|||
'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow',
|
||||
'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow',
|
||||
'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow',
|
||||
'PhabricatorSearchNgrams' => 'PhabricatorSearchDAO',
|
||||
'PhabricatorSearchNgramsDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension',
|
||||
'PhabricatorSearchOrderController' => 'PhabricatorSearchBaseController',
|
||||
'PhabricatorSearchOrderField' => 'PhabricatorSearchField',
|
||||
'PhabricatorSearchPreferencesSettingsPanel' => 'PhabricatorSettingsPanel',
|
||||
|
|
|
@ -201,7 +201,8 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
|
|||
|
||||
$is_binary = ($this->getUTF8Charset() == 'binary');
|
||||
$matches = null;
|
||||
if (preg_match('/^(fulltext|sort|text)(\d+)?\z/', $data_type, $matches)) {
|
||||
$pattern = '/^(fulltext|sort|text|char)(\d+)?\z/';
|
||||
if (preg_match($pattern, $data_type, $matches)) {
|
||||
|
||||
// Limit the permitted column lengths under the theory that it would
|
||||
// be nice to eventually reduce this to a small set of standard lengths.
|
||||
|
@ -220,6 +221,7 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
|
|||
'text8' => true,
|
||||
'text4' => true,
|
||||
'text' => true,
|
||||
'char3' => true,
|
||||
'sort255' => true,
|
||||
'sort128' => true,
|
||||
'sort64' => true,
|
||||
|
@ -266,10 +268,14 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
|
|||
// the majority of cases.
|
||||
$column_type = 'longtext';
|
||||
break;
|
||||
case 'char':
|
||||
$column_type = 'char('.$size.')';
|
||||
break;
|
||||
}
|
||||
|
||||
switch ($type) {
|
||||
case 'text':
|
||||
case 'char':
|
||||
if ($is_binary) {
|
||||
// We leave collation and character set unspecified in order to
|
||||
// generate valid SQL.
|
||||
|
|
|
@ -334,4 +334,8 @@ final class PhabricatorOwnersPackageTransactionEditor
|
|||
return $body;
|
||||
}
|
||||
|
||||
/**
 * Report that objects edited by this editor participate in search.
 *
 * NOTE(review): presumably the parent editor uses this to decide whether to
 * queue the object for indexing after applying transactions; confirm against
 * the base class.
 *
 * @return bool Always true.
 */
protected function supportsSearch() {
  return true;
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
<?php
|
||||
|
||||
/**
 * Builds the fulltext search document for an Owners package.
 */
final class PhabricatorOwnersPackageFulltextEngine
  extends PhabricatorFulltextEngine {

  /**
   * Populate the search document from a package: the package name becomes
   * the title, and an open/closed relationship records whether the package
   * is archived.
   *
   * @param PhabricatorSearchAbstractDocument Document to populate.
   * @param object The package being indexed.
   * @return void
   */
  protected function buildAbstractDocument(
    PhabricatorSearchAbstractDocument $document,
    $object) {

    $package = $object;

    $document->setDocumentTitle($package->getName());

    // TODO: Packages do not currently store creation or modification times,
    // so these dates are placeholders (the time of indexing).
    $document->setDocumentCreated(PhabricatorTime::getNow());
    $document->setDocumentModified(PhabricatorTime::getNow());

    // Archived packages are indexed as "closed"; all others as "open".
    if ($package->isArchived()) {
      $relationship = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED;
    } else {
      $relationship = PhabricatorSearchRelationship::RELATIONSHIP_OPEN;
    }

    $document->addRelationship(
      $relationship,
      $package->getPHID(),
      PhabricatorOwnersPackagePHIDType::TYPECONST,
      PhabricatorTime::getNow());
  }

}
|
|
@ -9,7 +9,6 @@ final class PhabricatorOwnersPackageQuery
|
|||
private $authorityPHIDs;
|
||||
private $repositoryPHIDs;
|
||||
private $paths;
|
||||
private $namePrefix;
|
||||
private $statuses;
|
||||
|
||||
private $controlMap = array();
|
||||
|
@ -78,9 +77,10 @@ final class PhabricatorOwnersPackageQuery
|
|||
return $this;
|
||||
}
|
||||
|
||||
public function withNamePrefix($prefix) {
|
||||
$this->namePrefix = $prefix;
|
||||
return $this;
|
||||
public function withNameNgrams($ngrams) {
|
||||
return $this->withNgramsConstraint(
|
||||
new PhabricatorOwnersPackageNameNgrams(),
|
||||
$ngrams);
|
||||
}
|
||||
|
||||
public function needPaths($need_paths) {
|
||||
|
@ -208,15 +208,6 @@ final class PhabricatorOwnersPackageQuery
|
|||
$this->statuses);
|
||||
}
|
||||
|
||||
if (strlen($this->namePrefix)) {
|
||||
// NOTE: This is a hacky mess, but this column is currently case
|
||||
// sensitive and unique.
|
||||
$where[] = qsprintf(
|
||||
$conn,
|
||||
'LOWER(p.name) LIKE %>',
|
||||
phutil_utf8_strtolower($this->namePrefix));
|
||||
}
|
||||
|
||||
if ($this->controlMap) {
|
||||
$clauses = array();
|
||||
foreach ($this->controlMap as $repository_phid => $paths) {
|
||||
|
|
|
@ -25,6 +25,10 @@ final class PhabricatorOwnersPackageSearchEngine
|
|||
->setDescription(
|
||||
pht('Search for packages with specific owners.'))
|
||||
->setDatasource(new PhabricatorProjectOrUserDatasource()),
|
||||
id(new PhabricatorSearchTextField())
|
||||
->setLabel(pht('Name Contains'))
|
||||
->setKey('name')
|
||||
->setDescription(pht('Search for packages by name substrings.')),
|
||||
id(new PhabricatorSearchDatasourceField())
|
||||
->setLabel(pht('Repositories'))
|
||||
->setKey('repositoryPHIDs')
|
||||
|
@ -69,6 +73,10 @@ final class PhabricatorOwnersPackageSearchEngine
|
|||
$query->withStatuses($map['statuses']);
|
||||
}
|
||||
|
||||
if (strlen($map['name'])) {
|
||||
$query->withNameNgrams($map['name']);
|
||||
}
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,7 +7,9 @@ final class PhabricatorOwnersPackage
|
|||
PhabricatorApplicationTransactionInterface,
|
||||
PhabricatorCustomFieldInterface,
|
||||
PhabricatorDestructibleInterface,
|
||||
PhabricatorConduitResultInterface {
|
||||
PhabricatorConduitResultInterface,
|
||||
PhabricatorFulltextInterface,
|
||||
PhabricatorNgramsInterface {
|
||||
|
||||
protected $name;
|
||||
protected $originalName;
|
||||
|
@ -46,7 +48,7 @@ final class PhabricatorOwnersPackage
|
|||
self::CONFIG_TIMESTAMPS => false,
|
||||
self::CONFIG_AUX_PHID => true,
|
||||
self::CONFIG_COLUMN_SCHEMA => array(
|
||||
'name' => 'text128',
|
||||
'name' => 'sort128',
|
||||
'originalName' => 'text255',
|
||||
'description' => 'text',
|
||||
'primaryOwnerPHID' => 'phid?',
|
||||
|
@ -54,17 +56,6 @@ final class PhabricatorOwnersPackage
|
|||
'mailKey' => 'bytes20',
|
||||
'status' => 'text32',
|
||||
),
|
||||
self::CONFIG_KEY_SCHEMA => array(
|
||||
'key_phid' => null,
|
||||
'phid' => array(
|
||||
'columns' => array('phid'),
|
||||
'unique' => true,
|
||||
),
|
||||
'name' => array(
|
||||
'columns' => array('name'),
|
||||
'unique' => true,
|
||||
),
|
||||
),
|
||||
) + parent::getConfiguration();
|
||||
}
|
||||
|
||||
|
@ -433,4 +424,23 @@ final class PhabricatorOwnersPackage
|
|||
);
|
||||
}
|
||||
|
||||
|
||||
/* -( PhabricatorFulltextInterface )--------------------------------------- */
|
||||
|
||||
|
||||
/**
 * Create the engine which builds fulltext search documents for packages.
 *
 * @return PhabricatorOwnersPackageFulltextEngine New engine instance.
 */
public function newFulltextEngine() {
  $engine = new PhabricatorOwnersPackageFulltextEngine();
  return $engine;
}
|
||||
|
||||
|
||||
/* -( PhabricatorNgramsInterface )----------------------------------------- */
|
||||
|
||||
|
||||
/**
 * Build the ngram indexes for this package. Only the package name is
 * indexed.
 *
 * @return list<PhabricatorSearchNgrams> Indexes carrying the values to store.
 */
public function newNgrams() {
  $name_ngrams = new PhabricatorOwnersPackageNameNgrams();
  $name_ngrams->setValue($this->getName());

  return array(
    $name_ngrams,
  );
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
<?php
|
||||
|
||||
/**
 * Ngram index over Owners package names.
 *
 * With the application name and ngram key below, the inherited table name
 * pattern ("<application>_<key>_ngrams") resolves to "owners_name_ngrams".
 */
final class PhabricatorOwnersPackageNameNgrams
  extends PhabricatorSearchNgrams {

  /**
   * @return string Key identifying this index; also part of the table name.
   */
  public function getNgramKey() {
    return 'name';
  }

  /**
   * @return string Column on the package table holding the indexed string.
   */
  public function getColumnName() {
    return 'name';
  }

  /**
   * @return string Application which owns the index storage.
   */
  public function getApplicationName() {
    return 'owners';
  }

}
|
|
@ -27,6 +27,14 @@ final class PhabricatorOwnersPackageTransaction
|
|||
|
||||
switch ($this->getTransactionType()) {
|
||||
case self::TYPE_OWNERS:
|
||||
if (!is_array($old)) {
|
||||
$old = array();
|
||||
}
|
||||
|
||||
if (!is_array($new)) {
|
||||
$new = array();
|
||||
}
|
||||
|
||||
$add = array_diff($new, $old);
|
||||
foreach ($add as $phid) {
|
||||
$phids[] = $phid;
|
||||
|
|
|
@ -22,7 +22,7 @@ final class PhabricatorOwnersPackageDatasource
|
|||
$results = array();
|
||||
|
||||
$query = id(new PhabricatorOwnersPackageQuery())
|
||||
->withNamePrefix($raw_query)
|
||||
->withNameNgrams($raw_query)
|
||||
->setOrder('name');
|
||||
|
||||
$packages = $this->executeQuery($query);
|
||||
|
|
|
@ -65,6 +65,9 @@ final class PhabricatorFulltextIndexEngineExtension
|
|||
|
||||
try {
|
||||
$comment = $xaction->getApplicationTransactionCommentObject();
|
||||
if (!$comment) {
|
||||
return 'none';
|
||||
}
|
||||
} catch (Exception $ex) {
|
||||
return 'none';
|
||||
}
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
<?php
|
||||
|
||||
/**
 * Index engine extension which writes ngram indexes for objects that
 * implement @{interface:PhabricatorNgramsInterface}.
 */
final class PhabricatorNgramsIndexEngineExtension
  extends PhabricatorIndexEngineExtension {

  const EXTENSIONKEY = 'ngrams';

  public function getExtensionName() {
    return pht('Ngrams Engine');
  }

  /**
   * Compute a version digest from the values this object would index. The
   * digest changes whenever any indexed value changes.
   *
   * @param object Object implementing PhabricatorNgramsInterface.
   * @return string Digest of the indexed values, keyed by ngram key.
   */
  public function getIndexVersion($object) {
    $values = mpull($object->newNgrams(), 'getValue', 'getNgramKey');

    // Sort by key so the digest does not depend on the order in which the
    // object lists its indexes.
    ksort($values);

    return PhabricatorHash::digestForIndex(serialize($values));
  }

  public function shouldIndexObject($object) {
    return ($object instanceof PhabricatorNgramsInterface);
  }

  /**
   * Rewrite every ngram index for the object.
   *
   * @param PhabricatorIndexEngine Engine running this extension.
   * @param object Object implementing PhabricatorNgramsInterface.
   * @return void
   */
  public function indexObject(
    PhabricatorIndexEngine $engine,
    $object) {

    $indexes = $object->newNgrams();
    foreach ($indexes as $index) {
      $index->writeNgram($object->getID());
    }
  }

}
|
|
@ -0,0 +1,31 @@
|
|||
<?php
|
||||
|
||||
/**
 * Destruction engine extension which removes ngram index rows when an
 * object implementing @{interface:PhabricatorNgramsInterface} is destroyed.
 */
final class PhabricatorSearchNgramsDestructionEngineExtension
  extends PhabricatorDestructionEngineExtension {

  const EXTENSIONKEY = 'search.ngrams';

  public function getExtensionName() {
    return pht('Search Ngram');
  }

  public function canDestroyObject(
    PhabricatorDestructionEngine $engine,
    $object) {
    return ($object instanceof PhabricatorNgramsInterface);
  }

  /**
   * Delete all rows belonging to the object from each of its ngram tables.
   *
   * @param PhabricatorDestructionEngine Engine running this extension.
   * @param object Object being destroyed.
   * @return void
   */
  public function destroyObject(
    PhabricatorDestructionEngine $engine,
    $object) {

    $indexes = $object->newNgrams();
    foreach ($indexes as $index) {
      $conn_w = $index->establishConnection('w');
      $table_name = $index->getTableName();

      queryfx(
        $conn_w,
        'DELETE FROM %T WHERE objectID = %d',
        $table_name,
        $object->getID());
    }
  }

}
|
|
@ -0,0 +1,7 @@
|
|||
<?php
|
||||
|
||||
/**
 * Implemented by objects which maintain one or more ngram indexes for
 * substring search.
 */
interface PhabricatorNgramsInterface {

  /**
   * Build the ngram indexes for this object, each carrying the current value
   * to be indexed.
   *
   * @return list<PhabricatorSearchNgrams> Indexes with their values set.
   */
  public function newNgrams();

}
|
113
src/applications/search/ngrams/PhabricatorSearchNgrams.php
Normal file
113
src/applications/search/ngrams/PhabricatorSearchNgrams.php
Normal file
|
@ -0,0 +1,113 @@
|
|||
<?php
|
||||
|
||||
/**
 * Abstract storage for an ngram (trigram) index over one string field of an
 * object.
 *
 * Each row pairs an `objectID` with one three-character `ngram`. Subclasses
 * name the index with @{method:getNgramKey} and identify the column holding
 * the original string with @{method:getColumnName}.
 */
abstract class PhabricatorSearchNgrams
  extends PhabricatorSearchDAO {

  protected $objectID;
  protected $ngram;

  // Raw string to index. This is only used by writeNgram() and is not listed
  // in the column schema below.
  private $value;

  /**
   * @return string Short key naming this index; used in the table name.
   */
  abstract public function getNgramKey();

  /**
   * @return string Name of the column which stores the indexed string.
   */
  abstract public function getColumnName();

  /**
   * Set the raw string which @{method:writeNgram} will index.
   *
   * @param string Value to index.
   * @return this
   */
  final public function setValue($value) {
    $this->value = $value;
    return $this;
  }

  /**
   * @return string|null Raw string to index, if one has been set.
   */
  final public function getValue() {
    return $this->value;
  }

  protected function getConfiguration() {
    return array(
      self::CONFIG_TIMESTAMPS => false,
      self::CONFIG_COLUMN_SCHEMA => array(
        'objectID' => 'uint32',
        'ngram' => 'char3',
      ),
      self::CONFIG_KEY_SCHEMA => array(
        'key_ngram' => array(
          'columns' => array('ngram', 'objectID'),
        ),
        'key_object' => array(
          'columns' => array('objectID'),
        ),
      ),
    ) + parent::getConfiguration();
  }

  /**
   * Get the index table name, in the form "<application>_<key>_ngrams".
   *
   * @return string Table name.
   */
  public function getTableName() {
    $application = $this->getApplicationName();
    $key = $this->getNgramKey();
    return "{$application}_{$key}_ngrams";
  }

  /**
   * Split a string into tokens on runs of spaces, ignoring leading and
   * trailing spaces. An empty string produces a single empty token.
   *
   * @param string String to split.
   * @return list<string> Tokens, in order of appearance.
   */
  final public function tokenizeString($value) {
    $value = trim($value, ' ');
    $value = preg_split('/ +/', $value);
    return $value;
  }

  /**
   * Compute the distinct trigrams for a string.
   *
   * The string is tokenized and lowercased, then each token is optionally
   * padded depending on the mode:
   *
   *   - `query`: no padding; produces ngrams for a substring match.
   *   - `index`: pad both sides with a space, so the stored ngrams mark the
   *     beginning and end of each word.
   *   - `prefix`: pad the front with a space, so the ngrams only match the
   *     beginning of a word.
   *
   * Tokens are sliced by character, not by byte, so a multibyte UTF-8
   * character is never split across ngrams.
   *
   * @param string String to compute ngrams for.
   * @param string One of `query`, `index` or `prefix`.
   * @return list<string> Distinct ngrams, sorted.
   */
  final public function getNgramsFromString($value, $mode) {
    $tokens = $this->tokenizeString($value);

    $ngrams = array();
    foreach ($tokens as $token) {
      $token = phutil_utf8_strtolower($token);

      switch ($mode) {
        case 'query':
          break;
        case 'index':
          $token = ' '.$token.' ';
          break;
        case 'prefix':
          $token = ' '.$token;
          break;
      }

      // Work on a vector of characters. Byte offsets would cut multibyte
      // characters in half, producing ngrams which are not valid UTF-8 and
      // which disagree with the character-based lengths callers compute.
      $characters = phutil_utf8v($token);

      $len = (count($characters) - 2);
      for ($ii = 0; $ii < $len; $ii++) {
        $ngram = implode('', array_slice($characters, $ii, 3));
        $ngrams[$ngram] = $ngram;
      }
    }

    ksort($ngrams);

    // Return the values, not the keys: PHP converts array keys like "123"
    // to integers, but the values are always strings.
    return array_values($ngrams);
  }

  /**
   * Replace all index rows for an object with the ngrams of the current
   * value.
   *
   * Existing rows for the object are always deleted, even if the new value
   * produces no ngrams.
   *
   * @param int ID of the object the value belongs to.
   * @return this
   */
  final public function writeNgram($object_id) {
    $ngrams = $this->getNgramsFromString($this->getValue(), 'index');
    $conn_w = $this->establishConnection('w');

    $sql = array();
    foreach ($ngrams as $ngram) {
      $sql[] = qsprintf(
        $conn_w,
        '(%d, %s)',
        $object_id,
        $ngram);
    }

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE objectID = %d',
      $this->getTableName(),
      $object_id);

    if ($sql) {
      queryfx(
        $conn_w,
        'INSERT INTO %T (objectID, ngram) VALUES %Q',
        $this->getTableName(),
        implode(', ', $sql));
    }

    return $this;
  }

}
|
|
@ -26,6 +26,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
private $edgeLogicConstraintsAreValid = false;
|
||||
private $spacePHIDs;
|
||||
private $spaceIsArchived;
|
||||
private $ngrams = array();
|
||||
|
||||
protected function getPageCursors(array $page) {
|
||||
return array(
|
||||
|
@ -253,6 +254,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
$joins = array();
|
||||
$joins[] = $this->buildEdgeLogicJoinClause($conn);
|
||||
$joins[] = $this->buildApplicationSearchJoinClause($conn);
|
||||
$joins[] = $this->buildNgramsJoinClause($conn);
|
||||
return $joins;
|
||||
}
|
||||
|
||||
|
@ -274,6 +276,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
$where[] = $this->buildPagingClause($conn);
|
||||
$where[] = $this->buildEdgeLogicWhereClause($conn);
|
||||
$where[] = $this->buildSpacesWhereClause($conn);
|
||||
$where[] = $this->buildNgramsWhereClause($conn);
|
||||
return $where;
|
||||
}
|
||||
|
||||
|
@ -324,6 +327,10 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
return true;
|
||||
}
|
||||
|
||||
if ($this->shouldGroupNgramResultRows()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1345,6 +1352,138 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
}
|
||||
|
||||
|
||||
/* -( Ngrams )------------------------------------------------------------- */
|
||||
|
||||
|
||||
/**
 * Constrain results to objects whose indexed field matches a search string,
 * using an ngram index. Empty search strings add no constraint.
 *
 * @param PhabricatorSearchNgrams Index to search.
 * @param string Search string entered by the user.
 * @return this
 */
protected function withNgramsConstraint(
  PhabricatorSearchNgrams $index,
  $value) {

  if (!strlen($value)) {
    return $this;
  }

  // Record the length in characters, not bytes: it selects the matching
  // strategy when the join clause is built.
  $character_count = count(phutil_utf8v($value));

  $this->ngrams[] = array(
    'index' => $index,
    'value' => $value,
    'length' => $character_count,
  );

  return $this;
}
|
||||
|
||||
|
||||
/**
 * Build one JOIN against the ngram index table for each ngram in the active
 * ngram constraints.
 *
 * The matching strategy depends on the length of the search string, in
 * characters:
 *
 *   - three or more: join on every trigram of the string (substring match);
 *   - exactly two: join on the space-prefixed trigram (start of a word);
 *   - one: join on any trigram beginning with a space and the character
 *     (start of a word), using a prefix LIKE.
 *
 * At most 16 joins are emitted; see the note in the body.
 *
 * @param AphrontDatabaseConnection Connection used to escape the clauses.
 * @return list<string> JOIN clauses; empty if there are no constraints.
 */
protected function buildNgramsJoinClause(AphrontDatabaseConnection $conn) {
  $flat = array();
  foreach ($this->ngrams as $spec) {
    $index = $spec['index'];
    $value = $spec['value'];
    $length = $spec['length'];

    if ($length >= 3) {
      $ngrams = $index->getNgramsFromString($value, 'query');
      $prefix = false;
    } else if ($length == 2) {
      $ngrams = $index->getNgramsFromString($value, 'prefix');
      $prefix = false;
    } else {
      // Indexed ngrams are always lowercased, so lowercase the character
      // here too. The other branches get this from getNgramsFromString().
      $ngrams = array(' '.phutil_utf8_strtolower($value));
      $prefix = true;
    }

    foreach ($ngrams as $ngram) {
      $flat[] = array(
        'table' => $index->getTableName(),
        'ngram' => $ngram,
        'prefix' => $prefix,
      );
    }
  }

  // MySQL only allows us to join a maximum of 61 tables per query. Each
  // ngram is going to cost us a join toward that limit, so if the user
  // specified a very long query string, just pick 16 of the ngrams
  // at random.
  if (count($flat) > 16) {
    shuffle($flat);
    $flat = array_slice($flat, 0, 16);
  }

  $primary_alias = $this->getPrimaryTableAlias();
  if ($primary_alias) {
    $id_column = qsprintf($conn, '%T.%T', $primary_alias, 'id');
  } else {
    $id_column = qsprintf($conn, '%T', 'id');
  }

  $idx = 1;
  $joins = array();
  foreach ($flat as $spec) {
    $table = $spec['table'];
    $ngram = $spec['ngram'];
    $prefix = $spec['prefix'];

    // Each join needs its own alias, because the same index table may be
    // joined several times.
    $join_alias = 'ngm'.$idx++;

    if ($prefix) {
      $joins[] = qsprintf(
        $conn,
        'JOIN %T %T ON %T.objectID = %Q AND %T.ngram LIKE %>',
        $table,
        $join_alias,
        $join_alias,
        $id_column,
        $join_alias,
        $ngram);
    } else {
      $joins[] = qsprintf(
        $conn,
        'JOIN %T %T ON %T.objectID = %Q AND %T.ngram = %s',
        $table,
        $join_alias,
        $join_alias,
        $id_column,
        $join_alias,
        $ngram);
    }
  }

  return $joins;
}
|
||||
|
||||
|
||||
/**
 * Build the WHERE clauses which confirm ngram matches against the real
 * column value.
 *
 * For every token in every constraint's search string, the indexed column
 * must contain that token as a substring.
 *
 * @param AphrontDatabaseConnection Connection used to escape the clauses.
 * @return list<string> WHERE clauses; empty if there are no constraints.
 */
protected function buildNgramsWhereClause(AphrontDatabaseConnection $conn) {
  $clauses = array();
  $table_alias = $this->getPrimaryTableAlias();

  foreach ($this->ngrams as $spec) {
    $index = $spec['index'];

    // Qualify the column with the primary table alias when there is one.
    $column_name = $index->getColumnName();
    if ($table_alias) {
      $column = qsprintf($conn, '%T.%T', $table_alias, $column_name);
    } else {
      $column = qsprintf($conn, '%T', $column_name);
    }

    foreach ($index->tokenizeString($spec['value']) as $token) {
      $clauses[] = qsprintf(
        $conn,
        '%Q LIKE %~',
        $column,
        $token);
    }
  }

  return $clauses;
}
|
||||
|
||||
|
||||
/**
 * Report whether result rows must be grouped because ngram constraints are
 * active.
 *
 * @return bool True if at least one ngram constraint has been added.
 */
protected function shouldGroupNgramResultRows() {
  if ($this->ngrams) {
    return true;
  }

  return false;
}
|
||||
|
||||
|
||||
/* -( Edge Logic )--------------------------------------------------------- */
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue