From f97157e7edb1ca395da17e7dc0662fd0ac573675 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 28 Aug 2017 13:04:56 -0700 Subject: [PATCH] Build a prototype fulltext engine ("Ferret") using only basic MySQL primitives Summary: Ref T12819. I gave this stuff a sweet code name because all the terms related to "fulltext" and "search" already mean 5 different things. It, uh, ferrets out documents for you? I'm building this to work a lot like the existing ngram index, which seems to work pretty well. If this sticks, it will auto-resolve the join issue (in T12443) by letting us do the entire thing locally in a JOIN and thus dodge a lot of mess. This index gets built alongside other indexes, but only shows up in the UI if you have prototypes enabled. If you do, it appears under the existing fulltext field in Maniphest. No existing functionality is affected or disrupted. NOTE: The query engine half of this is still EXTREMELY primitive, and this probably performs worse than the existing field for now. If this doesn't show obvious signs of being awful on `secure` I'll improve that in followup changes. Test Plan: Indexed my tasks, ran some simple queries, got the results I wanted, even for queries "ko", "k", "v0.1". 
{F5147746} Reviewers: chad Reviewed By: chad Maniphest Tasks: T12819, T12443 Differential Revision: https://secure.phabricator.com/D18484 --- .../20170828.ferret.01.taskdoc.sql | 9 ++ .../20170828.ferret.02.taskfield.sql | 7 + .../20170828.ferret.03.taskngrams.sql | 5 + src/__phutil_library_map__.php | 22 +++ .../query/ManiphestTaskSearchEngine.php | 13 ++ .../search/ManiphestTaskFerretEngine.php | 18 +++ .../maniphest/storage/ManiphestTask.php | 9 ++ .../storage/ManiphestTaskFerretDocument.php | 14 ++ .../storage/ManiphestTaskFerretField.php | 14 ++ .../storage/ManiphestTaskFerretNgrams.php | 14 ++ ...abricatorFerretFulltextEngineExtension.php | 126 +++++++++++++++ .../ferret/PhabricatorFerretDocument.php | 40 +++++ .../search/ferret/PhabricatorFerretEngine.php | 9 ++ .../search/ferret/PhabricatorFerretField.php | 36 +++++ .../ferret/PhabricatorFerretInterface.php | 7 + .../search/ferret/PhabricatorFerretNgrams.php | 35 ++++ .../search/ngrams/PhabricatorNgramEngine.php | 41 +++++ ...PhabricatorCursorPagedPolicyAwareQuery.php | 152 ++++++++++++++++++ 18 files changed, 571 insertions(+) create mode 100644 resources/sql/autopatches/20170828.ferret.01.taskdoc.sql create mode 100644 resources/sql/autopatches/20170828.ferret.02.taskfield.sql create mode 100644 resources/sql/autopatches/20170828.ferret.03.taskngrams.sql create mode 100644 src/applications/maniphest/search/ManiphestTaskFerretEngine.php create mode 100644 src/applications/maniphest/storage/ManiphestTaskFerretDocument.php create mode 100644 src/applications/maniphest/storage/ManiphestTaskFerretField.php create mode 100644 src/applications/maniphest/storage/ManiphestTaskFerretNgrams.php create mode 100644 src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php create mode 100644 src/applications/search/ferret/PhabricatorFerretDocument.php create mode 100644 src/applications/search/ferret/PhabricatorFerretEngine.php create mode 100644 
src/applications/search/ferret/PhabricatorFerretField.php create mode 100644 src/applications/search/ferret/PhabricatorFerretInterface.php create mode 100644 src/applications/search/ferret/PhabricatorFerretNgrams.php create mode 100644 src/applications/search/ngrams/PhabricatorNgramEngine.php diff --git a/resources/sql/autopatches/20170828.ferret.01.taskdoc.sql b/resources/sql/autopatches/20170828.ferret.01.taskdoc.sql new file mode 100644 index 0000000000..8cb6835602 --- /dev/null +++ b/resources/sql/autopatches/20170828.ferret.01.taskdoc.sql @@ -0,0 +1,9 @@ +CREATE TABLE {$NAMESPACE}_maniphest.maniphest_task_fdocument ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + objectPHID VARBINARY(64) NOT NULL, + isClosed BOOL NOT NULL, + authorPHID VARBINARY(64), + ownerPHID VARBINARY(64), + epochCreated INT UNSIGNED NOT NULL, + epochModified INT UNSIGNED NOT NULL +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20170828.ferret.02.taskfield.sql b/resources/sql/autopatches/20170828.ferret.02.taskfield.sql new file mode 100644 index 0000000000..5528feec8f --- /dev/null +++ b/resources/sql/autopatches/20170828.ferret.02.taskfield.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_maniphest.maniphest_task_ffield ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + documentID INT UNSIGNED NOT NULL, + fieldKey VARCHAR(4) NOT NULL COLLATE {$COLLATE_TEXT}, + rawCorpus LONGTEXT NOT NULL COLLATE {$COLLATE_SORT}, + normalCorpus LONGTEXT NOT NULL COLLATE {$COLLATE_SORT} +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20170828.ferret.03.taskngrams.sql b/resources/sql/autopatches/20170828.ferret.03.taskngrams.sql new file mode 100644 index 0000000000..a7b5180642 --- /dev/null +++ b/resources/sql/autopatches/20170828.ferret.03.taskngrams.sql @@ -0,0 +1,5 @@ +CREATE TABLE {$NAMESPACE}_maniphest.maniphest_task_fngrams ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + documentID INT UNSIGNED NOT NULL, + 
ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT} +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index 4575908ef8..3edb31640b 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -1533,6 +1533,10 @@ phutil_register_library_map(array( 'ManiphestTaskEditBulkJobType' => 'applications/maniphest/bulk/ManiphestTaskEditBulkJobType.php', 'ManiphestTaskEditController' => 'applications/maniphest/controller/ManiphestTaskEditController.php', 'ManiphestTaskEditEngineLock' => 'applications/maniphest/editor/ManiphestTaskEditEngineLock.php', + 'ManiphestTaskFerretDocument' => 'applications/maniphest/storage/ManiphestTaskFerretDocument.php', + 'ManiphestTaskFerretEngine' => 'applications/maniphest/search/ManiphestTaskFerretEngine.php', + 'ManiphestTaskFerretField' => 'applications/maniphest/storage/ManiphestTaskFerretField.php', + 'ManiphestTaskFerretNgrams' => 'applications/maniphest/storage/ManiphestTaskFerretNgrams.php', 'ManiphestTaskFulltextEngine' => 'applications/maniphest/search/ManiphestTaskFulltextEngine.php', 'ManiphestTaskGraph' => 'infrastructure/graph/ManiphestTaskGraph.php', 'ManiphestTaskHasCommitEdgeType' => 'applications/maniphest/edge/ManiphestTaskHasCommitEdgeType.php', @@ -2828,6 +2832,12 @@ phutil_register_library_map(array( 'PhabricatorFeedStoryNotification' => 'applications/notification/storage/PhabricatorFeedStoryNotification.php', 'PhabricatorFeedStoryPublisher' => 'applications/feed/PhabricatorFeedStoryPublisher.php', 'PhabricatorFeedStoryReference' => 'applications/feed/storage/PhabricatorFeedStoryReference.php', + 'PhabricatorFerretDocument' => 'applications/search/ferret/PhabricatorFerretDocument.php', + 'PhabricatorFerretEngine' => 'applications/search/ferret/PhabricatorFerretEngine.php', + 'PhabricatorFerretField' => 'applications/search/ferret/PhabricatorFerretField.php', + 'PhabricatorFerretFulltextEngineExtension' => 
'applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php', + 'PhabricatorFerretInterface' => 'applications/search/ferret/PhabricatorFerretInterface.php', + 'PhabricatorFerretNgrams' => 'applications/search/ferret/PhabricatorFerretNgrams.php', 'PhabricatorFile' => 'applications/files/storage/PhabricatorFile.php', 'PhabricatorFileAES256StorageFormat' => 'applications/files/format/PhabricatorFileAES256StorageFormat.php', 'PhabricatorFileBundleLoader' => 'applications/files/query/PhabricatorFileBundleLoader.php', @@ -3195,6 +3205,7 @@ phutil_register_library_map(array( 'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php', 'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php', 'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php', + 'PhabricatorNgramEngine' => 'applications/search/ngrams/PhabricatorNgramEngine.php', 'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php', 'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php', 'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php', @@ -6659,6 +6670,7 @@ phutil_register_library_map(array( 'PhabricatorSpacesInterface', 'PhabricatorConduitResultInterface', 'PhabricatorFulltextInterface', + 'PhabricatorFerretInterface', 'DoorkeeperBridgedObjectInterface', 'PhabricatorEditEngineSubtypeInterface', 'PhabricatorEditEngineLockableInterface', @@ -6682,6 +6694,10 @@ phutil_register_library_map(array( 'ManiphestTaskEditBulkJobType' => 'PhabricatorWorkerBulkJobType', 'ManiphestTaskEditController' => 'ManiphestController', 'ManiphestTaskEditEngineLock' => 'PhabricatorEditEngineLock', + 'ManiphestTaskFerretDocument' => 'PhabricatorFerretDocument', + 'ManiphestTaskFerretEngine' => 'PhabricatorFerretEngine', + 
'ManiphestTaskFerretField' => 'PhabricatorFerretField', + 'ManiphestTaskFerretNgrams' => 'PhabricatorFerretNgrams', 'ManiphestTaskFulltextEngine' => 'PhabricatorFulltextEngine', 'ManiphestTaskGraph' => 'PhabricatorObjectGraph', 'ManiphestTaskHasCommitEdgeType' => 'PhabricatorEdgeType', @@ -8147,6 +8163,11 @@ phutil_register_library_map(array( 'PhabricatorFeedStoryNotification' => 'PhabricatorFeedDAO', 'PhabricatorFeedStoryPublisher' => 'Phobject', 'PhabricatorFeedStoryReference' => 'PhabricatorFeedDAO', + 'PhabricatorFerretDocument' => 'PhabricatorSearchDAO', + 'PhabricatorFerretEngine' => 'Phobject', + 'PhabricatorFerretField' => 'PhabricatorSearchDAO', + 'PhabricatorFerretFulltextEngineExtension' => 'PhabricatorFulltextEngineExtension', + 'PhabricatorFerretNgrams' => 'PhabricatorSearchDAO', 'PhabricatorFile' => array( 'PhabricatorFileDAO', 'PhabricatorApplicationTransactionInterface', @@ -8565,6 +8586,7 @@ phutil_register_library_map(array( 'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', 'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule', 'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock', + 'PhabricatorNgramEngine' => 'Phobject', 'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension', 'PhabricatorNgramsInterface' => 'PhabricatorIndexableInterface', 'PhabricatorNotificationBuilder' => 'Phobject', diff --git a/src/applications/maniphest/query/ManiphestTaskSearchEngine.php b/src/applications/maniphest/query/ManiphestTaskSearchEngine.php index 150ec81def..8eb4a416e3 100644 --- a/src/applications/maniphest/query/ManiphestTaskSearchEngine.php +++ b/src/applications/maniphest/query/ManiphestTaskSearchEngine.php @@ -49,6 +49,8 @@ final class ManiphestTaskSearchEngine $subtype_map = id(new ManiphestTask())->newEditEngineSubtypeMap(); $hide_subtypes = (count($subtype_map) == 1); + $hide_ferret = !PhabricatorEnv::getEnvConfig('phabricator.show-prototypes'); + return array( id(new 
PhabricatorOwnersSearchField()) ->setLabel(pht('Assigned To')) @@ -89,6 +91,10 @@ final class ManiphestTaskSearchEngine id(new PhabricatorSearchTextField()) ->setLabel(pht('Contains Words')) ->setKey('fulltext'), + id(new PhabricatorSearchTextField()) + ->setLabel(pht('Matches (Prototype)')) + ->setKey('ferret') + ->setIsHidden($hide_ferret), id(new PhabricatorSearchThreeStateField()) ->setLabel(pht('Open Parents')) ->setKey('hasParents') @@ -145,6 +151,7 @@ final class ManiphestTaskSearchEngine 'priorities', 'subtypes', 'fulltext', + 'ferret', 'hasParents', 'hasSubtasks', 'parentIDs', @@ -224,6 +231,12 @@ final class ManiphestTaskSearchEngine $query->withFullTextSearch($map['fulltext']); } + if (strlen($map['ferret'])) { + $query->withFerretConstraint( + id(new ManiphestTask())->newFerretEngine(), + $map['ferret']); + } + if ($map['parentIDs']) { $query->withParentTaskIDs($map['parentIDs']); } diff --git a/src/applications/maniphest/search/ManiphestTaskFerretEngine.php b/src/applications/maniphest/search/ManiphestTaskFerretEngine.php new file mode 100644 index 0000000000..7232d0251f --- /dev/null +++ b/src/applications/maniphest/search/ManiphestTaskFerretEngine.php @@ -0,0 +1,18 @@ +getPHID(); + $engine = $object->newFerretEngine(); + + $ferret_document = $engine->newDocumentObject() + ->setObjectPHID($phid) + ->setIsClosed(0) + ->setEpochCreated(0) + ->setEpochModified(0); + + $stemmer = new PhutilSearchStemmer(); + + $ferret_fields = array(); + $ngrams_source = array(); + foreach ($document->getFieldData() as $field) { + list($key, $raw_corpus) = $field; + + if (!strlen($raw_corpus)) { + continue; + } + + $normal_corpus = $stemmer->stemCorpus($raw_corpus); + + $ferret_fields[] = $engine->newFieldObject() + ->setFieldKey($key) + ->setRawCorpus($raw_corpus) + ->setNormalCorpus($normal_corpus); + + $ngrams_source[] = $raw_corpus; + } + $ngrams_source = implode(' ', $ngrams_source); + + $ngrams = id(new PhabricatorNgramEngine()) + 
->getNgramsFromString($ngrams_source, 'index'); + + $ferret_document->openTransaction(); + $this->deleteOldDocument($engine, $object, $document); + + $ferret_document->save(); + + $document_id = $ferret_document->getID(); + foreach ($ferret_fields as $ferret_field) { + $ferret_field + ->setDocumentID($document_id) + ->save(); + } + + $ferret_ngrams = $engine->newNgramsObject(); + $conn = $ferret_ngrams->establishConnection('w'); + + $sql = array(); + foreach ($ngrams as $ngram) { + $sql[] = qsprintf( + $conn, + '(%d, %s)', + $document_id, + $ngram); + } + + foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { + queryfx( + $conn, + 'INSERT INTO %T (documentID, ngram) VALUES %Q', + $ferret_ngrams->getTableName(), + $chunk); + } + $ferret_document->saveTransaction(); + } + + + private function deleteOldDocument( + PhabricatorFerretEngine $engine, + $object, + PhabricatorSearchAbstractDocument $document) { + + $old_document = $engine->newDocumentObject()->loadOneWhere( + 'objectPHID = %s', + $document->getPHID()); + if (!$old_document) { + return; + } + + $conn = $old_document->establishConnection('w'); + $old_id = $old_document->getID(); + + queryfx( + $conn, + 'DELETE FROM %T WHERE id = %d', + $engine->newDocumentObject()->getTableName(), + $old_id); + + queryfx( + $conn, + 'DELETE FROM %T WHERE documentID = %d', + $engine->newFieldObject()->getTableName(), + $old_id); + + queryfx( + $conn, + 'DELETE FROM %T WHERE documentID = %d', + $engine->newNgramsObject()->getTableName(), + $old_id); + } + +} diff --git a/src/applications/search/ferret/PhabricatorFerretDocument.php b/src/applications/search/ferret/PhabricatorFerretDocument.php new file mode 100644 index 0000000000..fa816c8d17 --- /dev/null +++ b/src/applications/search/ferret/PhabricatorFerretDocument.php @@ -0,0 +1,40 @@ + false, + self::CONFIG_COLUMN_SCHEMA => array( + 'isClosed' => 'bool', + 'authorPHID' => 'phid?', + 'ownerPHID' => 'phid?', + 'epochCreated' => 'epoch', + 'epochModified' => 'epoch', + ), 
+ self::CONFIG_KEY_SCHEMA => array( + 'key_object' => array( + 'columns' => array('objectPHID'), + 'unique' => true, + ), + ), + ) + parent::getConfiguration(); + } + + public function getTableName() { + $application = $this->getApplicationName(); + $key = $this->getIndexKey(); + return "{$application}_{$key}_fdocument"; + } + +} diff --git a/src/applications/search/ferret/PhabricatorFerretEngine.php b/src/applications/search/ferret/PhabricatorFerretEngine.php new file mode 100644 index 0000000000..e816ef3f59 --- /dev/null +++ b/src/applications/search/ferret/PhabricatorFerretEngine.php @@ -0,0 +1,9 @@ + false, + self::CONFIG_COLUMN_SCHEMA => array( + 'documentID' => 'uint32', + 'fieldKey' => 'text4', + 'rawCorpus' => 'sort', + 'normalCorpus' => 'sort', + ), + self::CONFIG_KEY_SCHEMA => array( + 'key_document' => array( + 'columns' => array('documentID', 'fieldKey'), + ), + ), + ) + parent::getConfiguration(); + } + + public function getTableName() { + $application = $this->getApplicationName(); + $key = $this->getIndexKey(); + return "{$application}_{$key}_ffield"; + } + +} diff --git a/src/applications/search/ferret/PhabricatorFerretInterface.php b/src/applications/search/ferret/PhabricatorFerretInterface.php new file mode 100644 index 0000000000..cdb651b6cf --- /dev/null +++ b/src/applications/search/ferret/PhabricatorFerretInterface.php @@ -0,0 +1,7 @@ + false, + self::CONFIG_COLUMN_SCHEMA => array( + 'documentID' => 'uint32', + 'ngram' => 'char3', + ), + self::CONFIG_KEY_SCHEMA => array( + 'key_ngram' => array( + 'columns' => array('ngram', 'documentID'), + ), + 'key_object' => array( + 'columns' => array('documentID'), + ), + ), + ) + parent::getConfiguration(); + } + + public function getTableName() { + $application = $this->getApplicationName(); + $key = $this->getIndexKey(); + return "{$application}_{$key}_fngrams"; + } + +} diff --git a/src/applications/search/ngrams/PhabricatorNgramEngine.php b/src/applications/search/ngrams/PhabricatorNgramEngine.php 
new file mode 100644 index 0000000000..e168a86127 --- /dev/null +++ b/src/applications/search/ngrams/PhabricatorNgramEngine.php @@ -0,0 +1,41 @@ +tokenizeString($value); + + $ngrams = array(); + foreach ($tokens as $token) { + $token = phutil_utf8_strtolower($token); + + switch ($mode) { + case 'query': + break; + case 'index': + $token = ' '.$token.' '; + break; + case 'prefix': + $token = ' '.$token; + break; + } + + $len = (strlen($token) - 2); + for ($ii = 0; $ii < $len; $ii++) { + $ngram = substr($token, $ii, 3); + $ngrams[$ngram] = $ngram; + } + } + + ksort($ngrams); + + return array_keys($ngrams); + } + +} diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php index 3ef2a72e6f..c0b2bbc100 100644 --- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php +++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php @@ -27,6 +27,8 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery private $spacePHIDs; private $spaceIsArchived; private $ngrams = array(); + private $ferretEngine; + private $ferretConstraints; protected function getPageCursors(array $page) { return array( @@ -270,6 +272,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $joins[] = $this->buildEdgeLogicJoinClause($conn); $joins[] = $this->buildApplicationSearchJoinClause($conn); $joins[] = $this->buildNgramsJoinClause($conn); + $joins[] = $this->buildFerretJoinClause($conn); return $joins; } @@ -292,6 +295,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $where[] = $this->buildEdgeLogicWhereClause($conn); $where[] = $this->buildSpacesWhereClause($conn); $where[] = $this->buildNgramsWhereClause($conn); + $where[] = $this->buildFerretWhereClause($conn); return $where; } @@ -346,6 +350,10 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery return true; } + if ($this->shouldGroupFerretResultRows()) { + return true; + } + 
return false; }
@@ -1373,6 +1381,202 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
   }
 
 
+/* -( Ferret )------------------------------------------------------------- */
+
+
+  /**
+   * Constrain results to objects matching a raw Ferret fulltext query.
+   *
+   * The query is split on whitespace into terms. Empty terms, which
+   * `preg_split()` produces for leading or trailing whitespace, are
+   * discarded so they never reach the ngram joins or corpus `LIKE` clauses
+   * built from the term list. A query with no usable terms adds no
+   * constraint at all.
+   *
+   * @param PhabricatorFerretEngine $engine Engine providing index tables.
+   * @param string $raw_query Raw query text.
+   * @return $this
+   * @throws Exception If a Ferret constraint was already applied.
+   */
+  public function withFerretConstraint(
+    PhabricatorFerretEngine $engine,
+    $raw_query) {
+
+    if ($this->ferretEngine) {
+      throw new Exception(
+        pht(
+          'Query may not have multiple fulltext constraints.'));
+    }
+
+    if (!strlen($raw_query)) {
+      return $this;
+    }
+
+    // PREG_SPLIT_NO_EMPTY drops the empty strings that a plain split returns
+    // for input like " foo " or "   ".
+    $terms = preg_split('/\s+/', $raw_query, -1, PREG_SPLIT_NO_EMPTY);
+    if (!$terms) {
+      return $this;
+    }
+
+    $this->ferretEngine = $engine;
+    $this->ferretConstraints = $terms;
+
+    return $this;
+  }
+
+  /**
+   * Build the JOIN clauses for the active Ferret constraint.
+   *
+   * Joins the document table to the primary table's `phid` column, joins
+   * the ngrams table once per distinct query ngram, then joins the field
+   * table so corpus columns are available to the WHERE clause.
+   *
+   * @param AphrontDatabaseConnection $conn Connection used for escaping.
+   * @return array JOIN clause fragments, or an empty array when no Ferret
+   *   constraint is set.
+   */
+  protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) {
+    if (!$this->ferretEngine) {
+      return array();
+    }
+
+    $engine = $this->ferretEngine;
+    $ngram_engine = new PhabricatorNgramEngine();
+
+    $ngram_table = $engine->newNgramsObject();
+    $ngram_table_name = $ngram_table->getTableName();
+
+    $flat = array();
+    foreach ($this->ferretConstraints as $term) {
+      $length = count(phutil_utf8v($term));
+
+      // Terms shorter than three characters cannot form a full trigram, so
+      // they are matched against word-start ngrams instead.
+      if ($length >= 3) {
+        $ngrams = $ngram_engine->getNgramsFromString($term, 'query');
+        $prefix = false;
+      } else if ($length == 2) {
+        $ngrams = $ngram_engine->getNgramsFromString($term, 'prefix');
+        $prefix = false;
+      } else {
+        $ngrams = array(' '.$term);
+        $prefix = true;
+      }
+
+      foreach ($ngrams as $ngram) {
+        // Key on match type and ngram: an ngram shared by several terms
+        // needs only one join, and every join counts toward the cap below.
+        $key = ($prefix ? 'prefix' : 'exact').':'.$ngram;
+        $flat[$key] = array(
+          'table' => $ngram_table_name,
+          'ngram' => $ngram,
+          'prefix' => $prefix,
+        );
+      }
+    }
+
+    // MySQL only allows us to join a maximum of 61 tables per query. Each
+    // ngram is going to cost us a join toward that limit, so if the user
+    // specified a very long query string, just pick 16 of the ngrams
+    // at random.
+    if (count($flat) > 16) {
+      shuffle($flat);
+      $flat = array_slice($flat, 0, 16);
+    }
+
+    $alias = $this->getPrimaryTableAlias();
+    if ($alias) {
+      $phid_column = qsprintf($conn, '%T.%T', $alias, 'phid');
+    } else {
+      $phid_column = qsprintf($conn, '%T', 'phid');
+    }
+
+    $document_table = $engine->newDocumentObject();
+    $field_table = $engine->newFieldObject();
+
+    $joins = array();
+    $joins[] = qsprintf(
+      $conn,
+      'JOIN %T ftdoc ON ftdoc.objectPHID = %Q',
+      $document_table->getTableName(),
+      $phid_column);
+
+    $idx = 1;
+    foreach ($flat as $spec) {
+      $table = $spec['table'];
+      $ngram = $spec['ngram'];
+      $prefix = $spec['prefix'];
+
+      // Each ngram join gets its own alias: ft1, ft2, ...
+      $ngram_alias = 'ft'.$idx++;
+
+      if ($prefix) {
+        $joins[] = qsprintf(
+          $conn,
+          'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram LIKE %>',
+          $table,
+          $ngram_alias,
+          $ngram_alias,
+          $ngram_alias,
+          $ngram);
+      } else {
+        $joins[] = qsprintf(
+          $conn,
+          'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram = %s',
+          $table,
+          $ngram_alias,
+          $ngram_alias,
+          $ngram_alias,
+          $ngram);
+      }
+    }
+
+    $joins[] = qsprintf(
+      $conn,
+      'JOIN %T ftfield ON ftdoc.id = ftfield.documentID',
+      $field_table->getTableName());
+
+    return $joins;
+  }
+
+  /**
+   * Build WHERE clauses testing each query term against the corpus columns.
+   *
+   * Emits one clause per term; a term passes when it matches either the raw
+   * or the normalized corpus of the joined `ftfield` row.
+   *
+   * @param AphrontDatabaseConnection $conn Connection used for escaping.
+   * @return array WHERE clause fragments, or an empty array when no Ferret
+   *   constraint is set.
+   */
+  protected function buildFerretWhereClause(AphrontDatabaseConnection $conn) {
+    if (!$this->ferretEngine) {
+      return array();
+    }
+
+    $where = array();
+    foreach ($this->ferretConstraints as $constraint) {
+      $where[] = qsprintf(
+        $conn,
+        '(ftfield.rawCorpus LIKE %~ OR ftfield.normalCorpus LIKE %~)',
+        $constraint,
+        $constraint);
+    }
+
+    return $where;
+  }
+
+  /**
+   * Report whether result rows need grouping because of the Ferret joins.
+   *
+   * @return bool True when at least one Ferret term is set.
+   */
+  protected function shouldGroupFerretResultRows() {
+    return (bool)$this->ferretConstraints;
+  }
+
+
 /* -( Ngrams )------------------------------------------------------------- */
 
 