From 44bbdec9acfa35b2dde58200d7af211a9d4b097d Mon Sep 17 00:00:00 2001 From: vrana Date: Fri, 20 Apr 2012 17:09:30 -0700 Subject: [PATCH] Improve elasticsearch Summary: I thought that this will be fun but the elasticsearch API is horrible and the documentation is poor. Test Plan: Search for: - string - author - author, owner - string, author - open - string, open, author - string, exclude - several authors, several owners - nothing - probably all other combinations Normally, such an exhaustive test plan wouldn't be required but each combination requires a completely different query. Reviewers: epriestley, jungejason Reviewed By: epriestley CC: aran, Koolvin, btrahan Differential Revision: https://secure.phabricator.com/D2298 --- .../search/PhabricatorSearchController.php | 9 +- .../search/controller/search/__init__.php | 2 +- .../PhabricatorSearchEngineElastic.php | 155 +++++++++++------- .../search/engine/elastic/__init__.php | 4 + .../mysql/PhabricatorSearchEngineMySQL.php | 7 +- .../PhabricatorSearchAbstractDocument.php | 13 +- .../index/abstractdocument/__init__.php | 2 + .../user/PhabricatorSearchUserIndexer.php | 2 +- 8 files changed, 126 insertions(+), 68 deletions(-) diff --git a/src/applications/search/controller/search/PhabricatorSearchController.php b/src/applications/search/controller/search/PhabricatorSearchController.php index e9afaa83cb..93dc03e84b 100644 --- a/src/applications/search/controller/search/PhabricatorSearchController.php +++ b/src/applications/search/controller/search/PhabricatorSearchController.php @@ -113,16 +113,9 @@ final class PhabricatorSearchController } } - $more = PhabricatorEnv::getEnvConfig('search.more-document-types', array()); - $options = array( '' => 'All Documents', - PhabricatorPHIDConstants::PHID_TYPE_DREV => 'Differential Revisions', - PhabricatorPHIDConstants::PHID_TYPE_CMIT => 'Repository Commits', - PhabricatorPHIDConstants::PHID_TYPE_TASK => 'Maniphest Tasks', - PhabricatorPHIDConstants::PHID_TYPE_WIKI => 'Phriction Documents', - PhabricatorPHIDConstants::PHID_TYPE_USER => 'Phabricator Users', - ) + $more; + ) + PhabricatorSearchAbstractDocument::getSupportedTypes(); $status_options = array( 0 => 'Open and Closed Documents', diff --git a/src/applications/search/controller/search/__init__.php b/src/applications/search/controller/search/__init__.php index 5e3ac55056..1a46a0412d 100644 --- a/src/applications/search/controller/search/__init__.php +++ b/src/applications/search/controller/search/__init__.php @@ -18,11 +18,11 @@ phutil_require_module('phabricator', 'applications/repository/storage/repository phutil_require_module('phabricator', 'applications/search/constants/scope'); phutil_require_module('phabricator', 'applications/search/controller/base'); phutil_require_module('phabricator', 'applications/search/engine/jumpnav'); +phutil_require_module('phabricator', 'applications/search/index/abstractdocument'); phutil_require_module('phabricator', 'applications/search/selector/base'); phutil_require_module('phabricator', 'applications/search/storage/query'); phutil_require_module('phabricator', 'applications/search/view/searchresult'); phutil_require_module('phabricator', 'infrastructure/celerity/api'); -phutil_require_module('phabricator', 'infrastructure/env'); phutil_require_module('phabricator', 'view/control/pager'); phutil_require_module('phabricator', 'view/form/base'); phutil_require_module('phabricator', 'view/form/control/select'); diff --git a/src/applications/search/engine/elastic/PhabricatorSearchEngineElastic.php b/src/applications/search/engine/elastic/PhabricatorSearchEngineElastic.php index f31ca9f4ec..1bb2488a6b 100644 --- a/src/applications/search/engine/elastic/PhabricatorSearchEngineElastic.php +++ b/src/applications/search/engine/elastic/PhabricatorSearchEngineElastic.php @@ -1,7 +1,7 @@ getDocumentType(); $phid = $doc->getPHID(); + $handle = PhabricatorObjectHandleData::loadOneHandle($phid); $spec = array( - 'phid' => $phid, - 'type' => $type, 'title' => $doc->getDocumentTitle(), - 'dateCreated' => date('c', $doc->getDocumentCreated()), - 'dateModified' => date('c', $doc->getDocumentModified()), + 'url' => PhabricatorEnv::getProductionURI($handle->getURI()), + 'dateCreated' => $doc->getDocumentCreated(), + '_timestamp' => $doc->getDocumentModified(), 'field' => array(), 'relationship' => array(), ); foreach ($doc->getFieldData() as $field) { - list($ftype, $corpus, $aux_phid) = $field; - $spec['field'][$ftype][] = array( - 'corpus' => $corpus, - 'aux' => $aux_phid, - ); + $spec['field'][] = array_combine(array('type', 'corpus', 'aux'), $field); } foreach ($doc->getRelationshipData() as $relationship) { @@ -47,7 +43,7 @@ final class PhabricatorSearchEngineElastic extends PhabricatorSearchEngine { $spec['relationship'][$rtype][] = array( 'phid' => $to_phid, 'phidType' => $to_type, - 'when' => date('c', $time), + 'when' => $time, ); } @@ -58,39 +54,25 @@ final class PhabricatorSearchEngineElastic extends PhabricatorSearchEngine { } public function reconstructDocument($phid) { + $type = phid_get_type($phid); - $response = $this->executeRequest( - '/phabricator/_search', - array( - 'query' => array( - 'ids' => array( - 'values' => array( - $phid, - ), - ), - ), - ), - $is_write = false); + $response = $this->executeRequest("/phabricator/{$type}/{$phid}", array()); - $hit = $response['hits']['hits'][0]['_source']; - if (!$hit) { + if (empty($response['exists'])) { return null; } - $doc = new PhabricatorSearchAbstractDocument(); - $doc->setPHID($hit['phid']); - $doc->setDocumentType($hit['type']); - $doc->setDocumentTitle($hit['title']); - $doc->setDocumentCreated(strtotime($hit['dateCreated'])); - $doc->setDocumentModified(strtotime($hit['dateModified'])); + $hit = $response['_source']; - foreach ($hit['field'] as $ftype => $fdefs) { - foreach ($fdefs as $fdef) { - $doc->addField( - $ftype, - $fdef['corpus'], - $fdef['aux']); - } + $doc = new PhabricatorSearchAbstractDocument(); + $doc->setPHID($phid); + $doc->setDocumentType($response['_type']); + $doc->setDocumentTitle($hit['title']); + $doc->setDocumentCreated($hit['dateCreated']); + $doc->setDocumentModified($hit['_timestamp']); + + foreach ($hit['field'] as $fdef) { + $doc->addField($fdef['type'], $fdef['corpus'], $fdef['aux']); } foreach ($hit['relationship'] as $rtype => $rships) { @@ -99,7 +81,7 @@ final class PhabricatorSearchEngineElastic extends PhabricatorSearchEngine { $rtype, $rship['phid'], $rship['phidType'], - strtotime($rship['when'])); + $rship['when']); } } @@ -108,35 +90,96 @@ final class PhabricatorSearchEngineElastic extends PhabricatorSearchEngine { public function executeSearch(PhabricatorSearchQuery $query) { - $spec = array( - 'text' => array( - '_all' => $query->getQuery(), - ), - ); + $spec = array(); + $filter = array(); + + if ($query->getQuery()) { + $spec[] = array( + 'field' => array( + 'field.corpus' => $query->getQuery(), + ), + ); + } + + $exclude = $query->getParameter('exclude'); + if ($exclude) { + $filter[] = array( + 'not' => array( + 'ids' => array( + 'values' => array($exclude), + ), + ), + ); + } $type = $query->getParameter('type'); if ($type) { $uri = "/phabricator/{$type}/_search"; } else { - $uri = "/phabricator/_search"; + // Don't use '/phabricator/_search' for the case that there is something + // else in the index (for example if 'phabricator' is only an alias to + // some bigger index). + $types = PhabricatorSearchAbstractDocument::getSupportedTypes(); + $uri = '/phabricator/' . implode(',', array_keys($types)) . '/_search'; } - $response = $this->executeRequest( - $uri, - array( - 'query' => $spec, - ), - $is_write = false); - - $phids = array(); - foreach ($response['hits']['hits'] as $hit) { - $phids[] = $hit['_id']; + $rel_mapping = array( + 'author' => PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR, + 'open' => PhabricatorSearchRelationship::RELATIONSHIP_OPEN, + 'owner' => PhabricatorSearchRelationship::RELATIONSHIP_OWNER, + 'project' => PhabricatorSearchRelationship::RELATIONSHIP_PROJECT, + 'repository' => PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY, + ); + foreach ($rel_mapping as $name => $field) { + $param = $query->getParameter($name); + if (is_array($param)) { + $should = array(); + foreach ($param as $val) { + $should[] = array( + 'text' => array( + "relationship.{$field}.phid" => array( + 'query' => $val, + 'type' => 'phrase', + ), + ), + ); + } + $spec[] = array('bool' => array('should' => $should)); + } else if ($param) { + $filter[] = array( + 'exists' => array( + 'field' => "relationship.{$field}.phid", + ), + ); + } } + if ($spec) { + $spec = array('query' => array('bool' => array('must' => $spec))); + } + + if ($filter) { + $filter = array('filter' => array('and' => $filter)); + if ($spec) { + $spec = array( + 'query' => array( + 'filtered' => $spec + $filter, + ), + ); + } else { + $spec = $filter; + } + } + + $spec['from'] = (int)$query->getParameter('offset', 0); + $spec['size'] = (int)$query->getParameter('limit', 0); + $response = $this->executeRequest($uri, $spec); + + $phids = ipull($response['hits']['hits'], '_id'); return $phids; } - private function executeRequest($path, array $data, $is_write) { + private function executeRequest($path, array $data, $is_write = false) { $uri = PhabricatorEnv::getEnvConfig('search.elastic.host'); $uri = new PhutilURI($uri); $data = json_encode($data); diff --git a/src/applications/search/engine/elastic/__init__.php b/src/applications/search/engine/elastic/__init__.php index 8bf12fdd73..1acaecabd5 100644 --- a/src/applications/search/engine/elastic/__init__.php +++ b/src/applications/search/engine/elastic/__init__.php @@ -6,6 +6,9 @@ +phutil_require_module('phabricator', 'applications/phid/handle/data'); +phutil_require_module('phabricator', 'applications/phid/utils'); +phutil_require_module('phabricator', 'applications/search/constants/relationship'); phutil_require_module('phabricator', 'applications/search/engine/base'); phutil_require_module('phabricator', 'applications/search/index/abstractdocument'); phutil_require_module('phabricator', 'infrastructure/env'); @@ -13,6 +16,7 @@ phutil_require_module('phabricator', 'infrastructure/env'); phutil_require_module('phutil', 'future/http/http'); phutil_require_module('phutil', 'future/http/https'); phutil_require_module('phutil', 'parser/uri'); +phutil_require_module('phutil', 'utils'); phutil_require_source('PhabricatorSearchEngineElastic.php'); diff --git a/src/applications/search/engine/mysql/PhabricatorSearchEngineMySQL.php b/src/applications/search/engine/mysql/PhabricatorSearchEngineMySQL.php index 00d3a8c0d3..7b3d1c9e6f 100644 --- a/src/applications/search/engine/mysql/PhabricatorSearchEngineMySQL.php +++ b/src/applications/search/engine/mysql/PhabricatorSearchEngineMySQL.php @@ -305,7 +305,12 @@ final class PhabricatorSearchEngineMySQL extends PhabricatorSearchEngine { return ipull($hits, 'phid'); } - protected function joinRelationship($conn, $query, $field, $type) { + protected function joinRelationship( + AphrontDatabaseConnection $conn, + PhabricatorSearchQuery $query, + $field, + $type) { + $phids = $query->getParameter($field, array()); if (!$phids) { return null; diff --git a/src/applications/search/index/abstractdocument/PhabricatorSearchAbstractDocument.php b/src/applications/search/index/abstractdocument/PhabricatorSearchAbstractDocument.php index 97cad5007f..9bea89dda6 100644 --- a/src/applications/search/index/abstractdocument/PhabricatorSearchAbstractDocument.php +++ b/src/applications/search/index/abstractdocument/PhabricatorSearchAbstractDocument.php @@ -1,7 +1,7 @@ 'Differential Revisions', + PhabricatorPHIDConstants::PHID_TYPE_CMIT => 'Repository Commits', + PhabricatorPHIDConstants::PHID_TYPE_TASK => 'Maniphest Tasks', + PhabricatorPHIDConstants::PHID_TYPE_WIKI => 'Phriction Documents', + PhabricatorPHIDConstants::PHID_TYPE_USER => 'Phabricator Users', + ) + $more; + } + public function setPHID($phid) { $this->phid = $phid; return $this; diff --git a/src/applications/search/index/abstractdocument/__init__.php b/src/applications/search/index/abstractdocument/__init__.php index 355519d478..f1af4dec62 100644 --- a/src/applications/search/index/abstractdocument/__init__.php +++ b/src/applications/search/index/abstractdocument/__init__.php @@ -6,7 +6,9 @@ +phutil_require_module('phabricator', 'applications/phid/constants'); phutil_require_module('phabricator', 'applications/search/constants/field'); +phutil_require_module('phabricator', 'infrastructure/env'); phutil_require_source('PhabricatorSearchAbstractDocument.php'); diff --git a/src/applications/search/index/indexer/user/PhabricatorSearchUserIndexer.php b/src/applications/search/index/indexer/user/PhabricatorSearchUserIndexer.php index e321eff38e..7cacf31492 100644 --- a/src/applications/search/index/indexer/user/PhabricatorSearchUserIndexer.php +++ b/src/applications/search/index/indexer/user/PhabricatorSearchUserIndexer.php @@ -26,7 +26,7 @@ final class PhabricatorSearchUserIndexer $doc = new PhabricatorSearchAbstractDocument(); $doc->setPHID($user->getPHID()); $doc->setDocumentType(PhabricatorPHIDConstants::PHID_TYPE_USER); - $doc->setDocumentTitle($user->getUserName().'('.$user->getRealName().')'); + $doc->setDocumentTitle($user->getUserName().' ('.$user->getRealName().')'); $doc->setDocumentCreated($user->getDateCreated()); $doc->setDocumentModified($user->getDateModified());