From bbe206344302b4417937f32ba2df027ecd7103c6 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 20 Apr 2012 15:33:09 -0700 Subject: [PATCH] [NO CLUE WHAT I'M DOING] Add an Elasticsearch engine Summary: I have no idea what I'm doing, but here's part of an elasticsearch engine. These things work: - Indexing stuff (??) - Searching for text/type? - Reconstructing things?? All the complicated stuff doesn't work. I'm having a hard time figuring out the best way to model things because elasticsearch's documentation is not exactly the most complete or illuminating. @amckinley, does this look sane-ish so far? Particularly, the /phabricator/{type}/{phid}/ URI scheme and how I've set up the relationships and fields in the documents? How should I model the relationship and field queries? I want, like, an "equal" query but it seems like I've got "text" or "term" to work with and neither is an exact match? And "term" doesn't consider PHIDs to be terms since they have hyphens in them? I'll keep kind of slogging my way forward here but if you have valuable wisdom to share it would probably get me to a better end state much faster. The whole query construction phase is pretty much black magic to me. 
Test Plan: nyancat Reviewers: amckinley, vrana Reviewed By: vrana CC: jungejason, tuomaspelkonen, aran, 20after4, vrana Differential Revision: https://secure.phabricator.com/D790 --- conf/default.conf.php | 4 + src/__phutil_library_map__.php | 2 + .../search/PhabricatorSearchController.php | 2 - .../PhabricatorSearchEngineElastic.php | 173 ++++++++++++++++++ .../search/engine/elastic/__init__.php | 18 ++ .../mysql/PhabricatorSearchEngineMySQL.php | 5 +- ...PhabricatorDefaultSearchEngineSelector.php | 3 + .../search/selector/default/__init__.php | 2 + .../PhabricatorSearchResultView.php | 9 + 9 files changed, 212 insertions(+), 6 deletions(-) create mode 100644 src/applications/search/engine/elastic/PhabricatorSearchEngineElastic.php create mode 100644 src/applications/search/engine/elastic/__init__.php diff --git a/conf/default.conf.php b/conf/default.conf.php index 742d6a2513..e36a6991f5 100644 --- a/conf/default.conf.php +++ b/conf/default.conf.php @@ -687,6 +687,10 @@ return array( // -- Search ---------------------------------------------------------------- // + // Phabricator supports Elastic Search; to use it, specify a host like + // 'http://elastic.example.com:9200/' here. + 'search.elastic.host' => null, + // Phabricator uses a search engine selector to choose which search engine // to use when indexing and reconstructing documents, and when executing // queries. 
You can override the engine selector to provide a new selector diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index e6314f7dc3..b0b7d09ba0 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -862,6 +862,7 @@ phutil_register_library_map(array( 'PhabricatorSearchDocumentIndexer' => 'applications/search/index/indexer/base', 'PhabricatorSearchDocumentRelationship' => 'applications/search/storage/document/relationship', 'PhabricatorSearchEngine' => 'applications/search/engine/base', + 'PhabricatorSearchEngineElastic' => 'applications/search/engine/elastic', 'PhabricatorSearchEngineMySQL' => 'applications/search/engine/mysql', 'PhabricatorSearchEngineSelector' => 'applications/search/selector/base', 'PhabricatorSearchField' => 'applications/search/constants/field', @@ -1714,6 +1715,7 @@ phutil_register_library_map(array( 'PhabricatorSearchDocument' => 'PhabricatorSearchDAO', 'PhabricatorSearchDocumentField' => 'PhabricatorSearchDAO', 'PhabricatorSearchDocumentRelationship' => 'PhabricatorSearchDAO', + 'PhabricatorSearchEngineElastic' => 'PhabricatorSearchEngine', 'PhabricatorSearchEngineMySQL' => 'PhabricatorSearchEngine', 'PhabricatorSearchIndexController' => 'PhabricatorSearchBaseController', 'PhabricatorSearchManiphestIndexer' => 'PhabricatorSearchDocumentIndexer', diff --git a/src/applications/search/controller/search/PhabricatorSearchController.php b/src/applications/search/controller/search/PhabricatorSearchController.php index 8138f20b50..e9afaa83cb 100644 --- a/src/applications/search/controller/search/PhabricatorSearchController.php +++ b/src/applications/search/controller/search/PhabricatorSearchController.php @@ -226,8 +226,6 @@ final class PhabricatorSearchController $engine = PhabricatorSearchEngineSelector::newSelector()->newEngine(); $results = $engine->executeSearch($query); - $results = ipull($results, 'phid'); - $results = $pager->sliceResults($results); if (!$request->getInt('page')) { diff 
--git a/src/applications/search/engine/elastic/PhabricatorSearchEngineElastic.php b/src/applications/search/engine/elastic/PhabricatorSearchEngineElastic.php new file mode 100644 index 0000000000..f31ca9f4ec --- /dev/null +++ b/src/applications/search/engine/elastic/PhabricatorSearchEngineElastic.php @@ -0,0 +1,173 @@ +getDocumentType(); + $phid = $doc->getPHID(); + + $spec = array( + 'phid' => $phid, + 'type' => $type, + 'title' => $doc->getDocumentTitle(), + 'dateCreated' => date('c', $doc->getDocumentCreated()), + 'dateModified' => date('c', $doc->getDocumentModified()), + 'field' => array(), + 'relationship' => array(), + ); + + foreach ($doc->getFieldData() as $field) { + list($ftype, $corpus, $aux_phid) = $field; + $spec['field'][$ftype][] = array( + 'corpus' => $corpus, + 'aux' => $aux_phid, + ); + } + + foreach ($doc->getRelationshipData() as $relationship) { + list($rtype, $to_phid, $to_type, $time) = $relationship; + $spec['relationship'][$rtype][] = array( + 'phid' => $to_phid, + 'phidType' => $to_type, + 'when' => date('c', $time), + ); + } + + $this->executeRequest( + "/phabricator/{$type}/{$phid}/", + $spec, + $is_write = true); + } + + public function reconstructDocument($phid) { + + $response = $this->executeRequest( + '/phabricator/_search', + array( + 'query' => array( + 'ids' => array( + 'values' => array( + $phid, + ), + ), + ), + ), + $is_write = false); + + $hit = $response['hits']['hits'][0]['_source']; + if (!$hit) { + return null; + } + + $doc = new PhabricatorSearchAbstractDocument(); + $doc->setPHID($hit['phid']); + $doc->setDocumentType($hit['type']); + $doc->setDocumentTitle($hit['title']); + $doc->setDocumentCreated(strtotime($hit['dateCreated'])); + $doc->setDocumentModified(strtotime($hit['dateModified'])); + + foreach ($hit['field'] as $ftype => $fdefs) { + foreach ($fdefs as $fdef) { + $doc->addField( + $ftype, + $fdef['corpus'], + $fdef['aux']); + } + } + + foreach ($hit['relationship'] as $rtype => $rships) { + foreach ($rships 
as $rship) { + $doc->addRelationship( + $rtype, + $rship['phid'], + $rship['phidType'], + strtotime($rship['when'])); + } + } + + return $doc; + } + + public function executeSearch(PhabricatorSearchQuery $query) { + + $spec = array( + 'text' => array( + '_all' => $query->getQuery(), + ), + ); + + $type = $query->getParameter('type'); + if ($type) { + $uri = "/phabricator/{$type}/_search"; + } else { + $uri = "/phabricator/_search"; + } + + $response = $this->executeRequest( + $uri, + array( + 'query' => $spec, + ), + $is_write = false); + + $phids = array(); + foreach ($response['hits']['hits'] as $hit) { + $phids[] = $hit['_id']; + } + + return $phids; + } + + private function executeRequest($path, array $data, $is_write) { + $uri = PhabricatorEnv::getEnvConfig('search.elastic.host'); + $uri = new PhutilURI($uri); + $data = json_encode($data); + + $uri->setPath($path); + + $protocol = $uri->getProtocol(); + if ($protocol == 'https') { + $future = new HTTPSFuture($uri, $data); + } else { + $future = new HTTPFuture($uri, $data); + } + + if ($is_write) { + $future->setMethod('PUT'); + } else { + $future->setMethod('GET'); + } + + list($body) = $future->resolvex(); + + if ($is_write) { + return null; + } + + $body = json_decode($body, true); + if (!is_array($body)) { + throw new Exception("elasticsearch server returned invalid JSON!"); + } + + return $body; + } + +} diff --git a/src/applications/search/engine/elastic/__init__.php b/src/applications/search/engine/elastic/__init__.php new file mode 100644 index 0000000000..8bf12fdd73 --- /dev/null +++ b/src/applications/search/engine/elastic/__init__.php @@ -0,0 +1,18 @@ + '/search/index/'.$handle->getPHID().'/', + 'style' => 'float: right', + ), + 'Examine Index'); + return '
'. $img. '
'. + $index_link. phutil_render_tag( 'a', array(