mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-26 08:42:41 +01:00
Parse raw Ferret queries into tokens before processing them
Summary: Ref T12819. Depends on D18492. Instead of passing a raw query into the Query layer, parse it into tokens first. This allows the query layer to figure out which parts should be substring matches vs. term matches, and would eventually allow the SearchEngine layer to support `author:...` by picking it out before sending the query to the Ferret engine.

Test Plan: Ran some Ferret queries. They work like before, except that nonsense like `-+"quack"` now raises an exception.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T12819

Differential Revision: https://secure.phabricator.com/D18499
This commit is contained in:
parent
0e2e525bb4
commit
e5a495f435
2 changed files with 32 additions and 12 deletions
|
@ -232,9 +232,24 @@ final class ManiphestTaskSearchEngine
|
|||
}
|
||||
|
||||
if (strlen($map['ferret'])) {
|
||||
$raw_query = $map['ferret'];
|
||||
|
||||
$compiler = id(new PhutilSearchQueryCompiler())
|
||||
->setEnableFunctions(true);
|
||||
|
||||
$raw_tokens = $compiler->newTokens($raw_query);
|
||||
|
||||
$fulltext_tokens = array();
|
||||
foreach ($raw_tokens as $raw_token) {
|
||||
$fulltext_token = id(new PhabricatorFulltextToken())
|
||||
->setToken($raw_token);
|
||||
|
||||
$fulltext_tokens[] = $fulltext_token;
|
||||
}
|
||||
|
||||
$query->withFerretConstraint(
|
||||
id(new ManiphestTask())->newFerretEngine(),
|
||||
$map['ferret']);
|
||||
$fulltext_tokens);
|
||||
}
|
||||
|
||||
if ($map['parentIDs']) {
|
||||
|
|
|
@ -28,7 +28,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
private $spaceIsArchived;
|
||||
private $ngrams = array();
|
||||
private $ferretEngine;
|
||||
private $ferretConstraints;
|
||||
private $ferretTokens;
|
||||
|
||||
protected function getPageCursors(array $page) {
|
||||
return array(
|
||||
|
@ -1386,7 +1386,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
|
||||
public function withFerretConstraint(
|
||||
PhabricatorFerretEngine $engine,
|
||||
$raw_query) {
|
||||
array $fulltext_tokens) {
|
||||
|
||||
if ($this->ferretEngine) {
|
||||
throw new Exception(
|
||||
|
@ -1394,12 +1394,12 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
'Query may not have multiple fulltext constraints.'));
|
||||
}
|
||||
|
||||
if (!strlen($raw_query)) {
|
||||
if (!$fulltext_tokens) {
|
||||
return $this;
|
||||
}
|
||||
|
||||
$this->ferretEngine = $engine;
|
||||
$this->ferretConstraints = preg_split('/\s+/', $raw_query);
|
||||
$this->ferretTokens = $fulltext_tokens;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
@ -1416,9 +1416,11 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
$ngram_table_name = $ngram_table->getTableName();
|
||||
|
||||
$flat = array();
|
||||
foreach ($this->ferretConstraints as $term) {
|
||||
$value = $term;
|
||||
$length = count(phutil_utf8v($term));
|
||||
foreach ($this->ferretTokens as $fulltext_token) {
|
||||
$raw_token = $fulltext_token->getToken();
|
||||
$value = $raw_token->getValue();
|
||||
|
||||
$length = count(phutil_utf8v($value));
|
||||
|
||||
if ($length >= 3) {
|
||||
$ngrams = $ngram_engine->getNgramsFromString($value, 'query');
|
||||
|
@ -1509,19 +1511,22 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
|||
}
|
||||
|
||||
$where = array();
|
||||
foreach ($this->ferretConstraints as $constraint) {
|
||||
foreach ($this->ferretTokens as $fulltext_token) {
|
||||
$raw_token = $fulltext_token->getToken();
|
||||
$value = $raw_token->getValue();
|
||||
|
||||
$where[] = qsprintf(
|
||||
$conn,
|
||||
'(ftfield.rawCorpus LIKE %~ OR ftfield.normalCorpus LIKE %~)',
|
||||
$constraint,
|
||||
$constraint);
|
||||
$value,
|
||||
$value);
|
||||
}
|
||||
|
||||
return $where;
|
||||
}
|
||||
|
||||
protected function shouldGroupFerretResultRows() {
|
||||
return (bool)$this->ferretConstraints;
|
||||
return (bool)$this->ferretTokens;
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue