1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-26 08:42:41 +01:00

Parse raw Ferret queries into tokens before processing them

Summary:
Ref T12819. Depends on D18492. Instead of passing a raw query string into the Query layer, parse it into tokens first and pass the tokens.

This allows the Query layer to figure out which parts should be substring matches vs. term matches, and would eventually allow the SearchEngine layer to support `author:...` by picking it out before sending the query to the Ferret engine.

Test Plan: Ran some Ferret queries. They work like before, except that nonsense like `-+"quack"` raises an exception now.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T12819

Differential Revision: https://secure.phabricator.com/D18499
This commit is contained in:
epriestley 2017-08-30 08:37:05 -07:00
parent 0e2e525bb4
commit e5a495f435
2 changed files with 32 additions and 12 deletions

View file

@ -232,9 +232,24 @@ final class ManiphestTaskSearchEngine
}
if (strlen($map['ferret'])) {
$raw_query = $map['ferret'];
$compiler = id(new PhutilSearchQueryCompiler())
->setEnableFunctions(true);
$raw_tokens = $compiler->newTokens($raw_query);
$fulltext_tokens = array();
foreach ($raw_tokens as $raw_token) {
$fulltext_token = id(new PhabricatorFulltextToken())
->setToken($raw_token);
$fulltext_tokens[] = $fulltext_token;
}
$query->withFerretConstraint(
id(new ManiphestTask())->newFerretEngine(),
$map['ferret']);
$fulltext_tokens);
}
if ($map['parentIDs']) {

View file

@ -28,7 +28,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
private $spaceIsArchived;
private $ngrams = array();
private $ferretEngine;
private $ferretConstraints;
private $ferretTokens;
protected function getPageCursors(array $page) {
return array(
@ -1386,7 +1386,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
public function withFerretConstraint(
PhabricatorFerretEngine $engine,
$raw_query) {
array $fulltext_tokens) {
if ($this->ferretEngine) {
throw new Exception(
@ -1394,12 +1394,12 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
'Query may not have multiple fulltext constraints.'));
}
if (!strlen($raw_query)) {
if (!$fulltext_tokens) {
return $this;
}
$this->ferretEngine = $engine;
$this->ferretConstraints = preg_split('/\s+/', $raw_query);
$this->ferretTokens = $fulltext_tokens;
return $this;
}
@ -1416,9 +1416,11 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$ngram_table_name = $ngram_table->getTableName();
$flat = array();
foreach ($this->ferretConstraints as $term) {
$value = $term;
$length = count(phutil_utf8v($term));
foreach ($this->ferretTokens as $fulltext_token) {
$raw_token = $fulltext_token->getToken();
$value = $raw_token->getValue();
$length = count(phutil_utf8v($value));
if ($length >= 3) {
$ngrams = $ngram_engine->getNgramsFromString($value, 'query');
@ -1509,19 +1511,22 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
}
$where = array();
foreach ($this->ferretConstraints as $constraint) {
foreach ($this->ferretTokens as $fulltext_token) {
$raw_token = $fulltext_token->getToken();
$value = $raw_token->getValue();
$where[] = qsprintf(
$conn,
'(ftfield.rawCorpus LIKE %~ OR ftfield.normalCorpus LIKE %~)',
$constraint,
$constraint);
$value,
$value);
}
return $where;
}
protected function shouldGroupFerretResultRows() {
return (bool)$this->ferretConstraints;
return (bool)$this->ferretTokens;
}