mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-10 00:42:41 +01:00
Implement the "present" and "absent" operators in the Ferret execution engine
Summary: Ref T13509. Now that the compiler can parse these queries, actually implement them.

These are fairly easy to implement:

- For present, just "JOIN". If it works, the field is present.
- For absent, we "LEFT JOIN" and then "WHERE any_column IS NULL".

Test Plan: Searched for various documents with and without fields present, got sensible results in Maniphest. For example, "body:-" finds tasks with no body, "body:- duck" finds tasks with no body and "duck" elsewhere in the content, and so on.

Maniphest Tasks: T13509

Differential Revision: https://secure.phabricator.com/D21110
This commit is contained in:
parent
143f86d60b
commit
0511b2a012
1 changed files with 52 additions and 8 deletions
|
@ -1801,24 +1801,35 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
$this->ferretEngine = $engine;
|
$this->ferretEngine = $engine;
|
||||||
$this->ferretTokens = $fulltext_tokens;
|
$this->ferretTokens = $fulltext_tokens;
|
||||||
|
|
||||||
|
$op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT;
|
||||||
|
|
||||||
$default_function = $engine->getDefaultFunctionKey();
|
$default_function = $engine->getDefaultFunctionKey();
|
||||||
$table_map = array();
|
$table_map = array();
|
||||||
$idx = 1;
|
$idx = 1;
|
||||||
foreach ($this->ferretTokens as $fulltext_token) {
|
foreach ($this->ferretTokens as $fulltext_token) {
|
||||||
$raw_token = $fulltext_token->getToken();
|
$raw_token = $fulltext_token->getToken();
|
||||||
$function = $raw_token->getFunction();
|
|
||||||
|
|
||||||
|
$function = $raw_token->getFunction();
|
||||||
if ($function === null) {
|
if ($function === null) {
|
||||||
$function = $default_function;
|
$function = $default_function;
|
||||||
}
|
}
|
||||||
|
|
||||||
$raw_field = $engine->getFieldForFunction($function);
|
$raw_field = $engine->getFieldForFunction($function);
|
||||||
|
|
||||||
|
// NOTE: The query compiler guarantees that a query cannot make a
|
||||||
|
// field both "present" and "absent", so it's safe to just use the
|
||||||
|
// first operator we encounter to determine whether the table is
|
||||||
|
// optional or not.
|
||||||
|
|
||||||
|
$operator = $raw_token->getOperator();
|
||||||
|
$is_optional = ($operator === $op_absent);
|
||||||
|
|
||||||
if (!isset($table_map[$function])) {
|
if (!isset($table_map[$function])) {
|
||||||
$alias = 'ftfield_'.$idx++;
|
$alias = 'ftfield_'.$idx++;
|
||||||
$table_map[$function] = array(
|
$table_map[$function] = array(
|
||||||
'alias' => $alias,
|
'alias' => $alias,
|
||||||
'key' => $raw_field,
|
'key' => $raw_field,
|
||||||
|
'optional' => $is_optional,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1966,6 +1977,8 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
|
|
||||||
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
|
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
|
||||||
$op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
|
$op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
|
||||||
|
$op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT;
|
||||||
|
$op_present = PhutilSearchQueryCompiler::OPERATOR_PRESENT;
|
||||||
|
|
||||||
$engine = $this->ferretEngine;
|
$engine = $this->ferretEngine;
|
||||||
$stemmer = $engine->newStemmer();
|
$stemmer = $engine->newStemmer();
|
||||||
|
@ -1976,11 +1989,19 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
foreach ($this->ferretTokens as $fulltext_token) {
|
foreach ($this->ferretTokens as $fulltext_token) {
|
||||||
$raw_token = $fulltext_token->getToken();
|
$raw_token = $fulltext_token->getToken();
|
||||||
|
|
||||||
|
$operator = $raw_token->getOperator();
|
||||||
|
|
||||||
// If this is a negated term like "-pomegranate", don't join the ngram
|
// If this is a negated term like "-pomegranate", don't join the ngram
|
||||||
// table since we aren't looking for documents with this term. (We could
|
// table since we aren't looking for documents with this term. (We could
|
||||||
// LEFT JOIN the table and require a NULL row, but this is probably more
|
// LEFT JOIN the table and require a NULL row, but this is probably more
|
||||||
// trouble than it's worth.)
|
// trouble than it's worth.)
|
||||||
if ($raw_token->getOperator() == $op_not) {
|
if ($operator === $op_not) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Neither the "present" nor "absent" operators benefit from joining
|
||||||
|
// the ngram table.
|
||||||
|
if ($operator === $op_absent || $operator === $op_present) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2143,31 +2164,54 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
|
||||||
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
|
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
|
||||||
$op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
|
$op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
|
||||||
$op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT;
|
$op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT;
|
||||||
|
$op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT;
|
||||||
|
$op_present = PhutilSearchQueryCompiler::OPERATOR_PRESENT;
|
||||||
|
|
||||||
$where = array();
|
$where = array();
|
||||||
$current_function = 'all';
|
$default_function = $engine->getDefaultFunctionKey();
|
||||||
foreach ($this->ferretTokens as $fulltext_token) {
|
foreach ($this->ferretTokens as $fulltext_token) {
|
||||||
$raw_token = $fulltext_token->getToken();
|
$raw_token = $fulltext_token->getToken();
|
||||||
$value = $raw_token->getValue();
|
$value = $raw_token->getValue();
|
||||||
|
|
||||||
$function = $raw_token->getFunction();
|
$function = $raw_token->getFunction();
|
||||||
if ($function === null) {
|
if ($function === null) {
|
||||||
$function = $current_function;
|
$function = $default_function;
|
||||||
}
|
}
|
||||||
$current_function = $function;
|
|
||||||
|
$operator = $raw_token->getOperator();
|
||||||
|
|
||||||
$table_alias = $table_map[$function]['alias'];
|
$table_alias = $table_map[$function]['alias'];
|
||||||
|
|
||||||
$is_not = ($raw_token->getOperator() == $op_not);
|
// If this is a "field is present" operator, we've already implicitly
|
||||||
|
// guaranteed this by JOINing the table. We don't need to do any
|
||||||
|
// more work.
|
||||||
|
$is_present = ($operator === $op_present);
|
||||||
|
if ($is_present) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if ($raw_token->getOperator() == $op_sub) {
|
// If this is a "field is absent" operator, we just want documents
|
||||||
|
// which failed to match to a row when we LEFT JOINed the table. This
|
||||||
|
// means there's no index for the field.
|
||||||
|
$is_absent = ($operator === $op_absent);
|
||||||
|
if ($is_absent) {
|
||||||
|
$where[] = qsprintf(
|
||||||
|
$conn,
|
||||||
|
'(%T.rawCorpus IS NULL)',
|
||||||
|
$table_alias);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$is_not = ($operator === $op_not);
|
||||||
|
|
||||||
|
if ($operator == $op_sub) {
|
||||||
$is_substring = true;
|
$is_substring = true;
|
||||||
} else {
|
} else {
|
||||||
$is_substring = false;
|
$is_substring = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we're doing exact search, just test the raw corpus.
|
// If we're doing exact search, just test the raw corpus.
|
||||||
$is_exact = ($raw_token->getOperator() == $op_exact);
|
$is_exact = ($operator === $op_exact);
|
||||||
if ($is_exact) {
|
if ($is_exact) {
|
||||||
if ($is_not) {
|
if ($is_not) {
|
||||||
$where[] = qsprintf(
|
$where[] = qsprintf(
|
||||||
|
|
Loading…
Reference in a new issue