1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-10 08:52:39 +01:00

Implement the "present" and "absent" operators in the Ferret execution engine

Summary:
Ref T13509. Now that the compiler can parse these queries, actually implement them.

These are fairly easy to implement:

  - For present, just "JOIN". If it works, the field is present.
  - For absent, we "LEFT JOIN" and then "WHERE any_column IS NULL".

Test Plan: Searched for various documents with and without fields present, got sensible results in Maniphest. For example, "body:-" finds tasks with no body, "body:- duck" finds tasks with no body and "duck" elsewhere in the content, and so on.

Maniphest Tasks: T13509

Differential Revision: https://secure.phabricator.com/D21110
This commit is contained in:
epriestley 2020-04-14 09:42:12 -07:00
parent 143f86d60b
commit 0511b2a012

View file

@ -1801,24 +1801,35 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$this->ferretEngine = $engine; $this->ferretEngine = $engine;
$this->ferretTokens = $fulltext_tokens; $this->ferretTokens = $fulltext_tokens;
$op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT;
$default_function = $engine->getDefaultFunctionKey(); $default_function = $engine->getDefaultFunctionKey();
$table_map = array(); $table_map = array();
$idx = 1; $idx = 1;
foreach ($this->ferretTokens as $fulltext_token) { foreach ($this->ferretTokens as $fulltext_token) {
$raw_token = $fulltext_token->getToken(); $raw_token = $fulltext_token->getToken();
$function = $raw_token->getFunction();
$function = $raw_token->getFunction();
if ($function === null) { if ($function === null) {
$function = $default_function; $function = $default_function;
} }
$raw_field = $engine->getFieldForFunction($function); $raw_field = $engine->getFieldForFunction($function);
// NOTE: The query compiler guarantees that a query cannot make a
// field both "present" and "absent", so it's safe to just use the
// first operator we encounter to determine whether the table is
// optional or not.
$operator = $raw_token->getOperator();
$is_optional = ($operator === $op_absent);
if (!isset($table_map[$function])) { if (!isset($table_map[$function])) {
$alias = 'ftfield_'.$idx++; $alias = 'ftfield_'.$idx++;
$table_map[$function] = array( $table_map[$function] = array(
'alias' => $alias, 'alias' => $alias,
'key' => $raw_field, 'key' => $raw_field,
'optional' => $is_optional,
); );
} }
} }
@ -1966,6 +1977,8 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING; $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
$op_not = PhutilSearchQueryCompiler::OPERATOR_NOT; $op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
$op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT;
$op_present = PhutilSearchQueryCompiler::OPERATOR_PRESENT;
$engine = $this->ferretEngine; $engine = $this->ferretEngine;
$stemmer = $engine->newStemmer(); $stemmer = $engine->newStemmer();
@ -1976,11 +1989,19 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
foreach ($this->ferretTokens as $fulltext_token) { foreach ($this->ferretTokens as $fulltext_token) {
$raw_token = $fulltext_token->getToken(); $raw_token = $fulltext_token->getToken();
$operator = $raw_token->getOperator();
// If this is a negated term like "-pomegranate", don't join the ngram // If this is a negated term like "-pomegranate", don't join the ngram
// table since we aren't looking for documents with this term. (We could // table since we aren't looking for documents with this term. (We could
// LEFT JOIN the table and require a NULL row, but this is probably more // LEFT JOIN the table and require a NULL row, but this is probably more
// trouble than it's worth.) // trouble than it's worth.)
if ($raw_token->getOperator() == $op_not) { if ($operator === $op_not) {
continue;
}
// Neither the "present" nor the "absent" operator benefits from joining
// the ngram table.
if ($operator === $op_absent || $operator === $op_present) {
continue; continue;
} }
@ -2143,31 +2164,54 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING; $op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
$op_not = PhutilSearchQueryCompiler::OPERATOR_NOT; $op_not = PhutilSearchQueryCompiler::OPERATOR_NOT;
$op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT; $op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT;
$op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT;
$op_present = PhutilSearchQueryCompiler::OPERATOR_PRESENT;
$where = array(); $where = array();
$current_function = 'all'; $default_function = $engine->getDefaultFunctionKey();
foreach ($this->ferretTokens as $fulltext_token) { foreach ($this->ferretTokens as $fulltext_token) {
$raw_token = $fulltext_token->getToken(); $raw_token = $fulltext_token->getToken();
$value = $raw_token->getValue(); $value = $raw_token->getValue();
$function = $raw_token->getFunction(); $function = $raw_token->getFunction();
if ($function === null) { if ($function === null) {
$function = $current_function; $function = $default_function;
} }
$current_function = $function;
$operator = $raw_token->getOperator();
$table_alias = $table_map[$function]['alias']; $table_alias = $table_map[$function]['alias'];
$is_not = ($raw_token->getOperator() == $op_not); // If this is a "field is present" operator, we've already implicitly
// guaranteed this by JOINing the table. We don't need to do any
// more work.
$is_present = ($operator === $op_present);
if ($is_present) {
continue;
}
if ($raw_token->getOperator() == $op_sub) { // If this is a "field is absent" operator, we just want documents
// which failed to match to a row when we LEFT JOINed the table. This
// means there's no index for the field.
$is_absent = ($operator === $op_absent);
if ($is_absent) {
$where[] = qsprintf(
$conn,
'(%T.rawCorpus IS NULL)',
$table_alias);
continue;
}
$is_not = ($operator === $op_not);
if ($operator == $op_sub) {
$is_substring = true; $is_substring = true;
} else { } else {
$is_substring = false; $is_substring = false;
} }
// If we're doing exact search, just test the raw corpus. // If we're doing exact search, just test the raw corpus.
$is_exact = ($raw_token->getOperator() == $op_exact); $is_exact = ($operator === $op_exact);
if ($is_exact) { if ($is_exact) {
if ($is_not) { if ($is_not) {
$where[] = qsprintf( $where[] = qsprintf(