mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-10 08:52:39 +01:00
Identify compound short search tokens in the form "xx.yy" as unqueryable in the search UI
Summary: Ref T12928. The fulltext index cannot match these terms, so show the user that there's a problem and drop the terms. This doesn't fix the underlying problem, but it makes the behavior clearer. Test Plan: {F5053703} {F5053704} Reviewers: chad Reviewed By: chad Maniphest Tasks: T12928 Differential Revision: https://secure.phabricator.com/D18254
This commit is contained in:
parent
e9208ed3da
commit
018d1b77bf
1 changed files with 19 additions and 1 deletions
|
@ -235,7 +235,7 @@ final class PhabricatorMySQLFulltextStorageEngine
|
||||||
$value = $stemmer->stemToken($value);
|
$value = $stemmer->stemToken($value);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (phutil_utf8_strlen($value) < $min_length) {
|
if ($this->isShortToken($value, $min_length)) {
|
||||||
$fulltext_token->setIsShort(true);
|
$fulltext_token->setIsShort(true);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -549,4 +549,22 @@ final class PhabricatorMySQLFulltextStorageEngine
|
||||||
return array($min_len, $stopwords);
|
return array($min_len, $stopwords);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a search term is too short to be queried.
 *
 * The engine tokenizes internally on periods, so a term such as "ab.cd",
 * made up only of short period-separated pieces, yields no queryable
 * tokens. A term is still meaningful when at least one piece reaches the
 * minimum length, like "example.py". See T12928.
 *
 * @param string $value Search term to examine.
 * @param int $min_length Minimum piece length, in characters as counted by
 *   phutil_utf8_strlen().
 * @return bool True if every period-separated piece is shorter than
 *   $min_length, false as soon as one piece is at least that long.
 */
private function isShortToken($value, $min_length) {
  // Runs of consecutive periods are treated as a single separator.
  $segments = preg_split('/[.]+/', $value);

  $is_short = true;
  foreach ($segments as $segment) {
    if (phutil_utf8_strlen($segment) >= $min_length) {
      // One sufficiently long piece is enough to make the term queryable.
      $is_short = false;
      break;
    }
  }

  return $is_short;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue