mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-09 06:11:01 +01:00
Identify compound short search tokens in the form "xx.yy" as unqueryable in the search UI
Summary: Ref T12928. The index doesn't work for these, so show the user that there's a problem and drop the terms. This doesn't fix the problem, but makes the behavior more clear.
Test Plan: {F5053703} {F5053704}
Reviewers: chad
Reviewed By: chad
Maniphest Tasks: T12928
Differential Revision: https://secure.phabricator.com/D18254
This commit is contained in:
parent
e9208ed3da
commit
018d1b77bf
1 changed files with 19 additions and 1 deletions
|
@ -235,7 +235,7 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
$value = $stemmer->stemToken($value);
|
||||
}
|
||||
|
||||
if (phutil_utf8_strlen($value) < $min_length) {
|
||||
if ($this->isShortToken($value, $min_length)) {
|
||||
$fulltext_token->setIsShort(true);
|
||||
continue;
|
||||
}
|
||||
|
@ -549,4 +549,22 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||
return array($min_len, $stopwords);
|
||||
}
|
||||
|
||||
private function isShortToken($value, $min_length) {
|
||||
// NOTE: The engine tokenizes internally on periods, so terms in the form
|
||||
// "ab.cd", where short substrings are separated by periods, do not produce
|
||||
// any queryable tokens. These terms are meaningful if at least one
|
||||
// substring is longer than the minimum length, like "example.py". See
|
||||
// T12928.
|
||||
|
||||
$parts = preg_split('/[.]+/', $value);
|
||||
|
||||
foreach ($parts as $part) {
|
||||
if (phutil_utf8_strlen($part) >= $min_length) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue