1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-12-18 11:30:55 +01:00

Show users how fulltext search queries are parsed and executed; don't query stopwords or short tokens

Summary:
Depends on D17670. Fixes T12137. Fixes T12003. Ref T2632.

This shows users a readout of which terms were actually searched for.

This also drops those terms from the query we submit to the backend, dodging the weird behaviors / search engine bugs in T12137.

This might need some design tweaking.

Test Plan: {F4899825}

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T12137, T12003, T2632

Differential Revision: https://secure.phabricator.com/D17672
This commit is contained in:
epriestley 2017-04-12 16:28:04 -07:00
parent cb49acc2ca
commit 3245e74f16
12 changed files with 855 additions and 22 deletions

View file

@ -103,7 +103,7 @@ return array(
'rsrc/css/application/releeph/releeph-request-differential-create-dialog.css' => '8d8b92cd',
'rsrc/css/application/releeph/releeph-request-typeahead.css' => '667a48ae',
'rsrc/css/application/search/application-search-view.css' => '66ee5d46',
'rsrc/css/application/search/search-results.css' => '64ad079a',
'rsrc/css/application/search/search-results.css' => 'f87d23ad',
'rsrc/css/application/slowvote/slowvote.css' => 'a94b7230',
'rsrc/css/application/tokens/tokens.css' => '3d0f239e',
'rsrc/css/application/uiexample/example.css' => '528b19de',
@ -794,7 +794,7 @@ return array(
'phabricator-phtize' => 'd254d646',
'phabricator-prefab' => '8d40ae75',
'phabricator-remarkup-css' => '17c0fb37',
'phabricator-search-results-css' => '64ad079a',
'phabricator-search-results-css' => 'f87d23ad',
'phabricator-shaped-request' => '7cbe244b',
'phabricator-slowvote-css' => 'a94b7230',
'phabricator-source-code-view-css' => '4383192f',

View file

@ -0,0 +1,543 @@
a's
able
about
above
according
accordingly
across
actually
after
afterwards
again
against
ain't
all
allow
allows
almost
alone
along
already
also
although
always
am
among
amongst
an
and
another
any
anybody
anyhow
anyone
anything
anyway
anyways
anywhere
apart
appear
appreciate
appropriate
are
aren't
around
as
aside
ask
asking
associated
at
available
away
awfully
be
became
because
become
becomes
becoming
been
before
beforehand
behind
being
believe
below
beside
besides
best
better
between
beyond
both
brief
but
by
c'mon
c's
came
can
can't
cannot
cant
cause
causes
certain
certainly
changes
clearly
co
com
come
comes
concerning
consequently
consider
considering
contain
containing
contains
corresponding
could
couldn't
course
currently
definitely
described
despite
did
didn't
different
do
does
doesn't
doing
don't
done
down
downwards
during
each
edu
eg
eight
either
else
elsewhere
enough
entirely
especially
et
etc
even
ever
every
everybody
everyone
everything
everywhere
ex
exactly
example
except
far
few
fifth
first
five
followed
following
follows
for
former
formerly
forth
four
from
further
furthermore
get
gets
getting
given
gives
go
goes
going
gone
got
gotten
greetings
had
hadn't
happens
hardly
has
hasn't
have
haven't
having
he
he's
hello
help
hence
her
here
here's
hereafter
hereby
herein
hereupon
hers
herself
hi
him
himself
his
hither
hopefully
how
howbeit
however
i'd
i'll
i'm
i've
ie
if
ignored
immediate
in
inasmuch
inc
indeed
indicate
indicated
indicates
inner
insofar
instead
into
inward
is
isn't
it
it'd
it'll
it's
its
itself
just
keep
keeps
kept
know
known
knows
last
lately
later
latter
latterly
least
less
lest
let
let's
like
liked
likely
little
look
looking
looks
ltd
mainly
many
may
maybe
me
mean
meanwhile
merely
might
more
moreover
most
mostly
much
must
my
myself
name
namely
nd
near
nearly
necessary
need
needs
neither
never
nevertheless
new
next
nine
no
nobody
non
none
noone
nor
normally
not
nothing
novel
now
nowhere
obviously
of
off
often
oh
ok
okay
old
on
once
one
ones
only
onto
or
other
others
otherwise
ought
our
ours
ourselves
out
outside
over
overall
own
particular
particularly
per
perhaps
placed
please
plus
possible
presumably
probably
provides
que
quite
qv
rather
rd
re
really
reasonably
regarding
regardless
regards
relatively
respectively
right
said
same
saw
say
saying
says
second
secondly
see
seeing
seem
seemed
seeming
seems
seen
self
selves
sensible
sent
serious
seriously
seven
several
shall
she
should
shouldn't
since
six
so
some
somebody
somehow
someone
something
sometime
sometimes
somewhat
somewhere
soon
sorry
specified
specify
specifying
still
sub
such
sup
sure
t's
take
taken
tell
tends
th
than
thank
thanks
thanx
that
that's
thats
the
their
theirs
them
themselves
then
thence
there
there's
thereafter
thereby
therefore
therein
theres
thereupon
these
they
they'd
they'll
they're
they've
think
third
this
thorough
thoroughly
those
though
three
through
throughout
thru
thus
to
together
too
took
toward
towards
tried
tries
truly
try
trying
twice
two
un
under
unfortunately
unless
unlikely
until
unto
up
upon
us
use
used
useful
uses
using
usually
value
various
very
via
viz
vs
want
wants
was
wasn't
way
we
we'd
we'll
we're
we've
welcome
well
went
were
weren't
what
what's
whatever
when
whence
whenever
where
where's
whereafter
whereas
whereby
wherein
whereupon
wherever
whether
which
while
whither
who
who's
whoever
whole
whom
whose
why
will
willing
wish
with
within
without
won't
wonder
would
wouldn't
yes
yet
you
you'd
you'll
you're
you've
your
yours
yourself
yourselves
zero

View file

@ -2844,7 +2844,9 @@ phutil_register_library_map(array(
'PhabricatorFulltextEngineExtensionModule' => 'applications/search/index/PhabricatorFulltextEngineExtensionModule.php',
'PhabricatorFulltextIndexEngineExtension' => 'applications/search/engineextension/PhabricatorFulltextIndexEngineExtension.php',
'PhabricatorFulltextInterface' => 'applications/search/interface/PhabricatorFulltextInterface.php',
'PhabricatorFulltextResultSet' => 'applications/search/query/PhabricatorFulltextResultSet.php',
'PhabricatorFulltextStorageEngine' => 'applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php',
'PhabricatorFulltextToken' => 'applications/search/query/PhabricatorFulltextToken.php',
'PhabricatorFundApplication' => 'applications/fund/application/PhabricatorFundApplication.php',
'PhabricatorGDSetupCheck' => 'applications/config/check/PhabricatorGDSetupCheck.php',
'PhabricatorGarbageCollector' => 'infrastructure/daemon/garbagecollector/PhabricatorGarbageCollector.php',
@ -8005,7 +8007,9 @@ phutil_register_library_map(array(
'PhabricatorFulltextEngineExtension' => 'Phobject',
'PhabricatorFulltextEngineExtensionModule' => 'PhabricatorConfigModule',
'PhabricatorFulltextIndexEngineExtension' => 'PhabricatorIndexEngineExtension',
'PhabricatorFulltextResultSet' => 'Phobject',
'PhabricatorFulltextStorageEngine' => 'Phobject',
'PhabricatorFulltextToken' => 'Phobject',
'PhabricatorFundApplication' => 'PhabricatorApplication',
'PhabricatorGDSetupCheck' => 'PhabricatorSetupCheck',
'PhabricatorGarbageCollector' => 'Phobject',

View file

@ -978,9 +978,15 @@ abstract class PhabricatorApplicationSearchEngine extends Phobject {
$objects = $query->executeWithCursorPager($pager);
}
$this->didExecuteQuery($query);
return $objects;
}
protected function didExecuteQuery(PhabricatorPolicyAwareQuery $query) {
return;
}
/* -( Rendering )---------------------------------------------------------- */

View file

@ -101,4 +101,8 @@ abstract class PhabricatorFulltextStorageEngine extends Phobject {
public function initIndex() {}
public function getFulltextTokens() {
return array();
}
}

View file

@ -3,6 +3,9 @@
final class PhabricatorMySQLFulltextStorageEngine
extends PhabricatorFulltextStorageEngine {
private $fulltextTokens = array();
private $engineLimits;
public function getEngineIdentifier() {
return 'mysql';
}
@ -203,8 +206,64 @@ final class PhabricatorMySQLFulltextStorageEngine
$title_field = PhabricatorSearchDocumentFieldType::FIELD_TITLE;
$title_boost = 1024;
$stemmer = new PhutilSearchStemmer();
$raw_query = $query->getParameter('query');
$compiled_query = $this->compileQuery($raw_query);
$raw_query = trim($raw_query);
if (strlen($raw_query)) {
$compiler = PhabricatorSearchDocument::newQueryCompiler()
->setStemmer($stemmer);
$tokens = $compiler->newTokens($raw_query);
list($min_length, $stopword_list) = $this->getEngineLimits($conn);
// Process all the parts of the user's query so we can show them which
// parts we searched for and which ones we ignored.
$fulltext_tokens = array();
foreach ($tokens as $key => $token) {
$fulltext_token = id(new PhabricatorFulltextToken())
->setToken($token);
$fulltext_tokens[$key] = $fulltext_token;
$value = $token->getValue();
if (phutil_utf8_strlen($value) < $min_length) {
$fulltext_token->setIsShort(true);
continue;
}
if (isset($stopword_list[phutil_utf8_strtolower($value)])) {
$fulltext_token->setIsStopword(true);
continue;
}
}
$this->fulltextTokens = $fulltext_tokens;
// Remove tokens which aren't queryable from the query. This is mostly
// a workaround for the peculiar behaviors described in T12137.
foreach ($this->fulltextTokens as $key => $fulltext_token) {
if (!$fulltext_token->isQueryable()) {
unset($tokens[$key]);
}
}
if (!$tokens) {
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'All of your search terms are too short or too common to '.
'appear in the search index. Search for longer or more '.
'distinctive terms.'));
}
$queries = array();
$queries[] = $compiler->compileLiteralQuery($tokens);
$queries[] = $compiler->compileStemmedQuery($tokens);
$compiled_query = implode(' ', array_filter($queries));
} else {
$compiled_query = null;
}
if (strlen($compiled_query)) {
$select[] = qsprintf(
$conn,
@ -394,21 +453,6 @@ final class PhabricatorMySQLFulltextStorageEngine
return $sql;
}
private function compileQuery($raw_query) {
$stemmer = new PhutilSearchStemmer();
$compiler = PhabricatorSearchDocument::newQueryCompiler()
->setStemmer($stemmer);
$tokens = $compiler->newTokens($raw_query);
$queries = array();
$queries[] = $compiler->compileLiteralQuery($tokens);
$queries[] = $compiler->compileStemmedQuery($tokens);
return implode(' ', array_filter($queries));
}
public function indexExists() {
return true;
}
@ -417,4 +461,56 @@ final class PhabricatorMySQLFulltextStorageEngine
return false;
}
public function getFulltextTokens() {
return $this->fulltextTokens;
}
private function getEngineLimits(AphrontDatabaseConnection $conn) {
if ($this->engineLimits === null) {
$this->engineLimits = $this->newEngineLimits($conn);
}
return $this->engineLimits;
}
private function newEngineLimits(AphrontDatabaseConnection $conn) {
$result = queryfx_one(
$conn,
'SELECT
@@innodb_ft_min_token_size innodb_max,
@@ft_min_word_len myisam_max,
@@ft_stopword_file myisam_stopwords');
if ($result['innodb_max']) {
$min_len = $result['innodb_max'];
$stopwords = queryfx_all(
$conn,
'SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_DEFAULT_STOPWORD');
$stopwords = ipull($stopwords, 'value');
$stopwords = array_fuse($stopwords);
} else {
$min_len = $result['myisam_max'];
$file = $result['myisam_stopwords'];
if (preg_match('(/resources/sql/stopwords\.txt\z)', $file)) {
// If this is set to something that looks like the Phabricator
// stopword file, read that.
$file = 'stopwords.txt';
} else {
// Otherwise, just use the default stopwords. This might be wrong
// but we can't read the actual value dynamically and reading
// whatever file the variable is set to could be a big headache
// to get right from a security perspective.
$file = 'stopwords_myisam.txt';
}
$root = dirname(phutil_get_library_root('phabricator'));
$data = Filesystem::readFile($root.'/resources/sql/'.$file);
$stopwords = explode("\n", $data);
$stopwords = array_filter($stopwords);
$stopwords = array_fuse($stopwords);
}
return array($min_len, $stopwords);
}
}

View file

@ -0,0 +1,26 @@
<?php
final class PhabricatorFulltextResultSet extends Phobject {
private $phids;
private $fulltextTokens;
public function setPHIDs($phids) {
$this->phids = $phids;
return $this;
}
public function getPHIDs() {
return $this->phids;
}
public function setFulltextTokens($fulltext_tokens) {
$this->fulltextTokens = $fulltext_tokens;
return $this;
}
public function getFulltextTokens() {
return $this->fulltextTokens;
}
}

View file

@ -0,0 +1,88 @@
<?php
final class PhabricatorFulltextToken extends Phobject {
private $token;
private $isShort;
private $isStopword;
public function setToken(PhutilSearchQueryToken $token) {
$this->token = $token;
return $this;
}
public function getToken() {
return $this->token;
}
public function isQueryable() {
return !$this->getIsShort() && !$this->getIsStopword();
}
public function setIsShort($is_short) {
$this->isShort = $is_short;
return $this;
}
public function getIsShort() {
return $this->isShort;
}
public function setIsStopword($is_stopword) {
$this->isStopword = $is_stopword;
return $this;
}
public function getIsStopword() {
return $this->isStopword;
}
public function newTag() {
$token = $this->getToken();
$tip = null;
$icon = null;
if ($this->getIsShort()) {
$shade = PHUITagView::COLOR_GREY;
$tip = pht('Ignored Short Word');
} else if ($this->getIsStopword()) {
$shade = PHUITagView::COLOR_GREY;
$tip = pht('Ignored Common Word');
} else {
$operator = $token->getOperator();
switch ($operator) {
case PhutilSearchQueryCompiler::OPERATOR_NOT:
$shade = PHUITagView::COLOR_RED;
$icon = 'fa-minus';
break;
default:
$shade = PHUITagView::COLOR_BLUE;
break;
}
}
$tag = id(new PHUITagView())
->setType(PHUITagView::TYPE_SHADE)
->setShade($shade)
->setName($token->getValue());
if ($tip !== null) {
Javelin::initBehavior('phabricator-tooltips');
$tag
->addSigil('has-tooltip')
->setMetadata(
array(
'tip' => $tip,
));
}
if ($icon !== null) {
$tag->setIcon($icon);
}
return $tag;
}
}

View file

@ -3,6 +3,8 @@
final class PhabricatorSearchApplicationSearchEngine
extends PhabricatorApplicationSearchEngine {
private $resultSet;
public function getResultTypeDescription() {
return pht('Fulltext Search Results');
}
@ -243,6 +245,9 @@ final class PhabricatorSearchApplicationSearchEngine
PhabricatorSavedQuery $query,
array $handles) {
$result_set = $this->resultSet;
$fulltext_tokens = $result_set->getFulltextTokens();
$viewer = $this->requireViewer();
$list = new PHUIObjectItemListView();
$list->setNoDataString(pht('No results found.'));
@ -263,7 +268,28 @@ final class PhabricatorSearchApplicationSearchEngine
}
}
$fulltext_view = null;
if ($fulltext_tokens) {
require_celerity_resource('phabricator-search-results-css');
$fulltext_view = array();
foreach ($fulltext_tokens as $token) {
$fulltext_view[] = $token->newTag();
}
$fulltext_view = phutil_tag(
'div',
array(
'class' => 'phui-fulltext-tokens',
),
array(
pht('Searched For:'),
' ',
$fulltext_view,
));
}
$result = new PhabricatorApplicationSearchResultView();
$result->setContent($fulltext_view);
$result->setObjectList($list);
return $result;
@ -280,4 +306,8 @@ final class PhabricatorSearchApplicationSearchEngine
return $owner_phids;
}
protected function didExecuteQuery(PhabricatorPolicyAwareQuery $query) {
$this->resultSet = $query->getFulltextResultSet();
}
}

View file

@ -6,6 +6,7 @@ final class PhabricatorSearchDocumentQuery
private $savedQuery;
private $objectCapabilities;
private $unfilteredOffset;
private $fulltextResultSet;
public function withSavedQuery(PhabricatorSavedQuery $query) {
$this->savedQuery = $query;
@ -25,8 +26,17 @@ final class PhabricatorSearchDocumentQuery
return $this->getRequiredCapabilities();
}
public function getFulltextResultSet() {
if (!$this->fulltextResultSet) {
throw new PhutilInvalidStateException('execute');
}
return $this->fulltextResultSet;
}
protected function willExecute() {
$this->unfilteredOffset = 0;
$this->fulltextResultSet = null;
}
protected function loadPage() {
@ -39,8 +49,10 @@ final class PhabricatorSearchDocumentQuery
->setParameter('offset', $this->unfilteredOffset)
->setParameter('limit', $this->getRawResultLimit());
$phids = PhabricatorSearchService::executeSearch($query);
$result_set = PhabricatorSearchService::newResultSet($query, $this);
$phids = $result_set->getPHIDs();
$this->fulltextResultSet = $result_set;
$this->unfilteredOffset += count($phids);
$handles = id(new PhabricatorHandleQuery())

View file

@ -245,14 +245,25 @@ class PhabricatorSearchService
* @throws PhutilAggregateException
*/
public static function executeSearch(PhabricatorSavedQuery $query) {
$result_set = self::newResultSet($query);
return $result_set->getPHIDs();
}
public static function newResultSet(PhabricatorSavedQuery $query) {
$exceptions = array();
// try all services until one succeeds
foreach (self::getAllServices() as $service) {
if (!$service->isReadable()) {
continue;
}
try {
$engine = $service->getEngine();
$res = $engine->executeSearch($query);
// return immediately if we get results
return $res;
$phids = $engine->executeSearch($query);
return id(new PhabricatorFulltextResultSet())
->setPHIDs($phids)
->setFulltextTokens($engine->getFulltextTokens());
} catch (PhutilSearchQueryCompilerSyntaxException $ex) {
// If there's a query compilation error, return it directly to the
// user: they issued a query with bad syntax.

View file

@ -16,3 +16,16 @@
font-weight: normal;
color: #000;
}
.phui-fulltext-tokens {
margin: 16px 8px;
font-weight: bold;
}
.phui-fulltext-tokens .phui-tag-view {
margin: 0 2px;
}
.phui-fulltext-tokens .phui-tag-view.phui-tag-shade-grey {
opacity: 0.5;
}