mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-27 09:12:41 +01:00
Write search bolding in a way which is certainly HTML-safe
Summary: This algorithm is tricky, and uses `phutil_safe_html()` directly, which makes it potentially unsafe. In particular, D8859 fixes a bug in it that caused it to produce non-UTF-8 output. This doesn't guarantee it's a security problem, but does make it suspicious. I don't actually see a way to break it, but rewrite it so that it's absolutely bulletproof and does not need to call `phutil_safe_html()`. Test Plan: {F147487} @rugabarbo, if you have a chance, can you check if this still works for you? Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley, rugabarbo Differential Revision: https://secure.phabricator.com/D8862
This commit is contained in:
parent
1b0d53ec65
commit
88ae246593
1 changed file with 81 additions and 22 deletions
|
@@ -76,33 +76,92 @@ final class PhabricatorSearchResultView extends AphrontView {
|
|||
$link);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find the words which are part of the query string, and bold them in a
|
||||
* result string. This makes it easier for users to see why a result
|
||||
* matched their query.
|
||||
*/
|
||||
private function emboldenQuery($str) {
|
||||
if (!$this->query) {
|
||||
$query = $this->query->getParameter('query');
|
||||
|
||||
if (!strlen($query) || !strlen($str)) {
|
||||
return $str;
|
||||
}
|
||||
|
||||
$query = $this->query->getParameter('query');
|
||||
|
||||
$quoted_regexp = '/"([^"]*)"/';
|
||||
$matches = array(1 => array());
|
||||
preg_match_all($quoted_regexp, $query, $matches);
|
||||
$quoted_queries = $matches[1];
|
||||
$query = preg_replace($quoted_regexp, '', $query);
|
||||
|
||||
$query = preg_split('/\s+[+|]?/u', $query);
|
||||
$query = array_filter($query);
|
||||
$query = array_merge($query, $quoted_queries);
|
||||
$str = phutil_escape_html($str);
|
||||
foreach ($query as $word) {
|
||||
$word = phutil_escape_html($word);
|
||||
$word = preg_quote($word, '/');
|
||||
$word = preg_replace('/\\\\\*$/', '\w*', $word);
|
||||
$str = preg_replace(
|
||||
'/(?:^|\b)('.$word.')(?:\b|$)/i',
|
||||
'<strong>\1</strong>',
|
||||
$str);
|
||||
// This algorithm is safe but not especially fast, so don't bother if
|
||||
// we're dealing with a lot of data. This mostly prevents silly/malicious
|
||||
// queries from doing anything bad.
|
||||
if (strlen($query) + strlen($str) > 2048) {
|
||||
return $str;
|
||||
}
|
||||
return phutil_safe_html($str);
|
||||
|
||||
// Keep track of which characters we're going to make bold. This is
|
||||
// byte oriented, but we'll make sure we don't put a bold in the middle
|
||||
// of a character later.
|
||||
$bold = array_fill(0, strlen($str), false);
|
||||
|
||||
// Split the query into words.
|
||||
$parts = preg_split('/ +/', $query);
|
||||
|
||||
// Find all occurrences of each word, and mark them to be emboldened.
|
||||
foreach ($parts as $part) {
|
||||
$part = trim($part);
|
||||
$part = trim($part, '"+');
|
||||
if (!strlen($part)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$matches = null;
|
||||
$has_matches = preg_match_all(
|
||||
'/(?:^|\b)('.preg_quote($part, '/').')/i',
|
||||
$str,
|
||||
$matches,
|
||||
PREG_OFFSET_CAPTURE);
|
||||
|
||||
if (!$has_matches) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Flag the matching part of the range for boldening.
|
||||
foreach ($matches[1] as $match) {
|
||||
$offset = $match[1];
|
||||
for ($ii = 0; $ii < strlen($match[0]); $ii++) {
|
||||
$bold[$offset + $ii] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Split the string into ranges, applying bold styling as required.
|
||||
$out = array();
|
||||
$buf = '';
|
||||
$pos = 0;
|
||||
$is_bold = false;
|
||||
foreach (phutil_utf8v($str) as $chr) {
|
||||
if ($bold[$pos] != $is_bold) {
|
||||
if (strlen($buf)) {
|
||||
if ($is_bold) {
|
||||
$out[] = phutil_tag('strong', array(), $buf);
|
||||
} else {
|
||||
$out[] = $buf;
|
||||
}
|
||||
$buf = '';
|
||||
}
|
||||
$is_bold = !$is_bold;
|
||||
}
|
||||
$buf .= $chr;
|
||||
$pos += strlen($chr);
|
||||
}
|
||||
|
||||
if (strlen($buf)) {
|
||||
if ($is_bold) {
|
||||
$out[] = phutil_tag('strong', array(), $buf);
|
||||
} else {
|
||||
$out[] = $buf;
|
||||
}
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue