mirror of
https://we.phorge.it/source/phorge.git
synced 2025-04-07 09:58:33 +02:00
Write search bolding in a way which is certainly HTML-safe
Summary: This algorithm is tricky and calls `phutil_safe_html()` directly, which makes it potentially unsafe. In particular, D8859 fixes a bug in it that caused it to produce non-UTF-8 output. That doesn't prove there is a security problem, but it does make the code suspicious. I don't actually see a way to break it, but this change rewrites it so that it is absolutely bulletproof and no longer needs to call `phutil_safe_html()`. Test Plan: {F147487} @rugabarbo, if you have a chance, can you check whether this still works for you? Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley, rugabarbo Differential Revision: https://secure.phabricator.com/D8862
This commit is contained in:
parent
1b0d53ec65
commit
88ae246593
1 changed files with 81 additions and 22 deletions
|
@ -76,33 +76,92 @@ final class PhabricatorSearchResultView extends AphrontView {
|
||||||
$link);
|
$link);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the words which are part of the query string, and bold them in a
|
||||||
|
* result string. This makes it easier for users to see why a result
|
||||||
|
* matched their query.
|
||||||
|
*/
|
||||||
private function emboldenQuery($str) {
|
private function emboldenQuery($str) {
|
||||||
if (!$this->query) {
|
$query = $this->query->getParameter('query');
|
||||||
|
|
||||||
|
if (!strlen($query) || !strlen($str)) {
|
||||||
return $str;
|
return $str;
|
||||||
}
|
}
|
||||||
|
|
||||||
$query = $this->query->getParameter('query');
|
// This algorithm is safe but not especially fast, so don't bother if
|
||||||
|
// we're dealing with a lot of data. This mostly prevents silly/malicious
|
||||||
$quoted_regexp = '/"([^"]*)"/';
|
// queries from doing anything bad.
|
||||||
$matches = array(1 => array());
|
if (strlen($query) + strlen($str) > 2048) {
|
||||||
preg_match_all($quoted_regexp, $query, $matches);
|
return $str;
|
||||||
$quoted_queries = $matches[1];
|
|
||||||
$query = preg_replace($quoted_regexp, '', $query);
|
|
||||||
|
|
||||||
$query = preg_split('/\s+[+|]?/u', $query);
|
|
||||||
$query = array_filter($query);
|
|
||||||
$query = array_merge($query, $quoted_queries);
|
|
||||||
$str = phutil_escape_html($str);
|
|
||||||
foreach ($query as $word) {
|
|
||||||
$word = phutil_escape_html($word);
|
|
||||||
$word = preg_quote($word, '/');
|
|
||||||
$word = preg_replace('/\\\\\*$/', '\w*', $word);
|
|
||||||
$str = preg_replace(
|
|
||||||
'/(?:^|\b)('.$word.')(?:\b|$)/i',
|
|
||||||
'<strong>\1</strong>',
|
|
||||||
$str);
|
|
||||||
}
|
}
|
||||||
return phutil_safe_html($str);
|
|
||||||
|
// Keep track of which characters we're going to make bold. This is
|
||||||
|
// byte oriented, but we'll make sure we don't put a bold in the middle
|
||||||
|
// of a character later.
|
||||||
|
$bold = array_fill(0, strlen($str), false);
|
||||||
|
|
||||||
|
// Split the query into words.
|
||||||
|
$parts = preg_split('/ +/', $query);
|
||||||
|
|
||||||
|
// Find all occurrences of each word, and mark them to be emboldened.
|
||||||
|
foreach ($parts as $part) {
|
||||||
|
$part = trim($part);
|
||||||
|
$part = trim($part, '"+');
|
||||||
|
if (!strlen($part)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$matches = null;
|
||||||
|
$has_matches = preg_match_all(
|
||||||
|
'/(?:^|\b)('.preg_quote($part, '/').')/i',
|
||||||
|
$str,
|
||||||
|
$matches,
|
||||||
|
PREG_OFFSET_CAPTURE);
|
||||||
|
|
||||||
|
if (!$has_matches) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flag the matching part of the range for boldening.
|
||||||
|
foreach ($matches[1] as $match) {
|
||||||
|
$offset = $match[1];
|
||||||
|
for ($ii = 0; $ii < strlen($match[0]); $ii++) {
|
||||||
|
$bold[$offset + $ii] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split the string into ranges, applying bold styling as required.
|
||||||
|
$out = array();
|
||||||
|
$buf = '';
|
||||||
|
$pos = 0;
|
||||||
|
$is_bold = false;
|
||||||
|
foreach (phutil_utf8v($str) as $chr) {
|
||||||
|
if ($bold[$pos] != $is_bold) {
|
||||||
|
if (strlen($buf)) {
|
||||||
|
if ($is_bold) {
|
||||||
|
$out[] = phutil_tag('strong', array(), $buf);
|
||||||
|
} else {
|
||||||
|
$out[] = $buf;
|
||||||
|
}
|
||||||
|
$buf = '';
|
||||||
|
}
|
||||||
|
$is_bold = !$is_bold;
|
||||||
|
}
|
||||||
|
$buf .= $chr;
|
||||||
|
$pos += strlen($chr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strlen($buf)) {
|
||||||
|
if ($is_bold) {
|
||||||
|
$out[] = phutil_tag('strong', array(), $buf);
|
||||||
|
} else {
|
||||||
|
$out[] = $buf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue