1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-10 23:01:04 +01:00

Digest changeset anchors into purely alphanumeric strings

Summary:
Ref T13045. See that task for discussion.

This replaces `digestForIndex()` with a "clever" algorithm in `digestForAnchor()`. The new digest is the same as `digestForIndex()` except when the original output was "." or "_". In those cases, a replacement character is selected based on entropy accumulated by the digest function as it iterates through the string.

Test Plan: Added unit tests.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13045

Differential Revision: https://secure.phabricator.com/D18909
This commit is contained in:
epriestley 2018-01-22 07:23:04 -08:00
parent b8a515cb29
commit 6b99aac49d
3 changed files with 103 additions and 3 deletions

View file

@ -173,7 +173,7 @@ final class DifferentialChangeset
}
public function getAnchorName() {
return 'change-'.PhabricatorHash::digestForIndex($this->getFilename());
return 'change-'.PhabricatorHash::digestForAnchor($this->getFilename());
}
public function getAbsoluteRepositoryPath(

View file

@ -3,6 +3,7 @@
final class PhabricatorHash extends Phobject {
const INDEX_DIGEST_LENGTH = 12;
const ANCHOR_DIGEST_LENGTH = 12;
/**
* Digest a string using HMAC+SHA1.
@ -38,8 +39,8 @@ final class PhabricatorHash extends Phobject {
* related hashing (for general purpose hashing, see @{method:digest}).
*
* @param string Input string.
* @return string 12-byte, case-sensitive alphanumeric hash of the string
* which
* @return string 12-byte, case-sensitive, mostly-alphanumeric hash of
* the string.
*/
public static function digestForIndex($string) {
$hash = sha1($string, $raw_output = true);
@ -63,6 +64,62 @@ final class PhabricatorHash extends Phobject {
return $result;
}
/**
* Digest a string for use in HTML page anchors. This is similar to
* @{method:digestForIndex} but produces purely alphanumeric output.
*
* This tries to be mostly compatible with the index digest to limit how
* much stuff we're breaking by switching to it. For additional discussion,
* see T13045.
*
* @param string Input string.
* @return string 12-byte, case-sensitive, purely-alphanumeric hash of
* the string.
*/
public static function digestForAnchor($string) {
$hash = sha1($string, $raw_output = true);
static $map;
if ($map === null) {
$map = '0123456789'.
'abcdefghij'.
'klmnopqrst'.
'uvwxyzABCD'.
'EFGHIJKLMN'.
'OPQRSTUVWX'.
'YZ';
}
$result = '';
$accum = 0;
$map_size = strlen($map);
for ($ii = 0; $ii < self::ANCHOR_DIGEST_LENGTH; $ii++) {
$byte = ord($hash[$ii]);
$low_bits = ($byte & 0x3F);
$accum = ($accum + $byte) % $map_size;
if ($low_bits < $map_size) {
// If an index digest would produce any alphanumeric character, just
// use that character. This means that these digests are the same as
// digests created with "digestForIndex()" in all positions where the
// output character is some character other than "." or "_".
$result .= $map[$low_bits];
} else {
// If an index digest would produce a non-alphumeric character ("." or
// "_"), pick an alphanumeric character instead. We accumulate an
// index into the alphanumeric character list to try to preserve
// entropy here. We could use this strategy for all bytes instead,
// but then these digests would differ from digests created with
// "digestForIndex()" in all positions, instead of just a small number
// of positions.
$result .= $map[$accum];
}
}
return $result;
}
public static function digestToRange($string, $min, $max) {
if ($min > $max) {
throw new Exception(pht('Maximum must be larger than minimum.'));

View file

@ -52,4 +52,47 @@ final class PhabricatorHashTestCase extends PhabricatorTestCase {
pht('Distinct characters in hash of: %s', $input));
}
public function testHashForAnchor() {
$map = array(
// For inputs with no "." or "_" in the output, digesting for an index
// or an anchor should be the same.
'dog' => array(
'Aliif7Qjd5ct',
'Aliif7Qjd5ct',
),
// When an output would contain "." or "_", it should be replaced with
// an alphanumeric character in those positions instead.
'fig' => array(
'OpB9tY4i.MOX',
'OpB9tY4imMOX',
),
'cot' => array(
'_iF26XU_PsVY',
'3iF26XUkPsVY',
),
// The replacement characters are well-distributed and generally keep
// the entropy of the output high: here, "_" characters in the initial
// positions of the digests of "cot" (above) and "dug" (this test) have
// different outputs.
'dug' => array(
'_XuQnp0LUlUW',
'7XuQnp0LUlUW',
),
);
foreach ($map as $input => $expect) {
list($expect_index, $expect_anchor) = $expect;
$this->assertEqual(
$expect_index,
PhabricatorHash::digestForIndex($input),
pht('Index digest of "%s".', $input));
$this->assertEqual(
$expect_anchor,
PhabricatorHash::digestForAnchor($input),
pht('Anchor digest of "%s".', $input));
}
}
}