mirror of
https://we.phorge.it/source/phorge.git
synced 2025-01-10 23:01:04 +01:00
Digest changeset anchors into purely alphanumeric strings
Summary: Ref T13045. See that task for discussion. This replaces `digestForIndex()` with a "clever" algorithm in `digestForAnchor()`. The new digest is the same as `digestForIndex()` except when the original output was "." or "_". In those cases, a replacement character is selected based on entropy accumulated by the digest function as it iterates through the string. Test Plan: Added unit tests. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13045 Differential Revision: https://secure.phabricator.com/D18909
This commit is contained in:
parent
b8a515cb29
commit
6b99aac49d
3 changed files with 103 additions and 3 deletions
|
@ -173,7 +173,7 @@ final class DifferentialChangeset
|
|||
}
|
||||
|
||||
public function getAnchorName() {
|
||||
return 'change-'.PhabricatorHash::digestForIndex($this->getFilename());
|
||||
return 'change-'.PhabricatorHash::digestForAnchor($this->getFilename());
|
||||
}
|
||||
|
||||
public function getAbsoluteRepositoryPath(
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
final class PhabricatorHash extends Phobject {
|
||||
|
||||
const INDEX_DIGEST_LENGTH = 12;
|
||||
const ANCHOR_DIGEST_LENGTH = 12;
|
||||
|
||||
/**
|
||||
* Digest a string using HMAC+SHA1.
|
||||
|
@ -38,8 +39,8 @@ final class PhabricatorHash extends Phobject {
|
|||
* related hashing (for general purpose hashing, see @{method:digest}).
|
||||
*
|
||||
* @param string Input string.
|
||||
* @return string 12-byte, case-sensitive alphanumeric hash of the string
|
||||
* which
|
||||
* @return string 12-byte, case-sensitive, mostly-alphanumeric hash of
|
||||
* the string.
|
||||
*/
|
||||
public static function digestForIndex($string) {
|
||||
$hash = sha1($string, $raw_output = true);
|
||||
|
@ -63,6 +64,62 @@ final class PhabricatorHash extends Phobject {
|
|||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Digest a string for use in HTML page anchors. This is similar to
|
||||
* @{method:digestForIndex} but produces purely alphanumeric output.
|
||||
*
|
||||
* This tries to be mostly compatible with the index digest to limit how
|
||||
* much stuff we're breaking by switching to it. For additional discussion,
|
||||
* see T13045.
|
||||
*
|
||||
* @param string Input string.
|
||||
* @return string 12-byte, case-sensitive, purely-alphanumeric hash of
|
||||
* the string.
|
||||
*/
|
||||
public static function digestForAnchor($string) {
|
||||
$hash = sha1($string, $raw_output = true);
|
||||
|
||||
static $map;
|
||||
if ($map === null) {
|
||||
$map = '0123456789'.
|
||||
'abcdefghij'.
|
||||
'klmnopqrst'.
|
||||
'uvwxyzABCD'.
|
||||
'EFGHIJKLMN'.
|
||||
'OPQRSTUVWX'.
|
||||
'YZ';
|
||||
}
|
||||
|
||||
$result = '';
|
||||
$accum = 0;
|
||||
$map_size = strlen($map);
|
||||
for ($ii = 0; $ii < self::ANCHOR_DIGEST_LENGTH; $ii++) {
|
||||
$byte = ord($hash[$ii]);
|
||||
$low_bits = ($byte & 0x3F);
|
||||
$accum = ($accum + $byte) % $map_size;
|
||||
|
||||
if ($low_bits < $map_size) {
|
||||
// If an index digest would produce any alphanumeric character, just
|
||||
// use that character. This means that these digests are the same as
|
||||
// digests created with "digestForIndex()" in all positions where the
|
||||
// output character is some character other than "." or "_".
|
||||
$result .= $map[$low_bits];
|
||||
} else {
|
||||
// If an index digest would produce a non-alphumeric character ("." or
|
||||
// "_"), pick an alphanumeric character instead. We accumulate an
|
||||
// index into the alphanumeric character list to try to preserve
|
||||
// entropy here. We could use this strategy for all bytes instead,
|
||||
// but then these digests would differ from digests created with
|
||||
// "digestForIndex()" in all positions, instead of just a small number
|
||||
// of positions.
|
||||
$result .= $map[$accum];
|
||||
}
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
|
||||
public static function digestToRange($string, $min, $max) {
|
||||
if ($min > $max) {
|
||||
throw new Exception(pht('Maximum must be larger than minimum.'));
|
||||
|
|
|
@ -52,4 +52,47 @@ final class PhabricatorHashTestCase extends PhabricatorTestCase {
|
|||
pht('Distinct characters in hash of: %s', $input));
|
||||
}
|
||||
|
||||
public function testHashForAnchor() {
|
||||
$map = array(
|
||||
// For inputs with no "." or "_" in the output, digesting for an index
|
||||
// or an anchor should be the same.
|
||||
'dog' => array(
|
||||
'Aliif7Qjd5ct',
|
||||
'Aliif7Qjd5ct',
|
||||
),
|
||||
// When an output would contain "." or "_", it should be replaced with
|
||||
// an alphanumeric character in those positions instead.
|
||||
'fig' => array(
|
||||
'OpB9tY4i.MOX',
|
||||
'OpB9tY4imMOX',
|
||||
),
|
||||
'cot' => array(
|
||||
'_iF26XU_PsVY',
|
||||
'3iF26XUkPsVY',
|
||||
),
|
||||
// The replacement characters are well-distributed and generally keep
|
||||
// the entropy of the output high: here, "_" characters in the initial
|
||||
// positions of the digests of "cot" (above) and "dug" (this test) have
|
||||
// different outputs.
|
||||
'dug' => array(
|
||||
'_XuQnp0LUlUW',
|
||||
'7XuQnp0LUlUW',
|
||||
),
|
||||
);
|
||||
|
||||
foreach ($map as $input => $expect) {
|
||||
list($expect_index, $expect_anchor) = $expect;
|
||||
|
||||
$this->assertEqual(
|
||||
$expect_index,
|
||||
PhabricatorHash::digestForIndex($input),
|
||||
pht('Index digest of "%s".', $input));
|
||||
|
||||
$this->assertEqual(
|
||||
$expect_anchor,
|
||||
PhabricatorHash::digestForAnchor($input),
|
||||
pht('Anchor digest of "%s".', $input));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue