mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-11-29 10:12:41 +01:00
Slightly improve base85 performance for 64-bit systems
Summary: Ref T13130. I wasn't able make this much better, but it looks like this is about ~20% faster on my system. This kind of thing is somewhat difficult to micro-optimize because XHProf tends to over-estimate the cost of function calls. In XHProf, this looks much much faster than the old version (~100% faster) but the actual cost of `bin/conduit call --method differential.getrawdiff` hasn't improved that much. Still, it seems consistently faster across multiple runs. Test Plan: - Pulled binary diffs over Conduit with `bin/conduit call --method differential.getrawdiff`. - Verified that they are byte-for-byte identical with the pre-change diffs, and look like they're ~20% faster. - Profiled the differences and saw a far more dramatic improvement, but I believe XHProf is exaggerating the effect of this change because it tends to overestimate function call cost. - Ran unit tests (from D19407), got byte-for-byte identical output under both 32bit and 64bit mode. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13130 Differential Revision: https://secure.phabricator.com/D19408
This commit is contained in:
parent
bcab677a7a
commit
a604548101
1 changed files with 61 additions and 36 deletions
|
@ -800,13 +800,16 @@ final class ArcanistBundle extends Phobject {
|
||||||
}
|
}
|
||||||
|
|
||||||
$input = gzcompress($data);
|
$input = gzcompress($data);
|
||||||
|
$is_64bit = (PHP_INT_SIZE >= 8);
|
||||||
} else {
|
} else {
|
||||||
switch ($mode) {
|
switch ($mode) {
|
||||||
case '32bit':
|
case '32bit':
|
||||||
$input = $data;
|
$input = $data;
|
||||||
|
$is_64bit = false;
|
||||||
break;
|
break;
|
||||||
case '64bit':
|
case '64bit':
|
||||||
$input = $data;
|
$input = $data;
|
||||||
|
$is_64bit = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Exception(
|
throw new Exception(
|
||||||
|
@ -818,25 +821,6 @@ final class ArcanistBundle extends Phobject {
|
||||||
|
|
||||||
// See emit_binary_diff_body() in diff.c for git's implementation.
|
// See emit_binary_diff_body() in diff.c for git's implementation.
|
||||||
|
|
||||||
$buf = '';
|
|
||||||
|
|
||||||
$lines = str_split($input, 52);
|
|
||||||
foreach ($lines as $line) {
|
|
||||||
$len = strlen($line);
|
|
||||||
// The first character encodes the line length.
|
|
||||||
if ($len <= 26) {
|
|
||||||
$buf .= chr($len + ord('A') - 1);
|
|
||||||
} else {
|
|
||||||
$buf .= chr($len - 26 + ord('a') - 1);
|
|
||||||
}
|
|
||||||
$buf .= self::encodeBase85($line);
|
|
||||||
$buf .= $eol;
|
|
||||||
}
|
|
||||||
|
|
||||||
return $buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static function encodeBase85($data) {
|
|
||||||
// This is implemented awkwardly in order to closely mirror git's
|
// This is implemented awkwardly in order to closely mirror git's
|
||||||
// implementation in base85.c
|
// implementation in base85.c
|
||||||
|
|
||||||
|
@ -864,6 +848,9 @@ final class ArcanistBundle extends Phobject {
|
||||||
// (Since PHP overflows integer operations into floats, we don't need much
|
// (Since PHP overflows integer operations into floats, we don't need much
|
||||||
// additional casting.)
|
// additional casting.)
|
||||||
|
|
||||||
|
// On 64 bit systems, we skip all this fanfare and just use integers. This
|
||||||
|
// is significantly faster.
|
||||||
|
|
||||||
static $map = array(
|
static $map = array(
|
||||||
'0',
|
'0',
|
||||||
'1',
|
'1',
|
||||||
|
@ -952,29 +939,67 @@ final class ArcanistBundle extends Phobject {
|
||||||
'~',
|
'~',
|
||||||
);
|
);
|
||||||
|
|
||||||
|
$len_map = array();
|
||||||
|
for ($ii = 0; $ii <= 52; $ii++) {
|
||||||
|
if ($ii <= 26) {
|
||||||
|
$len_map[$ii] = chr($ii + ord('A') - 1);
|
||||||
|
} else {
|
||||||
|
$len_map[$ii] = chr($ii - 26 + ord('a') - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$buf = '';
|
$buf = '';
|
||||||
|
|
||||||
|
$lines = str_split($input, 52);
|
||||||
|
$final = (count($lines) - 1);
|
||||||
|
|
||||||
|
foreach ($lines as $idx => $line) {
|
||||||
|
if ($idx === $final) {
|
||||||
|
$len = strlen($line);
|
||||||
|
} else {
|
||||||
|
$len = 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The first character encodes the line length.
|
||||||
|
$buf .= $len_map[$len];
|
||||||
|
|
||||||
$pos = 0;
|
$pos = 0;
|
||||||
$bytes = strlen($data);
|
while ($len) {
|
||||||
while ($bytes) {
|
|
||||||
$accum = 0;
|
$accum = 0;
|
||||||
for ($count = 24; $count >= 0; $count -= 8) {
|
for ($count = 24; $count >= 0; $count -= 8) {
|
||||||
$val = ord($data[$pos++]);
|
$val = ord($line[$pos++]);
|
||||||
$val = $val * (1 << $count);
|
$val = $val * (1 << $count);
|
||||||
$accum = $accum + $val;
|
$accum = $accum + $val;
|
||||||
if (--$bytes == 0) {
|
if (--$len == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$slice = '';
|
$slice = '';
|
||||||
|
|
||||||
|
// If we're in 64bit mode, we can just use integers. Otherwise, we
|
||||||
|
// need to use floating point math to avoid overflows.
|
||||||
|
|
||||||
|
if ($is_64bit) {
|
||||||
|
for ($count = 4; $count >= 0; $count--) {
|
||||||
|
$val = $accum % 85;
|
||||||
|
$accum = $accum / 85;
|
||||||
|
$slice .= $map[$val];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
for ($count = 4; $count >= 0; $count--) {
|
for ($count = 4; $count >= 0; $count--) {
|
||||||
$val = (int)fmod($accum, 85.0);
|
$val = (int)fmod($accum, 85.0);
|
||||||
$accum = floor($accum / 85.0);
|
$accum = floor($accum / 85.0);
|
||||||
$slice .= $map[$val];
|
$slice .= $map[$val];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$buf .= strrev($slice);
|
$buf .= strrev($slice);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$buf .= $eol;
|
||||||
|
}
|
||||||
|
|
||||||
return $buf;
|
return $buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue