From a604548101025875de20a9c263df3790fea425b3 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 25 Apr 2018 07:57:54 -0700 Subject: [PATCH] Slightly improve base85 performance for 64-bit systems Summary: Ref T13130. I wasn't able make this much better, but it looks like this is about ~20% faster on my system. This kind of thing is somewhat difficult to micro-optimize because XHProf tends to over-estimate the cost of function calls. In XHProf, this looks much much faster than the old version (~100% faster) but the actual cost of `bin/conduit call --method differential.getrawdiff` hasn't improved that much. Still, it seems consistently faster across multiple runs. Test Plan: - Pulled binary diffs over Conduit with `bin/conduit call --method differential.getrawdiff`. - Verified that they are byte-for-byte identical with the pre-change diffs, and look like they're ~20% faster. - Profiled the differences and saw a far more dramatic improvement, but I believe XHProf is exaggerating the effect of this change because it tends to overestimate function call cost. - Ran unit tests (from D19407), got byte-for-byte identical output under both 32bit and 64bit mode. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13130 Differential Revision: https://secure.phabricator.com/D19408 --- src/parser/ArcanistBundle.php | 97 ++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/src/parser/ArcanistBundle.php b/src/parser/ArcanistBundle.php index 265883f6..399ff611 100644 --- a/src/parser/ArcanistBundle.php +++ b/src/parser/ArcanistBundle.php @@ -800,13 +800,16 @@ final class ArcanistBundle extends Phobject { } $input = gzcompress($data); + $is_64bit = (PHP_INT_SIZE >= 8); } else { switch ($mode) { case '32bit': $input = $data; + $is_64bit = false; break; case '64bit': $input = $data; + $is_64bit = true; break; default: throw new Exception( @@ -818,25 +821,6 @@ final class ArcanistBundle extends Phobject { // See emit_binary_diff_body() in diff.c for git's implementation. - $buf = ''; - - $lines = str_split($input, 52); - foreach ($lines as $line) { - $len = strlen($line); - // The first character encodes the line length. - if ($len <= 26) { - $buf .= chr($len + ord('A') - 1); - } else { - $buf .= chr($len - 26 + ord('a') - 1); - } - $buf .= self::encodeBase85($line); - $buf .= $eol; - } - - return $buf; - } - - private static function encodeBase85($data) { // This is implemented awkwardly in order to closely mirror git's // implementation in base85.c @@ -864,6 +848,9 @@ final class ArcanistBundle extends Phobject { // (Since PHP overflows integer operations into floats, we don't need much // additional casting.) + // On 64 bit systems, we skip all this fanfare and just use integers. This + // is significantly faster. + static $map = array( '0', '1', @@ -952,27 +939,65 @@ final class ArcanistBundle extends Phobject { '~', ); + $len_map = array(); + for ($ii = 0; $ii <= 52; $ii++) { + if ($ii <= 26) { + $len_map[$ii] = chr($ii + ord('A') - 1); + } else { + $len_map[$ii] = chr($ii - 26 + ord('a') - 1); + } + } + $buf = ''; - $pos = 0; - $bytes = strlen($data); - while ($bytes) { - $accum = 0; - for ($count = 24; $count >= 0; $count -= 8) { - $val = ord($data[$pos++]); - $val = $val * (1 << $count); - $accum = $accum + $val; - if (--$bytes == 0) { - break; + $lines = str_split($input, 52); + $final = (count($lines) - 1); + + foreach ($lines as $idx => $line) { + if ($idx === $final) { + $len = strlen($line); + } else { + $len = 52; + } + + // The first character encodes the line length. + $buf .= $len_map[$len]; + + $pos = 0; + while ($len) { + $accum = 0; + for ($count = 24; $count >= 0; $count -= 8) { + $val = ord($line[$pos++]); + $val = $val * (1 << $count); + $accum = $accum + $val; + if (--$len == 0) { + break; + } } + + $slice = ''; + + // If we're in 64bit mode, we can just use integers. Otherwise, we + // need to use floating point math to avoid overflows. + + if ($is_64bit) { + for ($count = 4; $count >= 0; $count--) { + $val = $accum % 85; + $accum = $accum / 85; + $slice .= $map[$val]; + } + } else { + for ($count = 4; $count >= 0; $count--) { + $val = (int)fmod($accum, 85.0); + $accum = floor($accum / 85.0); + $slice .= $map[$val]; + } + } + + $buf .= strrev($slice); } - $slice = ''; - for ($count = 4; $count >= 0; $count--) { - $val = (int)fmod($accum, 85.0); - $accum = floor($accum / 85.0); - $slice .= $map[$val]; - } - $buf .= strrev($slice); + + $buf .= $eol; } return $buf;