From bcab677a7a8cf42d34b820c08101877c65334953 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 25 Apr 2018 07:15:51 -0700 Subject: [PATCH 1/2] Restructure base85 unit tests to support inlining and multiple encoding pathways Summary: Ref T13130. I want to take a crack at improving performance here, but two possible approaches (inlining the actual encoding; using integers if they're big enough) aren't easy to test right now. Restructure the tests so they can support these kinds of refactoring. The "32bit" and "64bit" modes currently do the same thing, but I expect to introduce separate encoding pathways in a future change, if the profiler says it actually helps. (I'll hold this and everything that comes after it until I make meaningful performance improvements.) Test Plan: Ran `arc unit`, got passes on tests. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13130 Differential Revision: https://secure.phabricator.com/D19407 --- src/parser/ArcanistBundle.php | 43 +++++++--- .../__tests__/ArcanistBundleTestCase.php | 30 ++++--- src/parser/__tests__/base85/expect1.txt | 11 ++- src/parser/__tests__/base85/expect2.txt | 80 ++++++++++++++++++- 4 files changed, 144 insertions(+), 20 deletions(-) diff --git a/src/parser/ArcanistBundle.php b/src/parser/ArcanistBundle.php index 7ec8b89d..265883f6 100644 --- a/src/parser/ArcanistBundle.php +++ b/src/parser/ArcanistBundle.php @@ -781,21 +781,46 @@ final class ArcanistBundle extends Phobject { private function emitBinaryDiffBody($data) { $eol = $this->getEOL('git'); + return self::newBase85Data($data, $eol); + } - if (!function_exists('gzcompress')) { - throw new Exception( - pht( - 'This patch has binary data. The PHP zlib extension is required to '. - 'apply patches with binary data to git. Install the PHP zlib '. 
- 'extension to continue.')); + public static function newBase85Data($data, $eol, $mode = null) { + // The "32bit" and "64bit" modes are used by unit tests to verify that all + // of the encoding pathways here work identically. In these modes, we skip + // compression because `gzcompress()` may not be stable and we just want + // to test that the output matches some expected result. + + if ($mode === null) { + if (!function_exists('gzcompress')) { + throw new Exception( + pht( + 'This patch has binary data. The PHP zlib extension is required '. + 'to apply patches with binary data to git. Install the PHP zlib '. + 'extension to continue.')); + } + + $input = gzcompress($data); + } else { + switch ($mode) { + case '32bit': + $input = $data; + break; + case '64bit': + $input = $data; + break; + default: + throw new Exception( + pht( + 'Unsupported base85 encoding mode "%s".', + $mode)); + } } // See emit_binary_diff_body() in diff.c for git's implementation. $buf = ''; - $deflated = gzcompress($data); - $lines = str_split($deflated, 52); + $lines = str_split($input, 52); foreach ($lines as $line) { $len = strlen($line); // The first character encodes the line length. 
@@ -811,7 +836,7 @@ final class ArcanistBundle extends Phobject { return $buf; } - public static function encodeBase85($data) { + private static function encodeBase85($data) { // This is implemented awkwardly in order to closely mirror git's // implementation in base85.c diff --git a/src/parser/__tests__/ArcanistBundleTestCase.php b/src/parser/__tests__/ArcanistBundleTestCase.php index 94b3517f..f64e8992 100644 --- a/src/parser/__tests__/ArcanistBundleTestCase.php +++ b/src/parser/__tests__/ArcanistBundleTestCase.php @@ -663,11 +663,7 @@ EODIFF; } $expect = Filesystem::readFile(dirname(__FILE__).'/base85/expect1.txt'); - $expect = trim($expect); - - $this->assertEqual( - $expect, - ArcanistBundle::encodeBase85($data)); + $this->assertBase85($expect, $data, pht('Byte Sequences')); // This is just a large block of random binary data, it has no special // significance. @@ -948,11 +944,27 @@ EODIFF; "\xe8\x1d\xa4\x18\xf3\x73\x82\xb4\x50\x59\xc2\x34\x36\x05\xeb"; $expect = Filesystem::readFile(dirname(__FILE__).'/base85/expect2.txt'); - $expect = trim($expect); - $this->assertEqual( - $expect, - ArcanistBundle::encodeBase85($data)); + $this->assertBase85($expect, $data, pht('Random Data')); + } + + private function assertBase85($expect, $data, $label) { + $modes = array( + '32bit', + ); + + // If this is a 64-bit machine, we can also test 64-bit mode. 
+ $has_64bit = (PHP_INT_SIZE >= 8); + if ($has_64bit) { + $modes[] = '64bit'; + } + + foreach ($modes as $mode) { + $this->assertEqual( + $expect, + ArcanistBundle::newBase85Data($data, "\n", $mode), + pht('base85/%s: %s', $mode, $label)); + } } } diff --git a/src/parser/__tests__/base85/expect1.txt b/src/parser/__tests__/base85/expect1.txt index 67fc430a..0844e377 100644 --- a/src/parser/__tests__/base85/expect1.txt +++ b/src/parser/__tests__/base85/expect1.txt @@ -1 +1,10 @@ -009C61O)~M2nh-c3=Iws5D^j+6crX17#SKH9337XAR!_nBqb&%C@Cr{EG;fCFflSSG&MFiI5|2yJUu=?KtV!7L`6nNNJ&adOifNtP*GA-R8>}2SXo+ITwPvYU}0ioWMyV&XlZI|Y;A6DaB*^Tbai%jczJqze0_d@fPsR8goTEOh>41ejE#+0#~=jP?)3Q(qoSdppPrqZo0^%JmzI^3lai5;kB*Iui;9VehlYiOgMxv8e|~*@dwO|zcXoAjb8>NTZ*FaDYiem|XJ%z&V`5=oUtV2YTUuFIS5{S2Q&Le-PfkrtOG-&dM@B_NLqb77KR!J?J32WyH#RjiGcqwSFD@-CD=H}{CnhB%BO)OnA08bX8yXoH7Zw#16A}>+4-O3s3knGc2L=TM0|Eg6 +z009C61O)~M2nh-c3=Iws5D^j+6crX17#SKH9337XAR!_nBqb&%C@Cr{EG;fCFflSS +zG&MFiI5|2yJUu=?KtV!7L`6nNNJ&adOifNtP*GA-R8>}2SXo+ITwPvYU}0ioWMyV& +zXlZI|Y;A6DaB*^Tbai%jczJqze0_d@fPsR8goTEOh>41ejE#+0#~=jP?)3Q(qoSdppPrqZo0^%JmzI^3 +zlai5;kB*Iui;9VehlYiOgMxv8e|~*@dwO|zcXoAjb8>NTZ*FaDYiem|XJ%z&V`5=o +zUtV2YTUuFIS5{S2Q&Le-PfkrtOG-&dM@B_NLqb77KR!J?J32WyH#RjiGcqwSFD@-C +rD=H}{CnhB%BO)OnA08bX8yXoH7Zw#16A}>+4-O3s3knGc2L=TM0|Eg6 diff --git a/src/parser/__tests__/base85/expect2.txt b/src/parser/__tests__/base85/expect2.txt index 1f47b081..be01a9f0 100644 --- a/src/parser/__tests__/base85/expect2.txt +++ b/src/parser/__tests__/base85/expect2.txt @@ -1 +1,79 @@ 
-R!p;FbKc@>2*X8DfcAdeEEln%ePoVCCeLp`nS06Z4@b}#LjI(R92uzlkiBndzvzrF7pc85OYN!or+OltA3#j&4DMoz+MC*`m*T=TX#p~V>o4xX8j?fF*jO&ndb2H~}CRa}UmEinhlcy-=3`TR^0nS64P8M`qHj-LqOyi!AEt(oVX*p*E*7#pV?z_}u&y1TfX_y+AS4*i$_8WF6*W-_4m0JelsAgdKNlrjhGMjWZz*h7#?1R!QX<$_mJ5SaM_5?U}w!(r<`h!-wHA2J7+$i*ENU7jaL3w8WVbSuRd)~x8kB_#T3Uv5!12;D?jVfx%QK8~ApA(5aB}K_Q9-tZnARK{f{I$vA+OdZ;pt5o{Xc7fed}wPPDpo3ff6$y0sj^Jwie=Ylt@HCMoahh-isB$=WA(=1H*L6dGOKI7hI@9_)N}$ss&Cg->yp?Mjp^QAnIakZ|b^gjLd{Nc#O$5)rvF{2gZJLgQX#^gZetQ{#I}?%Q-I`5ZuqL}*VNT6>SfM@ZB2<@IJYNWNs>{x$K#)6z{g@!AiD4`z;JAaQ=TJhA{5EE5CB<#DG2u-QF8D;^wrXwugPtr0FBL)a=cX7dgnsKo%)8sER9w7SYTPC&^jW8Mq^#|8j}}7>mk=y)@1C;B-{z;A2zjITne|!Z)rk=0$@k|eat6j!!_CnDY53n6@@5=qK)jdnNnUtjQbvEzL{N)g=cwz|gd~B$EM@nVz4DGj`)Wx%YUL_9@cM~aUpaNOZv^q+EX{W`>}y39;}|3bxoE*oaZCS*80rmHNmbwFuvy!5o(nhwpAc`^GzUZ}2(i?6#)J*Kwbcm)83AG`MN%EcPbGMMysihc)ZiJB1J;c%0LijuJ5D`y_amAM)=o;ul#0z=Q=vS7hX>7_ic`avzZA4M|DIWLs1L~Uuo$ybc!^rSkraAEnoLEL!o_rIu)<9AWcB`@D6YX>l&&f8g7sSnqL?IYRH;S{V`k4kWNMaf9kR(t-(4WspxzZu7MuDqix!f8|(6Ec0Tbd_Zmgjvw^X`fPW;q4%KKhKN=|&4#Fm4463OhOkrh72fk_a$?Pv2`dKb=z!$|#`noS~q&@i#@GV@aL%&gMjplgZXIJpZ4wsx)BQUIE-$t0EuTH1stTDaoYM`*>lzvh6iGe<=Ph_{nQE|<50;vL-T&oTgM>_<;n^_u@l1yyZ{6_V-8wCc>%p)|vC2HzC$7IyivuEujC(~!ay`Ou183bI>mVTwjyRw96m^cxT`(hRS=C>qwvUtaHk;z~kk!=JkLFvi*(QKyGHZIv!ZjfWQ_m0LWqd)*)nmqph127R7k}q2$;ZB+4g2_?h6>4@|XRf27oP|-~M2Taiahbd=oU5AqAU9anEsK3Z6E#Fk5tn8s7pz3+Y6Ezph{iT?T3T9iqv>)Ckum(Pd!Th83c*X2%j3fC0}r^eQOC%fjcq#TPAsbHUP+VSkD*kP=X_l;#{~c=6jw*H1uYknC`ImtI$5@+1i)MeALd&&Q(}T-)wc5WXBiRh(s0ETW5IRdB;^5FQhbOPwBQx#5(UkOde!Ph!@FhF-3{J0l$Z@H`#djc&?w@C*(dXQtx#x|R9@@6DsYg-n;pX^Tu@Lb2Eh7&8<@AlVZ8sF{RF{%mXh4YZLn-fGmUp7)Jg$GSnHQ%FZ{kBTn-oLx6>Ad^XjTiV1}?z(-kwyQy8m`2ZC;*;IxCwXL%A9bSY_AQ91R0@RPD@T_c_gj-!h26JJe#h>NMETygG?EWMD+zwsu*um1-sk@bqydi}-=v4KIx1aKe_t*qh`(?2G-q$u={$yjeFH2{2yoXRc>Ratrh-g8$3c*_~~CrXWI4D+(_~nOk;knK`MpG6oU&8PPw$)*om3)vkJRjJ!RxUtEEFp*e607bLeLhEUia$ZT1f-0*P$+SpT=9CJ4D35Ln1CMCcFJ`+wU>+p@^I0IDfH~Ud8wDuAovW#YR05DtcS@~nBbO({-oVUq|*{M;J%dw|q2zigRZ3)-KzY5z>;2eX>_|K<-y9?&e8x9KK&1mA@#4Wm2$HyLQUd
o#wb(WNpgzWVqC3t7ez5)JIrOawTGlQghM3W2oOtO`>eoLgthW?82cR*GX!7(h1N~nop3^_A+jMY#(iNn)?9H+K6!T^m5f6UV$bYO5cecIf_Nd)lbY|*~U`|+aI)Z~{xH3?hV25W7GQzHx}Q7RRx;&c1QK~&sbnF|UY)Z1{Qj{RHks6LzjSD3unDpu1O`3u=+4-lLq$7bgoq~YP^crMUmo-AeV9HRo05Qf+{CZ+*64t_d5Ou?_jbre7qCHJ=)j%xM6*1t`3@U)&M4#|J&xiMS8UWG5v7N*EERa1!47a9Tf0>klPN&+SK!z)L_}C$XhcswIg&aUu#yCoEs{v`Tj=3@tY)mOUfl!K@el;=V;o@woO})$*D2xTq@6l`e`x`ol>VxwRU@fRhZ^YW@E2hYV4SokS1@k4Pu(SZ@1Bl`YuFGY!bDSZWRE~1;pn2UKZ*xt&16rU9*K4YZL7k&7KMNTrLp1?1THy;jPAVq2WHh7)Lv$n8|EvBae||Yn6MVvyO)bsc|lfg!jIqjTgPZyxCucqd>P@DE(J`iIDy);i#{BAnlY?CJ#DM=T3*L)jycVHZ({DbbKQbMZwchXu9D~&nV?>x67=HW=9O4FMXmlLd0&Fgm56ieM#uoH!z$cyy#1N3Os9A2d|1D`QfOj53wdNXWv9z~>9RAmSyFo!v;r#!4LT85lBFUmyylk$5(Vdx^ZclJxot$;GllE1F}wih=|$_vtVNXEM=28fk91_+M?esa(9&9NFxHM=OQr9rV(TcNUO&OS3TUg>`l7dw60n)PlOVX2jmpQT*)AEZ2DwgFj&$!T1lV;1I4Z}Wq>1ySiqtGDSZ@FWBe<^uNPSR7Z(0aJ<5q3U;!o<1t8~rz@6Y)j@%XuHsyjE)Awp;D)HU;Yd +zR!p;FbKc@>2*X8DfcAdeEEln%ePoVCCeLp`nS06Z4@ +zb}#LjI(R9 +z2uzlkiBndzvzrF7pc85OYN!or+OltA3#j&4DMoz+MC*`m*T=TX#p~V>o4xX8j?fF* +zjO&ndb2H~}CRa}UmEinhlcy-=3`TR^0nS64P8M`qHj-LqOyi!AEt(oVX*p*E*7#pV +z?z_}u&y1TfX_y+AS4*i$_8WF6*W-_4m0JelsAgdKNlrjhGMjWZz*h7#?1R!QX<$_m +zJ5SaM_5?U}w!(r<`h!-wHA2J7+$i*ENU7jaL3w8WVbSuRd)~x8kB_#T3Uv5!12;D? 
+zjVfx%QK8~ApA(5aB}K_Q +z9-tZnARK{f{I$vA+OdZ;pt5o{Xc7fed}wPPDpo3ff6$y0sj^Jwie=Ylt@HCMoahh- +zisB$=WA(=1H*L6dGOKI7hI@9_)N}$ss&C +zg->yp?Mjp^QAnIakZ|b^gjLd{Nc#O$5)rvF{2gZJLgQX#^gZetQ{#I}?%Q-I`5ZuqL}*VNT6>SfM@ZB2<@IJYNWNs>{x$K#)6z{g@!AiD4`z;JAaQ=TJhA{5EE5CB<#D +zG2u-QF8D;^wrXwugPtr0FBL)a=cX7dgnsKo%)8sER9w7SYTPC&^jW8Mq^#|8j}}7> +zmk=y)@1C;B-{z;A2zjITne|!Z)rk=0$@k|eat6j!!_CnDY53n6@@5=qK)jdnNnUtj +zQbvEzL{N)g=cwz|gd~B$EM@nVz4DGj`)Wx%YUL_9@cM~aUpaNOZv^q+EX{W`>}y39 +z;}|3bxoE*oaZCS*80rmHNmbwFuvy!5o(nhwpAc`^GzUZ}2(i?6#)J*Kwbcm)83AG` +zMN%EcPbGMMysihc)ZiJB1J;c%0LijuJ5D`y_amAM)=o;ul#0z=Q=vS7hX>7_ic`avzZA4M|DIWLs1 +zL~Uuo$ybc!^rSkraAEnoLEL!o_rIu)<9AWcB`@D6YX>l&&f8g7sSnqL?IYRH;S{V`k4kWNMaf9kR(t-(4WspxzZu7MuDqix!f8|(6Ec0Tbd_Zmgj +zvw^X`fPW;q4%KKhKN=|&4#Fm4463OhOkrh72fk_a$?Pv2`dKb=z!$|#`noS~q&@i#@GV@aL%&gM +zjplgZXIJpZ4wsx)BQUIE-$t0EuTH1stTDaoYM`*>lzvh6iGe<=Ph_{nQE|<50;vL- +zT&oTgM>_<;n^_u@l1yyZ +z{6_V-8wCc>%p)|vC2HzC$7IyivuEujC(~!ay`Ou183bI>mVTwjyRw96m^cxT`(hRS +z=C>qwvUtaHk;z~kk!=JkLFvi*(QKyGHZIv!ZjfWQ_m0LWqd)*)nmqph127R7k}q2$;ZB+4 +zg2_?h6>4@|XRf27oP|-~M2Taiahbd=oU5AqAU9anEsK3Z6E#Fk5tn8s7pz3+Y6Ezp +zh{iT?T3T9iqv>)Ckum(Pd!Th83c*X2%j3fC0}r^eQOC%fjcq#TPAsbHUP+VSkD*kP +z=X_l;#{~c=6jw*H1uYknC`ImtI$5@+1i)MeALd&&Q(}T-)wc5WXBiRh(s0ETW5IRd +zB;^5FQhbOPwBQx#5(UkOde!Ph!@FhF-3{J0l$Z@H`#djc&?w@C*(dXQtx#x|R9@@6 +zDsYg-n;pX^Tu@Lb2Eh7&8<@AlVZ8sF{RF{%m +zXh4YZLn-fGmUp7)Jg$GSnHQ%FZ{kBTn-oLx6>Ad^XjTiV1}?z(-kwyQy8m`2ZC;*; +zIxCwXL%A9bSY_AQ91R0@RPD@T_c_gj-!h26JJe#h>NMETygG?EWMD+z +zwsu*um1-sk@bqydi}-=v4KIx1aKe_t*qh`(?2G-q$u={$yjeFH2{2yoXRc>Ratrh- +zg8$3c*_~~CrXWI4D+(_~nOk;knK`MpG6oU&8PPw$)*om3)vkJRjJ!RxUtEEFp*e60 +z7bLeLhEUia$ZT1f-0*P$+SpT=9CJ4D35Ln1CMCcFJ`+wU>+p@^I0IDfH~Ud8wDuAo +zvW#YR05DtcS@~nBbO({-oVUq|*{M;J%dw|q2zigRZ3)-KzY5z>;2eX>_|K<-y9?&e +z8x9KK&1mA@#4Wm2$HyLQUdo#wb(WNpgzWVqC3t7ez5)JIrOawTGlQghM3W2oOtO`> +zeoLgthW?82cR*GX!7(h1N~nop3^_A+jMY#(iNn)?9H+K6!T^m5f6UV$bYO5cecIf_ +zNd)lbY|*~U`|+aI)Z~{xH3?hV25W7GQzHx}Q7RRx;&c1QK~&sbnF|UY)Z1{Qj{RHk 
+zs6LzjSD3unDpu1O`3u=+4-lLq$7bgoq~YP^crMUmo-AeV9HRo05Qf+{CZ+*64t_d5Ou?_jbre7qCHJ=)j%xM6*1 +zt`3@U)&M4#|J&xiMS8UWG5v7N*EERa1!47a9Tf0>klPN&+SK!z)L_}C$XhcswIg&a +zUu#yCoEs{v`Tj=3@tY)mOUfl!K@el;=V;o@woO})$*D2xTq@6l`e`x`ol>VxwRU@fRhZ^YW@E2hYV4SokS1@k4Pu(SZ@1Bl`YuFGY!bDSZWRE~1;pn2U +zKZ*xt&16rU9*K4YZL7k&7KMNTrLp1?1THy;jPAVq2WHh7)Lv$n8|EvBae||Yn6MVvyO)bsc|lfg!jIqjTgPZyxCucq +zd>P@DE(J`iIDy);i#{BAnlY?CJ#DM=T3*L)jycVHZ({DbbKQbMZwchXu9D~&nV?>x +z67=HW=9O4FMXmlLd0&Fgm56ieM#uoH!z$cyy#1N3Os9A2d|1D`QfOj5 +z3wdNXWv9z~>9RAmSyFo!v;r#!4LT85lBFUmyylk$5(Vdx^ZclJxot$;GllE1F}wih=|$_vtVN +zXEM=28fk91_+M?esa(9&9NFxHM=OQr9rV(TcNUO&OS3TUg>`l7dw60n)PlOVX2jmpQT +z*)AEZ2DwgFj&$!T1lV;1I4Z}Wq>1ySiqtGDSZ@FWBe<^uNPSR7Z(0aJ<5q3U;!o<1 +mt8~rz@6Y)j@%XuHsyjE)Awp;D)HU;Yd From a604548101025875de20a9c263df3790fea425b3 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 25 Apr 2018 07:57:54 -0700 Subject: [PATCH 2/2] Slightly improve base85 performance for 64-bit systems Summary: Ref T13130. I wasn't able to make this much better, but it looks like this is about ~20% faster on my system. This kind of thing is somewhat difficult to micro-optimize because XHProf tends to over-estimate the cost of function calls. In XHProf, this looks much much faster than the old version (~100% faster) but the actual cost of `bin/conduit call --method differential.getrawdiff` hasn't improved that much. Still, it seems consistently faster across multiple runs. Test Plan: - Pulled binary diffs over Conduit with `bin/conduit call --method differential.getrawdiff`. - Verified that they are byte-for-byte identical with the pre-change diffs, and look like they're ~20% faster. - Profiled the differences and saw a far more dramatic improvement, but I believe XHProf is exaggerating the effect of this change because it tends to overestimate function call cost. - Ran unit tests (from D19407), got byte-for-byte identical output under both 32bit and 64bit mode. 
Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13130 Differential Revision: https://secure.phabricator.com/D19408 --- src/parser/ArcanistBundle.php | 97 ++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/src/parser/ArcanistBundle.php b/src/parser/ArcanistBundle.php index 265883f6..399ff611 100644 --- a/src/parser/ArcanistBundle.php +++ b/src/parser/ArcanistBundle.php @@ -800,13 +800,16 @@ final class ArcanistBundle extends Phobject { } $input = gzcompress($data); + $is_64bit = (PHP_INT_SIZE >= 8); } else { switch ($mode) { case '32bit': $input = $data; + $is_64bit = false; break; case '64bit': $input = $data; + $is_64bit = true; break; default: throw new Exception( @@ -818,25 +821,6 @@ final class ArcanistBundle extends Phobject { // See emit_binary_diff_body() in diff.c for git's implementation. - $buf = ''; - - $lines = str_split($input, 52); - foreach ($lines as $line) { - $len = strlen($line); - // The first character encodes the line length. - if ($len <= 26) { - $buf .= chr($len + ord('A') - 1); - } else { - $buf .= chr($len - 26 + ord('a') - 1); - } - $buf .= self::encodeBase85($line); - $buf .= $eol; - } - - return $buf; - } - - private static function encodeBase85($data) { // This is implemented awkwardly in order to closely mirror git's // implementation in base85.c @@ -864,6 +848,9 @@ final class ArcanistBundle extends Phobject { // (Since PHP overflows integer operations into floats, we don't need much // additional casting.) + // On 64 bit systems, we skip all this fanfare and just use integers. This + // is significantly faster. 
+ static $map = array( '0', '1', @@ -952,27 +939,65 @@ final class ArcanistBundle extends Phobject { '~', ); + $len_map = array(); + for ($ii = 0; $ii <= 52; $ii++) { + if ($ii <= 26) { + $len_map[$ii] = chr($ii + ord('A') - 1); + } else { + $len_map[$ii] = chr($ii - 26 + ord('a') - 1); + } + } + $buf = ''; - $pos = 0; - $bytes = strlen($data); - while ($bytes) { - $accum = 0; - for ($count = 24; $count >= 0; $count -= 8) { - $val = ord($data[$pos++]); - $val = $val * (1 << $count); - $accum = $accum + $val; - if (--$bytes == 0) { - break; + $lines = str_split($input, 52); + $final = (count($lines) - 1); + + foreach ($lines as $idx => $line) { + if ($idx === $final) { + $len = strlen($line); + } else { + $len = 52; + } + + // The first character encodes the line length. + $buf .= $len_map[$len]; + + $pos = 0; + while ($len) { + $accum = 0; + for ($count = 24; $count >= 0; $count -= 8) { + $val = ord($line[$pos++]); + $val = $val * (1 << $count); + $accum = $accum + $val; + if (--$len == 0) { + break; + } } + + $slice = ''; + + // If we're in 64bit mode, we can just use integers. Otherwise, we + // need to use floating point math to avoid overflows. + + if ($is_64bit) { + for ($count = 4; $count >= 0; $count--) { + $val = $accum % 85; + $accum = $accum / 85; + $slice .= $map[$val]; + } + } else { + for ($count = 4; $count >= 0; $count--) { + $val = (int)fmod($accum, 85.0); + $accum = floor($accum / 85.0); + $slice .= $map[$val]; + } + } + + $buf .= strrev($slice); } - $slice = ''; - for ($count = 4; $count >= 0; $count--) { - $val = (int)fmod($accum, 85.0); - $accum = floor($accum / 85.0); - $slice .= $map[$val]; - } - $buf .= strrev($slice); + + $buf .= $eol; } return $buf;