1
0
Fork 0
mirror of https://git.tukaani.org/xz.git synced 2024-04-04 12:36:23 +02:00

Apply a minor speed optimization to the LZMA decoder.

This commit is contained in:
Lasse Collin 2008-03-11 15:35:34 +02:00
parent f310c50286
commit bfde3b24a5

View file

@ -179,43 +179,41 @@ decode_dummy(const lzma_coder *restrict coder,
coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0)); coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0));
uint32_t symbol = 1; uint32_t symbol = 1;
if (!is_char_state(state)) { if (is_char_state(state)) {
// Decode literal with match byte. // Decode literal without match byte.
do {
if_bit_0(subcoder[symbol]) {
update_bit_0_dummy();
symbol <<= 1;
} else {
update_bit_1_dummy();
symbol = (symbol << 1) | 1;
}
} while (symbol < 0x100);
assert(rep0 != UINT32_MAX); } else {
// Decode literal with match byte.
uint32_t match_byte = lz_get_byte(coder->lz, rep0); uint32_t match_byte = lz_get_byte(coder->lz, rep0);
uint32_t subcoder_offset = 0x100;
do { do {
match_byte <<= 1; match_byte <<= 1;
const uint32_t match_bit = match_byte & 0x100; const uint32_t match_bit = match_byte & subcoder_offset;
const uint32_t subcoder_index = 0x100 + match_bit + symbol; const uint32_t subcoder_index
= subcoder_offset + match_bit + symbol;
if_bit_0(subcoder[subcoder_index]) { if_bit_0(subcoder[subcoder_index]) {
update_bit_0_dummy(); update_bit_0_dummy();
symbol <<= 1; symbol <<= 1;
if (match_bit != 0) subcoder_offset &= ~match_bit;
break;
} else { } else {
update_bit_1_dummy(); update_bit_1_dummy();
symbol = (symbol << 1) | 1; symbol = (symbol << 1) | 1;
if (match_bit == 0) subcoder_offset &= match_bit;
break;
} }
} while (symbol < 0x100); } while (symbol < 0x100);
} }
// Decode literal without match byte. This is also
// the tail of the with-match-byte function.
while (symbol < 0x100) {
if_bit_0(subcoder[symbol]) {
update_bit_0_dummy();
symbol <<= 1;
} else {
update_bit_1_dummy();
symbol = (symbol << 1) | 1;
}
}
break; break;
} }
@ -366,43 +364,46 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
now_pos, lz_get_byte(coder->lz, 0)); now_pos, lz_get_byte(coder->lz, 0));
uint32_t symbol = 1; uint32_t symbol = 1;
if (!is_char_state(state)) { if (is_char_state(state)) {
// Decode literal with match byte. // Decode literal without match byte.
do {
if_bit_0(subcoder[symbol]) {
update_bit_0(subcoder[symbol]);
symbol <<= 1;
} else {
update_bit_1(subcoder[symbol]);
symbol = (symbol << 1) | 1;
}
} while (symbol < 0x100);
assert(rep0 != UINT32_MAX); } else {
// Decode literal with match byte.
//
// The usage of subcoder_offset allows omitting some
// branches, which should give a tiny speed improvement on
// some CPUs. subcoder_offset gets set to zero once a
// decoded bit doesn't match match_bit.
uint32_t match_byte = lz_get_byte(coder->lz, rep0); uint32_t match_byte = lz_get_byte(coder->lz, rep0);
uint32_t subcoder_offset = 0x100;
do { do {
match_byte <<= 1; match_byte <<= 1;
const uint32_t match_bit = match_byte & 0x100; const uint32_t match_bit = match_byte & subcoder_offset;
const uint32_t subcoder_index = 0x100 + match_bit + symbol; const uint32_t subcoder_index
= subcoder_offset + match_bit + symbol;
if_bit_0(subcoder[subcoder_index]) { if_bit_0(subcoder[subcoder_index]) {
update_bit_0(subcoder[subcoder_index]); update_bit_0(subcoder[subcoder_index]);
symbol <<= 1; symbol <<= 1;
if (match_bit != 0) subcoder_offset &= ~match_bit;
break;
} else { } else {
update_bit_1(subcoder[subcoder_index]); update_bit_1(subcoder[subcoder_index]);
symbol = (symbol << 1) | 1; symbol = (symbol << 1) | 1;
if (match_bit == 0) subcoder_offset &= match_bit;
break;
} }
} while (symbol < 0x100); } while (symbol < 0x100);
} }
// Decode literal without match byte. This is also
// the tail of the with-match-byte function.
while (symbol < 0x100) {
if_bit_0(subcoder[symbol]) {
update_bit_0(subcoder[symbol]);
symbol <<= 1;
} else {
update_bit_1(subcoder[symbol]);
symbol = (symbol << 1) | 1;
}
}
// Put the decoded byte to the dictionary, update the // Put the decoded byte to the dictionary, update the
// decoder state, and start a new decoding loop. // decoder state, and start a new decoding loop.
coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol); coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol);