Apply a minor speed optimization to LZMA decoder.

2024-04-04 12:36:23 +02:00 · 2008-03-11 15:35:34 +02:00 · 2008-03-11 15:35:34 +02:00 · bfde3b24a5
commit bfde3b24a5
parent f310c50286
1 changed files with 43 additions and 42 deletions
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@ -179,43 +179,41 @@ decode_dummy(const lzma_coder *restrict coder,
 					coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0));
 			uint32_t symbol = 1;
-			if (!is_char_state(state)) {
+			if (is_char_state(state)) {
-				// Decode literal with match byte.
+				// Decode literal without match byte.
 				do {
 					if_bit_0(subcoder[symbol]) {
 						update_bit_0_dummy();
 						symbol <<= 1;
 					} else {
 						update_bit_1_dummy();
 						symbol = (symbol << 1) | 1;
 					}
 				} while (symbol < 0x100);
-				assert(rep0 != UINT32_MAX);
+			} else {
 				// Decode literal with match byte.
 				uint32_t match_byte = lz_get_byte(coder->lz, rep0);
 				uint32_t subcoder_offset = 0x100;
 				do {
 					match_byte <<= 1;
-					const uint32_t match_bit = match_byte & 0x100;
+					const uint32_t match_bit = match_byte & subcoder_offset;
-					const uint32_t subcoder_index = 0x100 + match_bit + symbol;
+					const uint32_t subcoder_index
 							= subcoder_offset + match_bit + symbol;
 					if_bit_0(subcoder[subcoder_index]) {
 						update_bit_0_dummy();
 						symbol <<= 1;
-						if (match_bit != 0)
+						subcoder_offset &= ~match_bit;
 							break;
 					} else {
 						update_bit_1_dummy();
 						symbol = (symbol << 1) | 1;
-						if (match_bit == 0)
+						subcoder_offset &= match_bit;
 							break;
 					}
 				} while (symbol < 0x100);
 			}
 			// Decode literal without match byte. This is also
 			// the tail of the with-match-byte function.
 			while (symbol < 0x100) {
 				if_bit_0(subcoder[symbol]) {
 					update_bit_0_dummy();
 					symbol <<= 1;
 				} else {
 					update_bit_1_dummy();
 					symbol = (symbol << 1) | 1;
 				}
 			}
 			break;
 		}
@ -366,43 +364,46 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
 					now_pos, lz_get_byte(coder->lz, 0));
 			uint32_t symbol = 1;
-			if (!is_char_state(state)) {
+			if (is_char_state(state)) {
-				// Decode literal with match byte.
+				// Decode literal without match byte.
 				do {
 					if_bit_0(subcoder[symbol]) {
 						update_bit_0(subcoder[symbol]);
 						symbol <<= 1;
 					} else {
 						update_bit_1(subcoder[symbol]);
 						symbol = (symbol << 1) | 1;
 					}
 				} while (symbol < 0x100);
-				assert(rep0 != UINT32_MAX);
+			} else {
 				// Decode literal with match byte.
 				//
 				// The usage of subcoder_offset allows omitting some
 				// branches, which should give tiny speed improvement on
 				// some CPUs. subcoder_offset gets set to zero if match_bit
 				// didn't match.
 				uint32_t match_byte = lz_get_byte(coder->lz, rep0);
 				uint32_t subcoder_offset = 0x100;
 				do {
 					match_byte <<= 1;
-					const uint32_t match_bit = match_byte & 0x100;
+					const uint32_t match_bit = match_byte & subcoder_offset;
-					const uint32_t subcoder_index = 0x100 + match_bit + symbol;
+					const uint32_t subcoder_index
 							= subcoder_offset + match_bit + symbol;
 					if_bit_0(subcoder[subcoder_index]) {
 						update_bit_0(subcoder[subcoder_index]);
 						symbol <<= 1;
-						if (match_bit != 0)
+						subcoder_offset &= ~match_bit;
 							break;
 					} else {
 						update_bit_1(subcoder[subcoder_index]);
 						symbol = (symbol << 1) | 1;
-						if (match_bit == 0)
+						subcoder_offset &= match_bit;
 							break;
 					}
 				} while (symbol < 0x100);
 			}
 			// Decode literal without match byte. This is also
 			// the tail of the with-match-byte function.
 			while (symbol < 0x100) {
 				if_bit_0(subcoder[symbol]) {
 					update_bit_0(subcoder[symbol]);
 					symbol <<= 1;
 				} else {
 					update_bit_1(subcoder[symbol]);
 					symbol = (symbol << 1) | 1;
 				}
 			}
 			// Put the decoded byte to the dictionary, update the
 			// decoder state, and start a new decoding loop.
 			coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol);