From 33be3c0e24d8f43376ccf71cc77d53671e792f07 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Thu, 17 Jan 2008 18:56:53 +0200 Subject: [PATCH] Subblock decoder: Don't exit the main loop in decode_buffer() too early if we hit End of Input while decoding a Subblock of type Repeating Data. To keep the loop termination condition elegant, the order of enumerations in coder->sequence were changed. To keep the case-labels in roughly the same order as the enumerations in coder->sequence, large chunks of code was moved around. This made the diff big and ugly compared to the amount of the actual changes made. --- src/liblzma/subblock/subblock_decoder.c | 272 ++++++++++++------------ 1 file changed, 139 insertions(+), 133 deletions(-) diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c index 4eb9e55f..6f38caff 100644 --- a/src/liblzma/subblock/subblock_decoder.c +++ b/src/liblzma/subblock/subblock_decoder.c @@ -30,20 +30,24 @@ struct lzma_coder_s { lzma_next_coder next; enum { + // These require that there is at least one input + // byte available. SEQ_FLAGS, - SEQ_SIZE_1, - SEQ_SIZE_2, - SEQ_SIZE_3, - SEQ_DATA, + SEQ_FILTER_FLAGS, + SEQ_FILTER_END, SEQ_REPEAT_COUNT_1, SEQ_REPEAT_COUNT_2, SEQ_REPEAT_COUNT_3, SEQ_REPEAT_SIZE, SEQ_REPEAT_READ_DATA, + SEQ_SIZE_1, + SEQ_SIZE_2, + SEQ_SIZE_3, // This must be right before SEQ_DATA. + + // These don't require any input to be available. + SEQ_DATA, SEQ_REPEAT_FAST, SEQ_REPEAT_NORMAL, - SEQ_FILTER_FLAGS, - SEQ_FILTER_END, } sequence; /// Number of bytes left in the current Subblock Data field. @@ -167,7 +171,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, size_t *restrict out_pos, size_t out_size, lzma_action action) { while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_DATA)) + || coder->sequence >= SEQ_DATA)) switch (coder->sequence) { case SEQ_FLAGS: { if ((in[*in_pos] >> 4) != FLAG_PADDING) @@ -284,8 +288,68 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; } - case SEQ_SIZE_1: + case SEQ_FILTER_FLAGS: { + const lzma_ret ret = coder->filter_flags_decoder.code( + coder->filter_flags_decoder.coder, allocator, + in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret == LZMA_HEADER_ERROR + ? LZMA_DATA_ERROR : ret; + + // Don't free the filter_flags_decoder. It doesn't take much + // memory and we may need it again. + + // Initialize the Subfilter. Subblock and Copy filters are + // not allowed. + if (coder->filter_flags.id == LZMA_FILTER_COPY + || coder->filter_flags.id + == LZMA_FILTER_SUBBLOCK) + return LZMA_DATA_ERROR; + + coder->helper.end_was_reached = false; + + lzma_options_filter filters[3] = { + { + .id = coder->filter_flags.id, + .options = coder->filter_flags.options, + }, { + .id = LZMA_FILTER_SUBBLOCK_HELPER, + .options = &coder->helper, + }, { + .id = LZMA_VLI_VALUE_UNKNOWN, + .options = NULL, + } + }; + + // Optimization: We know that LZMA uses End of Payload Marker + // (not End of Input), so we can omit the helper filter. + if (filters[0].id == LZMA_FILTER_LZMA) + filters[1].id = LZMA_VLI_VALUE_UNKNOWN; + + return_if_error(lzma_raw_decoder_init( + &coder->subfilter, allocator, + filters, LZMA_VLI_VALUE_UNKNOWN, false)); + + coder->sequence = SEQ_FLAGS; + break; + } + + case SEQ_FILTER_END: + // We are in the beginning of a Subblock. The next Subblock + // whose type is not Padding, must indicate end of Subfilter. + if (in[*in_pos] == (FLAG_PADDING << 4)) { + ++*in_pos; + break; + } + + if (in[*in_pos] != (FLAG_END_SUBFILTER << 4)) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_FLAGS; + break; + case SEQ_REPEAT_COUNT_1: + case SEQ_SIZE_1: // We use the same code to parse // - the Size (28 bits) in Subblocks of type Data; and // - the Repeat count (28 bits) in Subblocks of type @@ -295,21 +359,23 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, ++coder->sequence; break; - case SEQ_SIZE_2: case SEQ_REPEAT_COUNT_2: + case SEQ_SIZE_2: coder->size |= (size_t)(in[*in_pos]) << 12; ++*in_pos; ++coder->sequence; break; - case SEQ_SIZE_3: case SEQ_REPEAT_COUNT_3: + case SEQ_SIZE_3: coder->size |= (size_t)(in[*in_pos]) << 20; + ++*in_pos; // The real value is the stored value plus one. ++coder->size; - ++*in_pos; + // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why + // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence. ++coder->sequence; break; @@ -348,6 +414,68 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; } + case SEQ_DATA: { + // Limit the amount of input to match the available + // Subblock Data size. + size_t in_limit; + if (in_size - *in_pos > coder->size) + in_limit = *in_pos + coder->size; + else + in_limit = in_size; + + if (coder->subfilter.code == NULL) { + const size_t copy_size = bufcpy( + in, in_pos, in_limit, + out, out_pos, out_size); + + coder->size -= copy_size; + + if (update_uncompressed_size(coder, copy_size)) + return LZMA_DATA_ERROR; + + } else { + const size_t in_start = *in_pos; + const lzma_ret ret = subfilter_decode( + coder, allocator, + in, in_pos, in_limit, + out, out_pos, out_size, + action); + + // Update the number of unprocessed bytes left in + // this Subblock. This assert() is true because + // in_limit prevents *in_pos getting too big. + assert(*in_pos - in_start <= coder->size); + coder->size -= *in_pos - in_start; + + if (ret == LZMA_STREAM_END) { + // End of Subfilter can occur only at + // a Subblock boundary. + if (coder->size != 0) + return LZMA_DATA_ERROR; + + // We need a Subblock with Unset + // Subfilter before more data. + coder->sequence = SEQ_FILTER_END; + break; + } + + if (ret != LZMA_OK) + return ret; + } + + // If we couldn't process the whole Subblock Data yet, return. + if (coder->size > 0) + return LZMA_OK; + + // Check if we have decoded all the data. + if (coder->uncompressed_size == 0 + && coder->subfilter.code == NULL) + return LZMA_STREAM_END; + + coder->sequence = SEQ_FLAGS; + break; + } + case SEQ_REPEAT_FAST: { // Optimization for cases when there is only one byte to // repeat and no Subfilter. @@ -432,128 +560,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; - case SEQ_DATA: { - // Limit the amount of input to match the available - // Subblock Data size. - size_t in_limit; - if (in_size - *in_pos > coder->size) - in_limit = *in_pos + coder->size; - else - in_limit = in_size; - - if (coder->subfilter.code == NULL) { - const size_t copy_size = bufcpy( - in, in_pos, in_limit, - out, out_pos, out_size); - - coder->size -= copy_size; - - if (update_uncompressed_size(coder, copy_size)) - return LZMA_DATA_ERROR; - - } else { - const size_t in_start = *in_pos; - const lzma_ret ret = subfilter_decode( - coder, allocator, - in, in_pos, in_limit, - out, out_pos, out_size, - action); - - // Update the number of unprocessed bytes left in - // this Subblock. This assert() is true because - // in_limit prevents *in_pos getting too big. - assert(*in_pos - in_start <= coder->size); - coder->size -= *in_pos - in_start; - - if (ret == LZMA_STREAM_END) { - // End of Subfilter can occur only at - // a Subblock boundary. - if (coder->size != 0) - return LZMA_DATA_ERROR; - - // We need a Subblock with Unset - // Subfilter before more data. - coder->sequence = SEQ_FILTER_END; - break; - } - - if (ret != LZMA_OK) - return ret; - } - - // If we couldn't process the whole Subblock Data yet, return. - if (coder->size > 0) - return LZMA_OK; - - // Check if we have decoded all the data. - if (coder->uncompressed_size == 0 - && coder->subfilter.code == NULL) - return LZMA_STREAM_END; - - coder->sequence = SEQ_FLAGS; - break; - } - - case SEQ_FILTER_FLAGS: { - const lzma_ret ret = coder->filter_flags_decoder.code( - coder->filter_flags_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret == LZMA_HEADER_ERROR - ? LZMA_DATA_ERROR : ret; - - // Don't free the filter_flags_decoder. It doesn't take much - // memory and we may need it again. - - // Initialize the Subfilter. Subblock and Copy filters are - // not allowed. - if (coder->filter_flags.id == LZMA_FILTER_COPY - || coder->filter_flags.id - == LZMA_FILTER_SUBBLOCK) - return LZMA_DATA_ERROR; - - coder->helper.end_was_reached = false; - - lzma_options_filter filters[3] = { - { - .id = coder->filter_flags.id, - .options = coder->filter_flags.options, - }, { - .id = LZMA_FILTER_SUBBLOCK_HELPER, - .options = &coder->helper, - }, { - .id = LZMA_VLI_VALUE_UNKNOWN, - .options = NULL, - } - }; - - // Optimization: We know that LZMA uses End of Payload Marker - // (not End of Input), so we can omit the helper filter. - if (filters[0].id == LZMA_FILTER_LZMA) - filters[1].id = LZMA_VLI_VALUE_UNKNOWN; - - return_if_error(lzma_raw_decoder_init( - &coder->subfilter, allocator, - filters, LZMA_VLI_VALUE_UNKNOWN, false)); - - coder->sequence = SEQ_FLAGS; - break; - } - - case SEQ_FILTER_END: - // We are in the beginning of a Subblock. The next Subblock - // whose type is not Padding, must indicate end of Subfilter. - if (in[*in_pos] == (FLAG_PADDING << 4)) { - ++*in_pos; - break; - } - - if (in[*in_pos] != (FLAG_END_SUBFILTER << 4)) - return LZMA_DATA_ERROR; - - coder->sequence = SEQ_FLAGS; - break; - default: return LZMA_PROG_ERROR; }