mirror of
https://git.tukaani.org/xz.git
synced 2024-04-04 12:36:23 +02:00
215 lines
6.7 KiB
C
215 lines
6.7 KiB
C
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
/// \file lz_decoder.h
|
||
|
/// \brief LZ out window
|
||
|
//
|
||
|
// Copyright (C) 1999-2006 Igor Pavlov
|
||
|
// Copyright (C) 2007 Lasse Collin
|
||
|
//
|
||
|
// This library is free software; you can redistribute it and/or
|
||
|
// modify it under the terms of the GNU Lesser General Public
|
||
|
// License as published by the Free Software Foundation; either
|
||
|
// version 2.1 of the License, or (at your option) any later version.
|
||
|
//
|
||
|
// This library is distributed in the hope that it will be useful,
|
||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
// Lesser General Public License for more details.
|
||
|
//
|
||
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
#ifndef LZMA_LZ_OUT_H
|
||
|
#define LZMA_LZ_OUT_H
|
||
|
|
||
|
#include "common.h"
|
||
|
|
||
|
|
||
|
/// Get a byte from the history buffer.
///
/// \param lz        The LZ decoder structure itself (a struct expression,
///                  not a pointer); its dict, pos and end members are read.
/// \param distance  Zero-based distance backwards from the next write
///                  position: 0 reads dict[pos - 1].
///
/// When distance >= pos, the index is wrapped by adding lz.end to it.
/// No validation is done here; the caller must make sure that the
/// resulting index refers to valid data.
///
/// \note Both arguments are evaluated more than once, so they must not
///       have side effects.
#define lz_get_byte(lz, distance) \
	((distance) < (lz).pos \
		? (lz).dict[(lz).pos - (distance) - 1] \
		: (lz).dict[(lz).pos - (distance) - 1 + (lz).end])
|
||
|
|
||
|
|
||
|
/// Initializer for an lzma_lz_decoder that has no dictionary allocated.
///
/// This is a compound literal, so every member that isn't named here
/// is zero-initialized as well.
#define LZMA_LZ_DECODER_INIT \
	(lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 }
|
||
|
|
||
|
|
||
|
typedef struct {
|
||
|
/// Function to do the actual decoding (LZMA or Inflate)
|
||
|
bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in,
|
||
|
size_t *restrict in_pos, size_t size_in,
|
||
|
bool has_safe_buffer);
|
||
|
|
||
|
/// Pointer to dictionary (history) buffer.
|
||
|
/// \note Not 'restrict' because can alias next_out.
|
||
|
uint8_t *dict;
|
||
|
|
||
|
/// Next write goes to dict[pos].
|
||
|
size_t pos;
|
||
|
|
||
|
/// Next byte to flush is buffer[start].
|
||
|
size_t start;
|
||
|
|
||
|
/// First byte to not flush is buffer[end].
|
||
|
size_t end;
|
||
|
|
||
|
/// First position to which data must not be written.
|
||
|
size_t limit;
|
||
|
|
||
|
/// True if dictionary has needed wrapping.
|
||
|
bool is_full;
|
||
|
|
||
|
/// True if process() has detected End of Payload Marker.
|
||
|
bool eopm_detected;
|
||
|
|
||
|
/// True if the next coder in the chain has returned LZMA_STREAM_END.
|
||
|
bool next_finished;
|
||
|
|
||
|
/// True if the LZ decoder (e.g. LZMA) has detected End of Payload
|
||
|
/// Marker. This may become true before next_finished becomes true.
|
||
|
bool this_finished;
|
||
|
|
||
|
/// When pos >= must_flush_pos, we must not call process().
|
||
|
size_t must_flush_pos;
|
||
|
|
||
|
/// Maximum number of bytes that a single decoding loop inside
|
||
|
/// process() can produce data into dict. This amount is kept
|
||
|
/// always available at dict + pos i.e. it is safe to write a byte
|
||
|
/// to dict[pos + match_max_len - 1].
|
||
|
size_t match_max_len;
|
||
|
|
||
|
/// Number of bytes allocated to dict.
|
||
|
size_t size;
|
||
|
|
||
|
/// Requested size of the dictionary. This is needed because we avoid
|
||
|
/// using extremely tiny history buffers.
|
||
|
size_t requested_size;
|
||
|
|
||
|
/// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown.
|
||
|
lzma_vli uncompressed_size;
|
||
|
|
||
|
/// Number of bytes currently in temp[].
|
||
|
size_t temp_size;
|
||
|
|
||
|
/// Temporary buffer needed when
|
||
|
/// 1) we cannot make the input buffer completely empty; or
|
||
|
/// 2) we are not the last filter in the chain.
|
||
|
uint8_t temp[LZMA_BUFFER_SIZE];
|
||
|
|
||
|
} lzma_lz_decoder;
|
||
|
|
||
|
|
||
|
/////////////////////////
|
||
|
// Function prototypes //
|
||
|
/////////////////////////
|
||
|
|
||
|
extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz,
|
||
|
lzma_allocator *allocator, bool (*process)(
|
||
|
lzma_coder *restrict coder, const uint8_t *restrict in,
|
||
|
size_t *restrict in_pos, size_t in_size,
|
||
|
bool has_safe_buffer),
|
||
|
lzma_vli uncompressed_size,
|
||
|
size_t history_size, size_t match_max_len);
|
||
|
|
||
|
extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator,
|
||
|
const uint8_t *restrict in, size_t *restrict in_pos,
|
||
|
size_t in_size, uint8_t *restrict out,
|
||
|
size_t *restrict out_pos, size_t out_size,
|
||
|
lzma_action action);
|
||
|
|
||
|
/// Deallocates the history buffer if one exists.
|
||
|
extern void lzma_lz_decoder_end(
|
||
|
lzma_lz_decoder *lz, lzma_allocator *allocator);
|
||
|
|
||
|
//////////////////////
|
||
|
// Inline functions //
|
||
|
//////////////////////
|
||
|
|
||
|
// Repeat a block of data from the history. Because memcpy() is faster
|
||
|
// than copying byte by byte in a loop, the copying process gets split
|
||
|
// into three cases:
|
||
|
// 1. distance < length
|
||
|
// Source and target areas overlap, thus we can't use memcpy()
|
||
|
// (nor memmove()) safely.
|
||
|
// TODO: If this is common enough, it might be worth optimizing this
|
||
|
// more e.g. by checking if distance > sizeof(uint8_t*) and using
|
||
|
// memcpy in small chunks.
|
||
|
// 2. distance < pos
|
||
|
// This is the easiest and the fastest case. The block being copied
|
||
|
// is a contiguous piece in the history buffer. The buffer offset
|
||
|
// doesn't need wrapping.
|
||
|
// 3. distance >= pos
|
||
|
// We need to wrap the position, because otherwise we would try copying
|
||
|
// behind the first byte of the allocated buffer. It is possible that
|
||
|
// the block is fragmeneted into two pieces, thus we might need to call
|
||
|
// memcpy() twice.
|
||
|
// NOTE: The function using this macro must ensure that length is positive
|
||
|
// and that distance is FIXME
|
||
|
static inline bool
|
||
|
lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length)
|
||
|
{
|
||
|
// Validate offset of the block to be repeated. It doesn't
|
||
|
// make sense to copy data behind the beginning of the stream.
|
||
|
// Leaving this check away would lead to a security problem,
|
||
|
// in which e.g. the data of the previously decoded file(s)
|
||
|
// would be leaked (or whatever happens to be in unused
|
||
|
// part of the dictionary buffer).
|
||
|
if (distance >= lz->pos && !lz->is_full)
|
||
|
return false;
|
||
|
|
||
|
// It also doesn't make sense to copy data farer than
|
||
|
// the dictionary size.
|
||
|
if (distance >= lz->requested_size)
|
||
|
return false;
|
||
|
|
||
|
// The caller must have checked these!
|
||
|
assert(distance <= lz->size);
|
||
|
assert(length > 0);
|
||
|
assert(length <= lz->match_max_len);
|
||
|
|
||
|
// Copy the amount of data requested by the decoder.
|
||
|
if (distance < length) {
|
||
|
// Source and target areas overlap, thus we can't use
|
||
|
// memcpy() nor even memmove() safely. :-(
|
||
|
// TODO: Copying byte by byte is slow. It might be
|
||
|
// worth optimizing this more if this case is common.
|
||
|
do {
|
||
|
lz->dict[lz->pos] = lz_get_byte(*lz, distance);
|
||
|
++lz->pos;
|
||
|
} while (--length > 0);
|
||
|
|
||
|
} else if (distance < lz->pos) {
|
||
|
// The easiest and fastest case
|
||
|
memcpy(lz->dict + lz->pos,
|
||
|
lz->dict + lz->pos - distance - 1,
|
||
|
length);
|
||
|
lz->pos += length;
|
||
|
|
||
|
} else {
|
||
|
// The bigger the dictionary, the more rare this
|
||
|
// case occurs. We need to "wrap" the dict, thus
|
||
|
// we might need two memcpy() to copy all the data.
|
||
|
assert(lz->is_full);
|
||
|
const uint32_t copy_pos = lz->pos - distance - 1 + lz->end;
|
||
|
uint32_t copy_size = lz->end - copy_pos;
|
||
|
|
||
|
if (copy_size < length) {
|
||
|
memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
|
||
|
copy_size);
|
||
|
lz->pos += copy_size;
|
||
|
copy_size = length - copy_size;
|
||
|
memcpy(lz->dict + lz->pos, lz->dict, copy_size);
|
||
|
lz->pos += copy_size;
|
||
|
} else {
|
||
|
memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
|
||
|
length);
|
||
|
lz->pos += length;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
#endif
|