xz-archive/src/liblzma/lz/lz_decoder.h

///////////////////////////////////////////////////////////////////////////////
//
/// \file       lz_decoder.h
/// \brief      LZ out window
//
//  Copyright (C) 1999-2006 Igor Pavlov
//  Copyright (C) 2007 Lasse Collin
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_LZ_OUT_H
#define LZMA_LZ_OUT_H

#include "common.h"


/// Get a byte from the history buffer.
#define lz_get_byte(lz, distance) \
	((distance) < (lz).pos \
		? (lz).dict[(lz).pos - (distance) - 1] \
		: (lz).dict[(lz).pos - (distance) - 1 + (lz).end])


/// Test if dictionary is empty.
#define lz_is_empty(lz) \
	((lz).pos == 0 && !(lz).is_full)


#define LZMA_LZ_DECODER_INIT \
	(lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 }


typedef struct {
	/// Function to do the actual decoding (LZMA or Inflate)
	bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in,
			size_t *restrict in_pos, size_t size_in,
			bool has_safe_buffer);

	/// Pointer to dictionary (history) buffer.
	/// \note Not 'restrict' because can alias next_out.
	uint8_t *dict;

	/// Next write goes to dict[pos].
	size_t pos;

	/// Next byte to flush is buffer[start].
	size_t start;

	/// First byte to not flush is buffer[end].
	size_t end;

	/// First position to which data must not be written.
	size_t limit;

	/// True if dictionary has needed wrapping.
	bool is_full;

	/// True if process() has detected End of Payload Marker.
	bool eopm_detected;

	/// True if the next coder in the chain has returned LZMA_STREAM_END.
	bool next_finished;

	/// True if the LZ decoder (e.g. LZMA) has detected End of Payload
	/// Marker. This may become true before next_finished becomes true.
	bool this_finished;

	/// When pos >= must_flush_pos, we must not call process().
	size_t must_flush_pos;

	/// Maximum number of bytes that a single decoding loop inside
	/// process() can produce data into dict. This amount is kept
	/// always available at dict + pos i.e. it is safe to write a byte
	/// to dict[pos + match_max_len - 1].
	size_t match_max_len;

	/// Number of bytes allocated to dict.
	size_t size;

	/// Requested size of the dictionary. This is needed because we avoid
	/// using extremely tiny history buffers.
	size_t requested_size;

	/// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown.
	lzma_vli uncompressed_size;

	/// Number of bytes currently in temp[].
	size_t temp_size;

	/// Temporary buffer needed when
	/// 1) we cannot make the input buffer completely empty; or
	/// 2) we are not the last filter in the chain.
	uint8_t temp[LZMA_BUFFER_SIZE];

} lzma_lz_decoder;


/////////////////////////
// Function prototypes //
/////////////////////////

extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz,
		lzma_allocator *allocator, bool (*process)(
			lzma_coder *restrict coder, const uint8_t *restrict in,
			size_t *restrict in_pos, size_t in_size,
			bool has_safe_buffer),
		size_t history_size, size_t match_max_len);

extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size,
		lzma_action action);

/// Deallocates the history buffer if one exists.
extern void lzma_lz_decoder_end(
		lzma_lz_decoder *lz, lzma_allocator *allocator);

//////////////////////
// Inline functions //
//////////////////////

// Repeat a block of data from the history. Because memcpy() is faster
// than copying byte by byte in a loop, the copying process gets split
// into three cases:
// 1. distance < length
//    Source and target areas overlap, thus we can't use memcpy()
//    (nor memmove()) safely.
//    TODO: If this is common enough, it might be worth optimizing this
//    more e.g. by checking if distance > sizeof(uint8_t*) and using
//    memcpy in small chunks.
// 2. distance < pos
//    This is the easiest and the fastest case. The block being copied
//    is a contiguous piece in the history buffer. The buffer offset
//    doesn't need wrapping.
// 3. distance >= pos
//    We need to wrap the position, because otherwise we would try copying
//    behind the first byte of the allocated buffer. It is possible that
//    the block is fragmeneted into two pieces, thus we might need to call
//    memcpy() twice.
// NOTE: The function using this macro must ensure that length is positive
// and that distance is FIXME
static inline bool
lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length)
{
	// Validate offset of the block to be repeated. It doesn't
	// make sense to copy data behind the beginning of the stream.
	// Leaving this check away would lead to a security problem,
	// in which e.g. the data of the previously decoded file(s)
	// would be leaked (or whatever happens to be in unused
	// part of the dictionary buffer).
	if (unlikely(distance >= lz->pos && !lz->is_full))
		return false;

	// It also doesn't make sense to copy data farer than
	// the dictionary size.
	if (unlikely(distance >= lz->requested_size))
		return false;

	// The caller must have checked these!
	assert(distance <= lz->size);
	assert(length > 0);
	assert(length <= lz->match_max_len);

	// Copy the amount of data requested by the decoder.
	if (distance < length) {
		// Source and target areas overlap, thus we can't use
		// memcpy() nor even memmove() safely. :-(
		// TODO: Copying byte by byte is slow. It might be
		// worth optimizing this more if this case is common.
		do {
			lz->dict[lz->pos] = lz_get_byte(*lz, distance);
			++lz->pos;
		} while (--length > 0);

	} else if (distance < lz->pos) {
		// The easiest and fastest case
		memcpy(lz->dict + lz->pos,
				lz->dict + lz->pos - distance - 1,
				length);
		lz->pos += length;

	} else {
		// The bigger the dictionary, the more rare this
		// case occurs. We need to "wrap" the dict, thus
		// we might need two memcpy() to copy all the data.
		assert(lz->is_full);
		const uint32_t copy_pos = lz->pos - distance - 1 + lz->end;
		uint32_t copy_size = lz->end - copy_pos;

		if (copy_size < length) {
			memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
					copy_size);
			lz->pos += copy_size;
			copy_size = length - copy_size;
			memcpy(lz->dict + lz->pos, lz->dict, copy_size);
			lz->pos += copy_size;
		} else {
			memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
					length);
			lz->pos += length;
		}
	}

	return true;
}

#endif
Imported to git. 2007-12-08 23:42:33 +01:00			`///////////////////////////////////////////////////////////////////////////////`
			`//`
			`/// \file lz_decoder.h`
			`/// \brief LZ out window`
			`//`
			`// Copyright (C) 1999-2006 Igor Pavlov`
			`// Copyright (C) 2007 Lasse Collin`
			`//`
			`// This library is free software; you can redistribute it and/or`
			`// modify it under the terms of the GNU Lesser General Public`
			`// License as published by the Free Software Foundation; either`
			`// version 2.1 of the License, or (at your option) any later version.`
			`//`
			`// This library is distributed in the hope that it will be useful,`
			`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`// Lesser General Public License for more details.`
			`//`
			`///////////////////////////////////////////////////////////////////////////////`

			`#ifndef LZMA_LZ_OUT_H`
			`#define LZMA_LZ_OUT_H`

			`#include "common.h"`


			`/// Get a byte from the history buffer.`
			`#define lz_get_byte(lz, distance) \`
			`((distance) < (lz).pos \`
			`? (lz).dict[(lz).pos - (distance) - 1] \`
			`: (lz).dict[(lz).pos - (distance) - 1 + (lz).end])`


Update the code to mostly match the new simpler file format specification. Simplify things by removing most of the support for known uncompressed size in most places. There are some miscellaneous changes here and there too. The API of liblzma has got many changes and still some more will be done soon. While most of the code has been updated, some things are not fixed (the command line tool will choke with invalid filter chain, if nothing else). Subblock filter is somewhat broken for now. It will be updated once the encoded format of the Subblock filter has been decided. 2008-06-18 17:02:10 +02:00			`/// Test if dictionary is empty.`
			`#define lz_is_empty(lz) \`
			`((lz).pos == 0 && !(lz).is_full)`


Imported to git. 2007-12-08 23:42:33 +01:00			`#define LZMA_LZ_DECODER_INIT \`
			`(lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 }`


			`typedef struct {`
			`/// Function to do the actual decoding (LZMA or Inflate)`
			`bool (process)(lzma_coder restrict coder, const uint8_t *restrict in,`
			`size_t *restrict in_pos, size_t size_in,`
			`bool has_safe_buffer);`

			`/// Pointer to dictionary (history) buffer.`
			`/// \note Not 'restrict' because can alias next_out.`
			`uint8_t *dict;`

			`/// Next write goes to dict[pos].`
			`size_t pos;`

			`/// Next byte to flush is buffer[start].`
			`size_t start;`

			`/// First byte to not flush is buffer[end].`
			`size_t end;`

			`/// First position to which data must not be written.`
			`size_t limit;`

			`/// True if dictionary has needed wrapping.`
			`bool is_full;`

			`/// True if process() has detected End of Payload Marker.`
			`bool eopm_detected;`

			`/// True if the next coder in the chain has returned LZMA_STREAM_END.`
			`bool next_finished;`

			`/// True if the LZ decoder (e.g. LZMA) has detected End of Payload`
			`/// Marker. This may become true before next_finished becomes true.`
			`bool this_finished;`

			`/// When pos >= must_flush_pos, we must not call process().`
			`size_t must_flush_pos;`

			`/// Maximum number of bytes that a single decoding loop inside`
			`/// process() can produce data into dict. This amount is kept`
			`/// always available at dict + pos i.e. it is safe to write a byte`
			`/// to dict[pos + match_max_len - 1].`
			`size_t match_max_len;`

			`/// Number of bytes allocated to dict.`
			`size_t size;`

			`/// Requested size of the dictionary. This is needed because we avoid`
			`/// using extremely tiny history buffers.`
			`size_t requested_size;`

			`/// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown.`
			`lzma_vli uncompressed_size;`

			`/// Number of bytes currently in temp[].`
			`size_t temp_size;`

			`/// Temporary buffer needed when`
			`/// 1) we cannot make the input buffer completely empty; or`
			`/// 2) we are not the last filter in the chain.`
			`uint8_t temp[LZMA_BUFFER_SIZE];`

			`} lzma_lz_decoder;`


			`/////////////////////////`
			`// Function prototypes //`
			`/////////////////////////`

			`extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz,`
			`lzma_allocator allocator, bool (process)(`
			`lzma_coder restrict coder, const uint8_t restrict in,`
			`size_t *restrict in_pos, size_t in_size,`
			`bool has_safe_buffer),`
			`size_t history_size, size_t match_max_len);`

			`extern lzma_ret lzma_lz_decode(lzma_coder coder, lzma_allocator allocator,`
			`const uint8_t restrict in, size_t restrict in_pos,`
			`size_t in_size, uint8_t *restrict out,`
			`size_t *restrict out_pos, size_t out_size,`
			`lzma_action action);`

			`/// Deallocates the history buffer if one exists.`
			`extern void lzma_lz_decoder_end(`
			`lzma_lz_decoder lz, lzma_allocator allocator);`

			`//////////////////////`
			`// Inline functions //`
			`//////////////////////`

			`// Repeat a block of data from the history. Because memcpy() is faster`
			`// than copying byte by byte in a loop, the copying process gets split`
			`// into three cases:`
			`// 1. distance < length`
			`// Source and target areas overlap, thus we can't use memcpy()`
			`// (nor memmove()) safely.`
			`// TODO: If this is common enough, it might be worth optimizing this`
			`// more e.g. by checking if distance > sizeof(uint8_t*) and using`
			`// memcpy in small chunks.`
			`// 2. distance < pos`
			`// This is the easiest and the fastest case. The block being copied`
			`// is a contiguous piece in the history buffer. The buffer offset`
			`// doesn't need wrapping.`
			`// 3. distance >= pos`
			`// We need to wrap the position, because otherwise we would try copying`
			`// behind the first byte of the allocated buffer. It is possible that`
			`// the block is fragmeneted into two pieces, thus we might need to call`
			`// memcpy() twice.`
			`// NOTE: The function using this macro must ensure that length is positive`
			`// and that distance is FIXME`
			`static inline bool`
			`lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length)`
			`{`
			`// Validate offset of the block to be repeated. It doesn't`
			`// make sense to copy data behind the beginning of the stream.`
			`// Leaving this check away would lead to a security problem,`
			`// in which e.g. the data of the previously decoded file(s)`
			`// would be leaked (or whatever happens to be in unused`
			`// part of the dictionary buffer).`
Update the code to mostly match the new simpler file format specification. Simplify things by removing most of the support for known uncompressed size in most places. There are some miscellaneous changes here and there too. The API of liblzma has got many changes and still some more will be done soon. While most of the code has been updated, some things are not fixed (the command line tool will choke with invalid filter chain, if nothing else). Subblock filter is somewhat broken for now. It will be updated once the encoded format of the Subblock filter has been decided. 2008-06-18 17:02:10 +02:00			`if (unlikely(distance >= lz->pos && !lz->is_full))`
Imported to git. 2007-12-08 23:42:33 +01:00			`return false;`

			`// It also doesn't make sense to copy data farer than`
			`// the dictionary size.`
Update the code to mostly match the new simpler file format specification. Simplify things by removing most of the support for known uncompressed size in most places. There are some miscellaneous changes here and there too. The API of liblzma has got many changes and still some more will be done soon. While most of the code has been updated, some things are not fixed (the command line tool will choke with invalid filter chain, if nothing else). Subblock filter is somewhat broken for now. It will be updated once the encoded format of the Subblock filter has been decided. 2008-06-18 17:02:10 +02:00			`if (unlikely(distance >= lz->requested_size))`
Imported to git. 2007-12-08 23:42:33 +01:00			`return false;`

			`// The caller must have checked these!`
			`assert(distance <= lz->size);`
			`assert(length > 0);`
			`assert(length <= lz->match_max_len);`

			`// Copy the amount of data requested by the decoder.`
			`if (distance < length) {`
			`// Source and target areas overlap, thus we can't use`
			`// memcpy() nor even memmove() safely. :-(`
			`// TODO: Copying byte by byte is slow. It might be`
			`// worth optimizing this more if this case is common.`
			`do {`
			`lz->dict[lz->pos] = lz_get_byte(*lz, distance);`
			`++lz->pos;`
			`} while (--length > 0);`

			`} else if (distance < lz->pos) {`
			`// The easiest and fastest case`
			`memcpy(lz->dict + lz->pos,`
			`lz->dict + lz->pos - distance - 1,`
			`length);`
			`lz->pos += length;`

			`} else {`
			`// The bigger the dictionary, the more rare this`
			`// case occurs. We need to "wrap" the dict, thus`
			`// we might need two memcpy() to copy all the data.`
			`assert(lz->is_full);`
			`const uint32_t copy_pos = lz->pos - distance - 1 + lz->end;`
			`uint32_t copy_size = lz->end - copy_pos;`

			`if (copy_size < length) {`
			`memcpy(lz->dict + lz->pos, lz->dict + copy_pos,`
			`copy_size);`
			`lz->pos += copy_size;`
			`copy_size = length - copy_size;`
			`memcpy(lz->dict + lz->pos, lz->dict, copy_size);`
			`lz->pos += copy_size;`
			`} else {`
			`memcpy(lz->dict + lz->pos, lz->dict + copy_pos,`
			`length);`
			`lz->pos += length;`
			`}`
			`}`

			`return true;`
			`}`

			`#endif`