1
0
Fork 0
mirror of https://git.tukaani.org/xz.git synced 2024-04-04 12:36:23 +02:00
xz-archive/src/liblzma/lz/lz_encoder.h
Lasse Collin 369f72fd65 Fix a buffer overflow in the LZMA encoder. It was due to my
misunderstanding of the code. There's no tiny fix for this
problem, so I also cleaned up the code in general.

This reduces the speed of the encoder 2-5 % in the fastest
compression mode ("lzma -1"). High compression modes should
have no noticeable performance difference.

This commit breaks things (especially LZMA_SYNC_FLUSH) but I
will fix them once the new format and LZMA2 have been roughly
implemented. Plain LZMA won't support LZMA_SYNC_FLUSH at all
and won't be supported in the new .lzma format. This may
still change but this is what it looks like now.

Support for known uncompressed size (that is, LZMA or LZMA2
without EOPM) is likely to go away. This means there will
be API changes.
2008-06-01 12:48:17 +03:00

155 lines
5.2 KiB
C

///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder.h
/// \brief LZ in window and match finder API
//
// Copyright (C) 1999-2006 Igor Pavlov
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZ_ENCODER_H
#define LZMA_LZ_ENCODER_H
#include "common.h"
/// \brief State of the LZ encoder: input window and match finder
///
/// The first group of members describes the sliding input window; the
/// second group holds the match finder's function pointers and tables.
typedef struct lzma_lz_encoder_s lzma_lz_encoder;
struct lzma_lz_encoder_s {
/// Stage the encoder is currently in.
/// NOTE(review): the names presumably mirror the lzma_action values
/// (run / sync flush / finish) passed to lzma_lz_encode() - confirm.
enum {
SEQ_RUN,
SEQ_FLUSH,
SEQ_FINISH,
} sequence;
/// Function to do the actual encoding from the sliding input window
/// to the output stream. It writes to out[*out_pos] onwards and must
/// not go past out_size.
/// NOTE(review): the meaning of the bool return value is not visible
/// in this header - confirm against the implementation.
bool (*process)(lzma_coder *coder, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size);
///////////////
// In Window //
///////////////
/// Pointer to buffer with data to be compressed
uint8_t *buffer;
/// Total size of the allocated buffer (that is, including all
/// the extra space)
size_t size;
/// Match finders store locations of matches using 32-bit integers.
/// To avoid adjusting several megabytes of integers every time the
/// input window is moved with move_window(), we only adjust the
/// offset of the buffer. Thus, buffer[match_finder_pos - offset]
/// is the byte pointed by match_finder_pos.
size_t offset;
/// buffer[read_pos] is the current byte.
size_t read_pos;
/// As long as read_pos is less than read_limit, there is enough
/// input available in buffer for at least one encoding loop.
///
/// Because of the stateful API, read_limit may and will get greater
/// than read_pos quite often. This is taken into account when
/// calculating the value for keep_size_after.
///
/// NOTE(review): the first sentence makes read_pos < read_limit the
/// normal state, so the second sentence probably meant that read_pos
/// may get greater than read_limit - confirm against the encoder code.
size_t read_limit;
/// buffer[write_pos] is the first byte that doesn't contain valid
/// uncompressed data; that is, the next input byte will be copied
/// to buffer[write_pos].
size_t write_pos;
/// Number of bytes not hashed before read_pos. This is needed to
/// restart the match finder after LZMA_SYNC_FLUSH.
size_t pending;
/// Number of bytes that must be kept available in our input history.
/// That is, once keep_size_before bytes have been processed,
/// buffer[read_pos - keep_size_before] is the oldest byte that
/// must be available for reading.
size_t keep_size_before;
/// Number of bytes that must be kept in buffer after read_pos.
/// That is, read_pos <= write_pos - keep_size_after as long as
/// stream_end_was_reached is false (once it is true, read_pos
/// is allowed to reach write_pos).
///
/// NOTE(review): this struct has no member named
/// stream_end_was_reached; the condition presumably maps to the
/// sequence member now - confirm and update the wording.
size_t keep_size_after;
//////////////////
// Match Finder //
//////////////////
// Pointers to match finder functions
/// Looks for matches at the current position and reports them through
/// the distances array.
/// NOTE(review): the layout of distances is not visible in this
/// header - confirm against the match finder implementations.
void (*get_matches)(lzma_lz_encoder *restrict lz,
uint32_t *restrict distances);
/// Advances the match finder by num bytes.
/// NOTE(review): presumably without reporting matches - confirm.
void (*skip)(lzma_lz_encoder *restrict lz, uint32_t num);
// Match finder data
uint32_t *hash; // TODO: Check if hash aliases son
uint32_t *son; // and add 'restrict' if possible.
// NOTE(review): the roles of the members below are inferred from their
// names only; confirm against the match finder implementations.
uint32_t cyclic_buffer_pos;
uint32_t cyclic_buffer_size; // Must be dictionary_size + 1.
// hash_mask, hash_size_sum and num_items have the same names as the
// output parameters of lzma_lz_encoder_hash_properties().
uint32_t hash_mask;
uint32_t cut_value;
uint32_t hash_size_sum;
uint32_t num_items;
uint32_t match_max_len;
};
/// \brief Initializer for an lzma_lz_encoder that owns no memory yet
///
/// Expands to a compound literal that sets buffer and hash to NULL and
/// size and num_items to zero. Members that are not named here are
/// zero-initialized as usual for a compound literal with designated
/// initializers.
#define LZMA_LZ_ENCODER_INIT \
(lzma_lz_encoder){ \
.buffer = NULL, \
.size = 0, \
.hash = NULL, \
.num_items = 0, \
}
/// \brief Calculates the hash table properties of a match finder
///
/// NOTE(review): the previous comment was truncated to "Calculates".
/// Judging by the signature, this derives hash_mask, hash_size_sum and
/// num_items from the match finder type and the history size and stores
/// them through the three pointers - confirm against the implementation.
///
/// \param match_finder Match finder the values are calculated for
/// \param history_size History (dictionary) size in bytes
/// \param hash_mask Where to store the hash mask
/// \param hash_size_sum Where to store the summed hash table size
/// \param num_items Where to store the number of items
///
/// \return NOTE(review): the meaning of the bool result is not visible in
/// this header - document which value signals an error.
extern bool lzma_lz_encoder_hash_properties(lzma_match_finder match_finder,
uint32_t history_size, uint32_t *restrict hash_mask,
uint32_t *restrict hash_size_sum,
uint32_t *restrict num_items);
/// \brief Sets up (or resets) the LZ encoder for a new encoding run
///
/// NOTE(review): the parameter descriptions below are inferred from the
/// parameter names and from the members of lzma_lz_encoder; confirm them
/// against the implementation.
///
/// \param lz Encoder state to (re)initialize
/// \param allocator Allocator for the window and match finder buffers
/// \param process Encoding callback, see lzma_lz_encoder.process
/// \param history_size Size of the history (dictionary)
/// \param additional_buffer_before Extra bytes to keep before the
/// current position
/// \param match_max_len Maximum length of a match
/// \param additional_buffer_after Extra bytes to keep after the
/// current position
/// \param match_finder Match finder to use
/// \param match_finder_cycles Search effort limit for the match finder
/// \param preset_dictionary Optional preset dictionary data
/// \param preset_dictionary_size Size of preset_dictionary in bytes
///
/// \return An lzma_ret status code.
//
// NOTE: liblzma doesn't use a callback API like the LZMA SDK does. The
// caller must make sure that keep_size_after is big enough for a single
// encoding pass, i.e. keep_size_after >= maximum number of bytes possibly
// needed after the current position between calls to lzma_lz_read().
// (lzma_lz_read() is not declared in this header.)
extern lzma_ret lzma_lz_encoder_reset(lzma_lz_encoder *lz,
lzma_allocator *allocator,
bool (*process)(lzma_coder *coder, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size),
size_t history_size, size_t additional_buffer_before,
size_t match_max_len, size_t additional_buffer_after,
lzma_match_finder match_finder, uint32_t match_finder_cycles,
const uint8_t *preset_dictionary,
size_t preset_dictionary_size);
/// \brief Frees the memory allocated for the input window and the match
/// finder buffers
///
/// \param lz Encoder state whose buffers are released
/// \param allocator Allocator used to free the buffers
/// NOTE(review): presumably this must be the allocator that was given to
/// lzma_lz_encoder_reset() - confirm.
extern void lzma_lz_encoder_end(
lzma_lz_encoder *lz, lzma_allocator *allocator);
/// \brief Encoding entry point of the LZ encoder
///
/// Input is taken from in[*in_pos] up to in_size and output is written to
/// out[*out_pos] up to out_size. The allocator argument is marked unused.
/// NOTE(review): presumably *in_pos and *out_pos are advanced by the
/// amounts consumed and produced, as elsewhere in liblzma - confirm.
///
/// \param coder Coder state
/// \param allocator Unused
/// \param in Input buffer
/// \param in_pos Current position in the input buffer
/// \param in_size Size of the input buffer
/// \param out Output buffer
/// \param out_pos Current position in the output buffer
/// \param out_size Size of the output buffer
/// \param action Requested action
///
/// \return An lzma_ret status code.
extern lzma_ret lzma_lz_encode(lzma_coder *coder,
lzma_allocator *allocator lzma_attribute((unused)),
const uint8_t *restrict in, size_t *restrict in_pos,
size_t in_size, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size,
lzma_action action);
/// \brief Normalizes the match finder state of the LZ encoder
///
/// This should not be called directly, but only via the move_pos() macro.
/// (move_pos() is not defined in this header.)
/// NOTE(review): presumably this rebases the 32-bit positions stored by
/// the match finder so that they do not overflow - confirm against the
/// implementation.
extern void lzma_lz_encoder_normalize(lzma_lz_encoder *lz);
#endif