1
0
Fork 0
mirror of https://git.tukaani.org/xz.git synced 2024-04-04 12:36:23 +02:00

Added initial support for preset dictionary for raw LZMA1

and LZMA2. It is not supported by the .xz format or the xz
command line tool yet.
This commit is contained in:
Lasse Collin 2009-01-27 18:36:05 +02:00
parent 449b8c832b
commit f76e39cf93
8 changed files with 77 additions and 27 deletions

View file

@ -210,7 +210,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
const lzma_filter_info *filters, const lzma_filter_info *filters,
lzma_ret (*lz_init)(lzma_lz_decoder *lz, lzma_ret (*lz_init)(lzma_lz_decoder *lz,
lzma_allocator *allocator, const void *options, lzma_allocator *allocator, const void *options,
size_t *dict_size)) lzma_lz_options *lz_options))
{ {
// Allocate the base structure if it isn't already allocated. // Allocate the base structure if it isn't already allocated.
if (next->coder == NULL) { if (next->coder == NULL) {
@ -229,17 +229,17 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
// Allocate and initialize the LZ-based decoder. It will also give // Allocate and initialize the LZ-based decoder. It will also give
// us the dictionary size. // us the dictionary size.
size_t dict_size; lzma_lz_options lz_options;
return_if_error(lz_init(&next->coder->lz, allocator, return_if_error(lz_init(&next->coder->lz, allocator,
filters[0].options, &dict_size)); filters[0].options, &lz_options));
// If the dictionary size is very small, increase it to 4096 bytes. // If the dictionary size is very small, increase it to 4096 bytes.
// This is to prevent constant wrapping of the dictionary, which // This is to prevent constant wrapping of the dictionary, which
// would slow things down. The downside is that since we don't check // would slow things down. The downside is that since we don't check
// separately for the real dictionary size, we may happily accept // separately for the real dictionary size, we may happily accept
// corrupt files. // corrupt files.
if (dict_size < 4096) if (lz_options.dict_size < 4096)
dict_size = 4096; lz_options.dict_size = 4096;
// Make dictionary size a multipe of 16. Some LZ-based decoders like // Make dictionary size a multipe of 16. Some LZ-based decoders like
// LZMA use the lowest bits lzma_dict.pos to know the alignment of the // LZMA use the lowest bits lzma_dict.pos to know the alignment of the
@ -248,23 +248,38 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
// recommended to give aligned buffers to liblzma. // recommended to give aligned buffers to liblzma.
// //
// Avoid integer overflow. // Avoid integer overflow.
if (dict_size > SIZE_MAX - 15) if (lz_options.dict_size > SIZE_MAX - 15)
return LZMA_MEM_ERROR; return LZMA_MEM_ERROR;
dict_size = (dict_size + 15) & ~((size_t)(15)); lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
// Allocate and initialize the dictionary. // Allocate and initialize the dictionary.
if (next->coder->dict.size != dict_size) { if (next->coder->dict.size != lz_options.dict_size) {
lzma_free(next->coder->dict.buf, allocator); lzma_free(next->coder->dict.buf, allocator);
next->coder->dict.buf = lzma_alloc(dict_size, allocator); next->coder->dict.buf
= lzma_alloc(lz_options.dict_size, allocator);
if (next->coder->dict.buf == NULL) if (next->coder->dict.buf == NULL)
return LZMA_MEM_ERROR; return LZMA_MEM_ERROR;
next->coder->dict.size = dict_size; next->coder->dict.size = lz_options.dict_size;
} }
lz_decoder_reset(next->coder); lz_decoder_reset(next->coder);
// Use the preset dictionary if it was given to us.
if (lz_options.preset_dict != NULL
&& lz_options.preset_dict_size > 0) {
// If the preset dictionary is bigger than the actual
// dictionary, copy only the tail.
const size_t copy_size = MIN(lz_options.preset_dict_size,
lz_options.dict_size);
const size_t offset = lz_options.preset_dict_size - copy_size;
memcpy(next->coder->dict.buf, lz_options.preset_dict + offset,
copy_size);
next->coder->dict.pos = copy_size;
next->coder->dict.full = copy_size;
}
// Miscellaneous initializations // Miscellaneous initializations
next->coder->next_finished = false; next->coder->next_finished = false;
next->coder->this_finished = false; next->coder->this_finished = false;

View file

@ -51,6 +51,13 @@ typedef struct {
} lzma_dict; } lzma_dict;
typedef struct {
size_t dict_size;
const uint8_t *preset_dict;
size_t preset_dict_size;
} lzma_lz_options;
typedef struct { typedef struct {
/// Data specific to the LZ-based decoder /// Data specific to the LZ-based decoder
lzma_coder *coder; lzma_coder *coder;
@ -86,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
lzma_allocator *allocator, const lzma_filter_info *filters, lzma_allocator *allocator, const lzma_filter_info *filters,
lzma_ret (*lz_init)(lzma_lz_decoder *lz, lzma_ret (*lz_init)(lzma_lz_decoder *lz,
lzma_allocator *allocator, const void *options, lzma_allocator *allocator, const void *options,
size_t *dict_size)); lzma_lz_options *lz_options));
extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);

View file

@ -363,7 +363,8 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
static bool static bool
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator) lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
const lzma_lz_options *lz_options)
{ {
// Allocate the history buffer. // Allocate the history buffer.
if (mf->buffer == NULL) { if (mf->buffer == NULL) {
@ -421,6 +422,19 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
// we avoid wasting RAM and improve initialization speed a lot. // we avoid wasting RAM and improve initialization speed a lot.
//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t)); //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
// Handle preset dictionary.
if (lz_options->preset_dict != NULL
&& lz_options->preset_dict_size > 0) {
// If the preset dictionary is bigger than the actual
// dictionary, use only the tail.
mf->write_pos = MIN(lz_options->preset_dict_size, mf->size);
memcpy(mf->buffer, lz_options->preset_dict
+ lz_options->preset_dict_size - mf->write_pos,
mf->write_pos);
mf->action = LZMA_SYNC_FLUSH;
mf->skip(mf, mf->write_pos);
}
mf->action = LZMA_RUN; mf->action = LZMA_RUN;
return false; return false;
@ -509,7 +523,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
// Allocate new buffers if needed, and do the rest of // Allocate new buffers if needed, and do the rest of
// the initialization. // the initialization.
if (lz_encoder_init(&next->coder->mf, allocator)) if (lz_encoder_init(&next->coder->mf, allocator, &lz_options))
return LZMA_MEM_ERROR; return LZMA_MEM_ERROR;
// Initialize the next filter in the chain, if any. // Initialize the next filter in the chain, if any.

View file

@ -230,7 +230,7 @@ lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
static lzma_ret static lzma_ret
lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
const void *options, size_t *dict_size) const void *opt, lzma_lz_options *lz_options)
{ {
if (lz->coder == NULL) { if (lz->coder == NULL) {
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
@ -243,12 +243,15 @@ lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
lz->coder->lzma = LZMA_LZ_DECODER_INIT; lz->coder->lzma = LZMA_LZ_DECODER_INIT;
} }
const lzma_options_lzma *options = opt;
lz->coder->sequence = SEQ_CONTROL; lz->coder->sequence = SEQ_CONTROL;
lz->coder->need_properties = true; lz->coder->need_properties = true;
lz->coder->need_dictionary_reset = true; lz->coder->need_dictionary_reset = options->preset_dict == NULL
|| options->preset_dict_size == 0;
return lzma_lzma_decoder_create(&lz->coder->lzma, return lzma_lzma_decoder_create(&lz->coder->lzma,
allocator, options, dict_size); allocator, options, lz_options);
} }

View file

@ -318,15 +318,17 @@ lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
lz->coder->lzma = NULL; lz->coder->lzma = NULL;
} }
lz->coder->sequence = SEQ_INIT;
lz->coder->need_properties = true;
lz->coder->need_state_reset = false;
lz->coder->need_dictionary_reset = true;
lz->coder->opt_cur = *(const lzma_options_lzma *)(options); lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
lz->coder->opt_new = lz->coder->opt_cur.persistent lz->coder->opt_new = lz->coder->opt_cur.persistent
? options : NULL; ? options : NULL;
lz->coder->sequence = SEQ_INIT;
lz->coder->need_properties = true;
lz->coder->need_state_reset = false;
lz->coder->need_dictionary_reset
= lz->coder->opt_cur.preset_dict == NULL
|| lz->coder->opt_cur.preset_dict_size == 0;
// Initialize LZMA encoder // Initialize LZMA encoder
return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator, return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
&lz->coder->opt_cur, lz_options)); &lz->coder->opt_cur, lz_options));

View file

@ -941,7 +941,7 @@ lzma_decoder_reset(lzma_coder *coder, const void *opt)
extern lzma_ret extern lzma_ret
lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
const void *opt, size_t *dict_size) const void *opt, lzma_lz_options *lz_options)
{ {
if (lz->coder == NULL) { if (lz->coder == NULL) {
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
@ -956,7 +956,9 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
// All dictionary sizes are OK here. LZ decoder will take care of // All dictionary sizes are OK here. LZ decoder will take care of
// the special cases. // the special cases.
const lzma_options_lzma *options = opt; const lzma_options_lzma *options = opt;
*dict_size = options->dict_size; lz_options->dict_size = options->dict_size;
lz_options->preset_dict = options->preset_dict;
lz_options->preset_dict_size = options->preset_dict_size;
return LZMA_OK; return LZMA_OK;
} }
@ -967,13 +969,13 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
/// the LZ initialization). /// the LZ initialization).
static lzma_ret static lzma_ret
lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
const void *options, size_t *dict_size) const void *options, lzma_lz_options *lz_options)
{ {
if (!is_lclppb_valid(options)) if (!is_lclppb_valid(options))
return LZMA_PROG_ERROR; return LZMA_PROG_ERROR;
return_if_error(lzma_lzma_decoder_create( return_if_error(lzma_lzma_decoder_create(
lz, allocator, options, dict_size)); lz, allocator, options, lz_options));
lzma_decoder_reset(lz->coder, options); lzma_decoder_reset(lz->coder, options);
lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN); lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);

View file

@ -48,7 +48,7 @@ extern bool lzma_lzma_lclppb_decode(
/// LZMA2 decoders. /// LZMA2 decoders.
extern lzma_ret lzma_lzma_decoder_create( extern lzma_ret lzma_lzma_decoder_create(
lzma_lz_decoder *lz, lzma_allocator *allocator, lzma_lz_decoder *lz, lzma_allocator *allocator,
const void *opt, size_t *dict_size); const void *opt, lzma_lz_options *lz_options);
/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2 /// Gets memory usage without validating lc/lp/pb. This is used by LZMA2
/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb. /// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb.

View file

@ -274,6 +274,8 @@ encode_symbol(lzma_coder *coder, lzma_mf *mf,
static bool static bool
encode_init(lzma_coder *coder, lzma_mf *mf) encode_init(lzma_coder *coder, lzma_mf *mf)
{ {
assert(mf_position(mf) == 0);
if (mf->read_pos == mf->read_limit) { if (mf->read_pos == mf->read_limit) {
if (mf->action == LZMA_RUN) if (mf->action == LZMA_RUN)
return false; // We cannot do anything. return false; // We cannot do anything.
@ -594,7 +596,12 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
return LZMA_OPTIONS_ERROR; return LZMA_OPTIONS_ERROR;
} }
coder->is_initialized = false; // We don't need to write the first byte as literal if there is
// a non-empty preset dictionary. encode_init() wouldn't even work
// if there is a non-empty preset dictionary, because encode_init()
// assumes that position is zero and previous byte is also zero.
coder->is_initialized = options->preset_dict != NULL
&& options->preset_dict_size > 0;
coder->is_flushed = false; coder->is_flushed = false;
set_lz_options(lz_options, options); set_lz_options(lz_options, options);