mirror of
https://git.tukaani.org/xz.git
synced 2024-04-04 12:36:23 +02:00
Added initial support for preset dictionary for raw LZMA1
and LZMA2. It is not supported by the .xz format or the xz command line tool yet.
This commit is contained in:
parent
449b8c832b
commit
f76e39cf93
8 changed files with 77 additions and 27 deletions
|
@ -210,7 +210,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|||
const lzma_filter_info *filters,
|
||||
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
|
||||
lzma_allocator *allocator, const void *options,
|
||||
size_t *dict_size))
|
||||
lzma_lz_options *lz_options))
|
||||
{
|
||||
// Allocate the base structure if it isn't already allocated.
|
||||
if (next->coder == NULL) {
|
||||
|
@ -229,17 +229,17 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|||
|
||||
// Allocate and initialize the LZ-based decoder. It will also give
|
||||
// us the dictionary size.
|
||||
size_t dict_size;
|
||||
lzma_lz_options lz_options;
|
||||
return_if_error(lz_init(&next->coder->lz, allocator,
|
||||
filters[0].options, &dict_size));
|
||||
filters[0].options, &lz_options));
|
||||
|
||||
// If the dictionary size is very small, increase it to 4096 bytes.
|
||||
// This is to prevent constant wrapping of the dictionary, which
|
||||
// would slow things down. The downside is that since we don't check
|
||||
// separately for the real dictionary size, we may happily accept
|
||||
// corrupt files.
|
||||
if (dict_size < 4096)
|
||||
dict_size = 4096;
|
||||
if (lz_options.dict_size < 4096)
|
||||
lz_options.dict_size = 4096;
|
||||
|
||||
// Make dictionary size a multipe of 16. Some LZ-based decoders like
|
||||
// LZMA use the lowest bits lzma_dict.pos to know the alignment of the
|
||||
|
@ -248,23 +248,38 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|||
// recommended to give aligned buffers to liblzma.
|
||||
//
|
||||
// Avoid integer overflow.
|
||||
if (dict_size > SIZE_MAX - 15)
|
||||
if (lz_options.dict_size > SIZE_MAX - 15)
|
||||
return LZMA_MEM_ERROR;
|
||||
|
||||
dict_size = (dict_size + 15) & ~((size_t)(15));
|
||||
lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
|
||||
|
||||
// Allocate and initialize the dictionary.
|
||||
if (next->coder->dict.size != dict_size) {
|
||||
if (next->coder->dict.size != lz_options.dict_size) {
|
||||
lzma_free(next->coder->dict.buf, allocator);
|
||||
next->coder->dict.buf = lzma_alloc(dict_size, allocator);
|
||||
next->coder->dict.buf
|
||||
= lzma_alloc(lz_options.dict_size, allocator);
|
||||
if (next->coder->dict.buf == NULL)
|
||||
return LZMA_MEM_ERROR;
|
||||
|
||||
next->coder->dict.size = dict_size;
|
||||
next->coder->dict.size = lz_options.dict_size;
|
||||
}
|
||||
|
||||
lz_decoder_reset(next->coder);
|
||||
|
||||
// Use the preset dictionary if it was given to us.
|
||||
if (lz_options.preset_dict != NULL
|
||||
&& lz_options.preset_dict_size > 0) {
|
||||
// If the preset dictionary is bigger than the actual
|
||||
// dictionary, copy only the tail.
|
||||
const size_t copy_size = MIN(lz_options.preset_dict_size,
|
||||
lz_options.dict_size);
|
||||
const size_t offset = lz_options.preset_dict_size - copy_size;
|
||||
memcpy(next->coder->dict.buf, lz_options.preset_dict + offset,
|
||||
copy_size);
|
||||
next->coder->dict.pos = copy_size;
|
||||
next->coder->dict.full = copy_size;
|
||||
}
|
||||
|
||||
// Miscellaneous initializations
|
||||
next->coder->next_finished = false;
|
||||
next->coder->this_finished = false;
|
||||
|
|
|
@ -51,6 +51,13 @@ typedef struct {
|
|||
} lzma_dict;
|
||||
|
||||
|
||||
typedef struct {
|
||||
size_t dict_size;
|
||||
const uint8_t *preset_dict;
|
||||
size_t preset_dict_size;
|
||||
} lzma_lz_options;
|
||||
|
||||
|
||||
typedef struct {
|
||||
/// Data specific to the LZ-based decoder
|
||||
lzma_coder *coder;
|
||||
|
@ -86,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
|
|||
lzma_allocator *allocator, const lzma_filter_info *filters,
|
||||
lzma_ret (*lz_init)(lzma_lz_decoder *lz,
|
||||
lzma_allocator *allocator, const void *options,
|
||||
size_t *dict_size));
|
||||
lzma_lz_options *lz_options));
|
||||
|
||||
extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
|
||||
|
||||
|
|
|
@ -363,7 +363,8 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
|
|||
|
||||
|
||||
static bool
|
||||
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
|
||||
lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
|
||||
const lzma_lz_options *lz_options)
|
||||
{
|
||||
// Allocate the history buffer.
|
||||
if (mf->buffer == NULL) {
|
||||
|
@ -421,6 +422,19 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
|
|||
// we avoid wasting RAM and improve initialization speed a lot.
|
||||
//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
|
||||
|
||||
// Handle preset dictionary.
|
||||
if (lz_options->preset_dict != NULL
|
||||
&& lz_options->preset_dict_size > 0) {
|
||||
// If the preset dictionary is bigger than the actual
|
||||
// dictionary, use only the tail.
|
||||
mf->write_pos = MIN(lz_options->preset_dict_size, mf->size);
|
||||
memcpy(mf->buffer, lz_options->preset_dict
|
||||
+ lz_options->preset_dict_size - mf->write_pos,
|
||||
mf->write_pos);
|
||||
mf->action = LZMA_SYNC_FLUSH;
|
||||
mf->skip(mf, mf->write_pos);
|
||||
}
|
||||
|
||||
mf->action = LZMA_RUN;
|
||||
|
||||
return false;
|
||||
|
@ -509,7 +523,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
|
|||
|
||||
// Allocate new buffers if needed, and do the rest of
|
||||
// the initialization.
|
||||
if (lz_encoder_init(&next->coder->mf, allocator))
|
||||
if (lz_encoder_init(&next->coder->mf, allocator, &lz_options))
|
||||
return LZMA_MEM_ERROR;
|
||||
|
||||
// Initialize the next filter in the chain, if any.
|
||||
|
|
|
@ -230,7 +230,7 @@ lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
|
|||
|
||||
static lzma_ret
|
||||
lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
||||
const void *options, size_t *dict_size)
|
||||
const void *opt, lzma_lz_options *lz_options)
|
||||
{
|
||||
if (lz->coder == NULL) {
|
||||
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
||||
|
@ -243,12 +243,15 @@ lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
|||
lz->coder->lzma = LZMA_LZ_DECODER_INIT;
|
||||
}
|
||||
|
||||
const lzma_options_lzma *options = opt;
|
||||
|
||||
lz->coder->sequence = SEQ_CONTROL;
|
||||
lz->coder->need_properties = true;
|
||||
lz->coder->need_dictionary_reset = true;
|
||||
lz->coder->need_dictionary_reset = options->preset_dict == NULL
|
||||
|| options->preset_dict_size == 0;
|
||||
|
||||
return lzma_lzma_decoder_create(&lz->coder->lzma,
|
||||
allocator, options, dict_size);
|
||||
allocator, options, lz_options);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -318,15 +318,17 @@ lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
|
|||
lz->coder->lzma = NULL;
|
||||
}
|
||||
|
||||
lz->coder->sequence = SEQ_INIT;
|
||||
lz->coder->need_properties = true;
|
||||
lz->coder->need_state_reset = false;
|
||||
lz->coder->need_dictionary_reset = true;
|
||||
|
||||
lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
|
||||
lz->coder->opt_new = lz->coder->opt_cur.persistent
|
||||
? options : NULL;
|
||||
|
||||
lz->coder->sequence = SEQ_INIT;
|
||||
lz->coder->need_properties = true;
|
||||
lz->coder->need_state_reset = false;
|
||||
lz->coder->need_dictionary_reset
|
||||
= lz->coder->opt_cur.preset_dict == NULL
|
||||
|| lz->coder->opt_cur.preset_dict_size == 0;
|
||||
|
||||
// Initialize LZMA encoder
|
||||
return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
|
||||
&lz->coder->opt_cur, lz_options));
|
||||
|
|
|
@ -941,7 +941,7 @@ lzma_decoder_reset(lzma_coder *coder, const void *opt)
|
|||
|
||||
extern lzma_ret
|
||||
lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
||||
const void *opt, size_t *dict_size)
|
||||
const void *opt, lzma_lz_options *lz_options)
|
||||
{
|
||||
if (lz->coder == NULL) {
|
||||
lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
||||
|
@ -956,7 +956,9 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
|||
// All dictionary sizes are OK here. LZ decoder will take care of
|
||||
// the special cases.
|
||||
const lzma_options_lzma *options = opt;
|
||||
*dict_size = options->dict_size;
|
||||
lz_options->dict_size = options->dict_size;
|
||||
lz_options->preset_dict = options->preset_dict;
|
||||
lz_options->preset_dict_size = options->preset_dict_size;
|
||||
|
||||
return LZMA_OK;
|
||||
}
|
||||
|
@ -967,13 +969,13 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
|||
/// the LZ initialization).
|
||||
static lzma_ret
|
||||
lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
|
||||
const void *options, size_t *dict_size)
|
||||
const void *options, lzma_lz_options *lz_options)
|
||||
{
|
||||
if (!is_lclppb_valid(options))
|
||||
return LZMA_PROG_ERROR;
|
||||
|
||||
return_if_error(lzma_lzma_decoder_create(
|
||||
lz, allocator, options, dict_size));
|
||||
lz, allocator, options, lz_options));
|
||||
|
||||
lzma_decoder_reset(lz->coder, options);
|
||||
lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
|
||||
|
|
|
@ -48,7 +48,7 @@ extern bool lzma_lzma_lclppb_decode(
|
|||
/// LZMA2 decoders.
|
||||
extern lzma_ret lzma_lzma_decoder_create(
|
||||
lzma_lz_decoder *lz, lzma_allocator *allocator,
|
||||
const void *opt, size_t *dict_size);
|
||||
const void *opt, lzma_lz_options *lz_options);
|
||||
|
||||
/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2
|
||||
/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb.
|
||||
|
|
|
@ -274,6 +274,8 @@ encode_symbol(lzma_coder *coder, lzma_mf *mf,
|
|||
static bool
|
||||
encode_init(lzma_coder *coder, lzma_mf *mf)
|
||||
{
|
||||
assert(mf_position(mf) == 0);
|
||||
|
||||
if (mf->read_pos == mf->read_limit) {
|
||||
if (mf->action == LZMA_RUN)
|
||||
return false; // We cannot do anything.
|
||||
|
@ -594,7 +596,12 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
|
|||
return LZMA_OPTIONS_ERROR;
|
||||
}
|
||||
|
||||
coder->is_initialized = false;
|
||||
// We don't need to write the first byte as literal if there is
|
||||
// a non-empty preset dictionary. encode_init() wouldn't even work
|
||||
// if there is a non-empty preset dictionary, because encode_init()
|
||||
// assumes that position is zero and previous byte is also zero.
|
||||
coder->is_initialized = options->preset_dict != NULL
|
||||
&& options->preset_dict_size > 0;
|
||||
coder->is_flushed = false;
|
||||
|
||||
set_lz_options(lz_options, options);
|
||||
|
|
Loading…
Reference in a new issue