From 5f0c5a04388f8334962c70bc37a8c2ff8f605e0a Mon Sep 17 00:00:00 2001 From: Jia Tan Date: Sat, 13 May 2023 19:54:33 +0800 Subject: [PATCH] xz: Allows --block-list filters to scale down memory usage. Previously, only the default filter chain could have its memory usage adjusted. The filter chains specified with --filtersX were not checked for memory usage. Now, all used filter chains will be adjusted if necessary. --- src/xz/coder.c | 267 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 213 insertions(+), 54 deletions(-) diff --git a/src/xz/coder.c b/src/xz/coder.c index 337b5363..a334e1e2 100644 --- a/src/xz/coder.c +++ b/src/xz/coder.c @@ -46,6 +46,12 @@ static lzma_filter filters[NUM_FILTER_CHAIN_MAX][LZMA_FILTERS_MAX + 1]; /// try to reference a filter chain that was not initialized. static uint32_t filters_init_mask = 1; +#ifdef HAVE_ENCODERS +/// Track the memory usage for all filter chains (default or --filtersX). +/// The memory usage may need to be scaled down depending on the memory limit. +static uint64_t filter_memusages[ARRAY_SIZE(filters)]; +#endif + /// Input and output buffers static io_buf in_buf; static io_buf out_buf; @@ -222,6 +228,7 @@ memlimit_too_small(uint64_t memory_usage) } +#ifdef HAVE_ENCODERS // For a given opt_block_list index, validate that the filter has been // set. If it has not been set, we must exit with error to avoid using // an uninitialized filter chain. @@ -233,6 +240,59 @@ validate_block_list_filter(const uint32_t filter_num) "not specified with --filters%u="), (unsigned)filter_num, (unsigned)filter_num); } + + +// Sets the memory usage for each filter chain. It will return the maximum +// memory usage of all of the filter chains. +static uint64_t +filters_memusage_max(const lzma_mt *mt, bool encode) +{ + uint64_t max_memusage = 0; + +#ifdef MYTHREAD_ENABLED + // Copy multithreaded options to a temporary struct since the + // filters member needs to be changed + lzma_mt mt_local; + if (mt != NULL) + mt_local = *mt; +#else + (void)mt; +#endif + + for (uint32_t i = 0; i < ARRAY_SIZE(filters); i++) { + if (!(filters_init_mask & (1 << i))) + continue; + + uint64_t memusage = UINT64_MAX; +#ifdef MYTHREAD_ENABLED + if (mt != NULL) { + mt_local.filters = filters[i]; + memusage = lzma_stream_encoder_mt_memusage(&mt_local); + filter_memusages[i] = memusage; + } + else +#endif + + if (encode) { + memusage = lzma_raw_encoder_memusage(filters[i]); + filter_memusages[i] = memusage; + } + +#ifdef HAVE_DECODERS + else { + memusage = lzma_raw_decoder_memusage(filters[i]); + } +#endif + + if (memusage > max_memusage) + max_memusage = memusage; + } + + return max_memusage; +} +#endif + + extern void coder_set_compression_settings(void) { @@ -241,11 +301,13 @@ coder_set_compression_settings(void) assert(opt_format != FORMAT_LZIP); #endif +#ifdef HAVE_ENCODERS if (opt_block_list != NULL) - for (uint32_t i = 0; opt_block_list[i].size != 0; i++) + for (uint32_t i = 0; opt_block_list[i].size != 0; i++) { validate_block_list_filter( opt_block_list[i].filters_index); - + } +#endif // The default check type is CRC64, but fallback to CRC32 // if CRC64 isn't supported by the copy of liblzma we are // using. CRC32 is always supported. @@ -333,11 +395,15 @@ coder_set_compression_settings(void) } } - // Get the memory usage. Note that if --format=raw was used, - // we can be decompressing. + // Get the memory usage and memory limit. The memory usage is the + // maximum of the default filters[] and any filters specified by + // --filtersX. + // Note that if --format=raw was used, we can be decompressing and + // do not need to account for any filter chains created + // with --filtersX. // - // If multithreaded .xz compression is done, this value will be - // replaced. + // If multithreaded .xz compression is done, the memory limit + // will be replaced. uint64_t memory_limit = hardware_memlimit_get(opt_mode); uint64_t memory_usage = UINT64_MAX; if (opt_mode == MODE_COMPRESS) { @@ -348,8 +414,9 @@ coder_set_compression_settings(void) mt_options.threads = hardware_threads_get(); mt_options.block_size = opt_block_size; mt_options.check = check; - memory_usage = lzma_stream_encoder_mt_memusage( - &mt_options); + + memory_usage = filters_memusage_max( + &mt_options, true); if (memory_usage != UINT64_MAX) message(V_DEBUG, _("Using up to %" PRIu32 " threads."), @@ -357,7 +424,7 @@ coder_set_compression_settings(void) } else # endif { - memory_usage = lzma_raw_encoder_memusage(default_filters); + memory_usage = filters_memusage_max(NULL, true); } #endif } else { @@ -377,7 +444,16 @@ coder_set_compression_settings(void) message_mem_needed(V_DEBUG, memory_usage); #ifdef HAVE_DECODERS if (opt_mode == MODE_COMPRESS) { - const uint64_t decmem = lzma_raw_decoder_memusage(default_filters); +#ifdef HAVE_ENCODERS + const uint64_t decmem = + filters_memusage_max(NULL, false); +#else + // If encoders are not enabled, then --block-list is never + // usable, so the other filter chains 1-9 can never be used. + // So there is no need to find the maximum decoder memory + // required in this case. + const uint64_t decmem = lzma_raw_decoder_memusage(filters[0]); +#endif if (decmem != UINT64_MAX) message(V_DEBUG, _("Decompression will need " "%s MiB of memory."), uint64_to_str( @@ -404,8 +480,8 @@ coder_set_compression_settings(void) // Reduce the number of threads by one and check // the memory usage. --mt_options.threads; - memory_usage = lzma_stream_encoder_mt_memusage( - &mt_options); + memory_usage = filters_memusage_max( + &mt_options, true); if (memory_usage == UINT64_MAX) message_bug(); @@ -457,7 +533,7 @@ coder_set_compression_settings(void) // the multithreaded mode but the output // is also different. hardware_threads_set(1); - memory_usage = lzma_raw_encoder_memusage(default_filters); + memory_usage = filters_memusage_max(NULL, true); message(V_WARNING, _("Switching to single-threaded mode " "to not exceed the memory usage limit of %s MiB"), uint64_to_str(round_up_to_mib(memory_limit), 0)); @@ -472,55 +548,138 @@ coder_set_compression_settings(void) if (!opt_auto_adjust) memlimit_too_small(memory_usage); - // Look for the last filter if it is LZMA2 or LZMA1, so we can make - // it use less RAM. With other filters we don't know what to do. - size_t i = 0; - while (default_filters[i].id != LZMA_FILTER_LZMA2 - && default_filters[i].id != LZMA_FILTER_LZMA1) { - if (default_filters[i].id == LZMA_VLI_UNKNOWN) - memlimit_too_small(memory_usage); + // Decrease the dictionary size until we meet the memory usage limit. + // The struct is used to track data needed to correctly reduce the + // memory usage and report which filters were adjusted. + typedef struct { + // Pointer to the filter chain that needs to be reduced. + // NULL indicates that this filter chain was either never + // set or was never above the memory limit. + lzma_filter *filters; - ++i; + // Original dictionary sizes are used to show how each + // filter's dictionary was reduced. + uint64_t orig_dict_size; + + // Index of the LZMA filter in the filters member. We only + // adjust this filter's memusage because we don't know how + // to reduce the memory usage of the other filters. + uint32_t lzma_idx; + + // Indicates if the filter's dictionary size needs to be + // reduced to fit under the memory limit (true) or if the + // filter chain is unused or is already under the memory + // limit (false). + bool reduce_dict_size; + } memusage_reduction_data; + + memusage_reduction_data memusage_reduction[ARRAY_SIZE(filters)]; + + // Counter represents how many filter chains are above the memory + // limit. + size_t count = 0; + + for (uint32_t i = 0; i < ARRAY_SIZE(filters); i++) { + // The short var name "r" will reduce the number of lines + // of code needed since less lines will stretch past 80 + // characters. + memusage_reduction_data *r = &memusage_reduction[i]; + r->filters = NULL; + r->reduce_dict_size = false; + + if (!(filters_init_mask & (1 << i))) + continue; + + for (uint32_t j = 0; filters[i][j].id != LZMA_VLI_UNKNOWN; + j++) + if ((filters[i][j].id == LZMA_FILTER_LZMA2 + || filters[i][j].id + == LZMA_FILTER_LZMA1) + && filter_memusages[i] + > memory_limit) { + count++; + r->filters = filters[i]; + r->lzma_idx = j; + r->reduce_dict_size = true; + + lzma_options_lzma *opt = r->filters + [r->lzma_idx].options; + r->orig_dict_size = opt->dict_size; + opt->dict_size &= ~((UINT32_C(1) << 20) - 1); + } } - // Decrease the dictionary size until we meet the memory - // usage limit. First round down to full mebibytes. - lzma_options_lzma *opt = default_filters[i].options; - const uint32_t orig_dict_size = opt->dict_size; - opt->dict_size &= ~((UINT32_C(1) << 20) - 1); - while (true) { - // If it is below 1 MiB, auto-adjusting failed. We could be - // more sophisticated and scale it down even more, but let's - // see if many complain about this version. - // - // FIXME: Displays the scaled memory usage instead - // of the original. - if (opt->dict_size < (UINT32_C(1) << 20)) - memlimit_too_small(memory_usage); + // Loop until all filters use <= memory_limit, or exit. + while (count > 0) { + for (uint32_t i = 0; i < ARRAY_SIZE(memusage_reduction); i++) { + memusage_reduction_data *r = &memusage_reduction[i]; - memory_usage = lzma_raw_encoder_memusage(default_filters); - if (memory_usage == UINT64_MAX) - message_bug(); + if (!r->reduce_dict_size) + continue; - // Accept it if it is low enough. - if (memory_usage <= memory_limit) - break; + lzma_options_lzma *opt = + r->filters[r->lzma_idx].options; - // Otherwise 1 MiB down and try again. I hope this - // isn't too slow method for cases where the original - // dict_size is very big. - opt->dict_size -= UINT32_C(1) << 20; + // If it is below 1 MiB, auto-adjusting failed. + // We could be more sophisticated and scale it + // down even more, but nobody has complained so far. + if (opt->dict_size < (UINT32_C(1) << 20)) + memlimit_too_small(memory_usage); + + uint64_t filt_mem_usage = + lzma_raw_encoder_memusage(r->filters); + + if (filt_mem_usage == UINT64_MAX) + message_bug(); + + if (filt_mem_usage < memory_limit) { + r->reduce_dict_size = false; + count--; + } + else { + opt->dict_size -= UINT32_C(1) << 20; + } + } } - // Tell the user that we decreased the dictionary size. - message(V_WARNING, _("Adjusted LZMA%c dictionary size " - "from %s MiB to %s MiB to not exceed " - "the memory usage limit of %s MiB"), - default_filters[i].id == LZMA_FILTER_LZMA2 - ? '2' : '1', - uint64_to_str(orig_dict_size >> 20, 0), - uint64_to_str(opt->dict_size >> 20, 1), - uint64_to_str(round_up_to_mib(memory_limit), 2)); + // Tell the user that we decreased the dictionary size for + // each filter that was adjusted. + for (uint32_t i = 0; i < ARRAY_SIZE(memusage_reduction); i++) { + memusage_reduction_data *r = &memusage_reduction[i]; + + // If the filters were never set, then the memory usage + // was never adjusted. + if (r->filters == NULL) + continue; + + lzma_filter *filter_lzma = &(r->filters[r->lzma_idx]); + lzma_options_lzma *opt = filter_lzma->options; + + // The first index is the default filter chain. The message + // should be slightly different if the default filter chain + // or if --filtersX was adjusted. + if (i == 0) + message(V_WARNING, _("Adjusted LZMA%c dictionary " + "size from %s MiB to %s MiB to not exceed the " + "memory usage limit of %s MiB"), + filter_lzma->id == LZMA_FILTER_LZMA2 + ? '2' : '1', + uint64_to_str(r->orig_dict_size >> 20, 0), + uint64_to_str(opt->dict_size >> 20, 1), + uint64_to_str(round_up_to_mib( + memory_limit), 2)); + else + message(V_WARNING, _("Adjusted LZMA%c dictionary size " + "for --filters%u from %s MiB to %s MiB to not " + "exceed the memory usage limit of %s MiB"), + filter_lzma->id == LZMA_FILTER_LZMA2 + ? '2' : '1', + (unsigned)i, + uint64_to_str(r->orig_dict_size >> 20, 0), + uint64_to_str(opt->dict_size >> 20, 1), + uint64_to_str(round_up_to_mib( + memory_limit), 2)); + } #endif return;