mirror of
https://git.tukaani.org/xz.git
synced 2024-04-04 12:36:23 +02:00
xz: Add incomplete support for --block-list.
It's broken with threads and when also --block-size is used.
This commit is contained in:
parent
972179cdcd
commit
88ccf47205
7 changed files with 151 additions and 10 deletions
|
@ -54,6 +54,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
parse_block_list(char *str)
|
||||||
|
{
|
||||||
|
// It must be non-empty and not begin with a comma.
|
||||||
|
if (str[0] == '\0' || str[0] == ',')
|
||||||
|
message_fatal(_("%s: Invalid argument to --block-list"), str);
|
||||||
|
|
||||||
|
// Count the number of comma-separated strings.
|
||||||
|
size_t count = 1;
|
||||||
|
for (size_t i = 0; str[i] != '\0'; ++i)
|
||||||
|
if (str[i] == ',')
|
||||||
|
++count;
|
||||||
|
|
||||||
|
// Prevent an unlikely integer overflow.
|
||||||
|
if (count > SIZE_MAX / sizeof(uint64_t) - 1)
|
||||||
|
message_fatal(_("%s: Too many arguments to --block-list"),
|
||||||
|
str);
|
||||||
|
|
||||||
|
// Allocate memory to hold all the sizes specified.
|
||||||
|
// If --block-list was specified already, its value is forgotten.
|
||||||
|
free(opt_block_list);
|
||||||
|
opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < count; ++i) {
|
||||||
|
// Locate the next comma and replace it with \0.
|
||||||
|
char *p = strchr(str, ',');
|
||||||
|
if (p != NULL)
|
||||||
|
*p = '\0';
|
||||||
|
|
||||||
|
if (str[0] == '\0') {
|
||||||
|
// There is no string, that is, a comma follows
|
||||||
|
// another comma. Use the previous value.
|
||||||
|
//
|
||||||
|
// NOTE: We checked earler that the first char
|
||||||
|
// of the whole list cannot be a comma.
|
||||||
|
assert(i > 0);
|
||||||
|
opt_block_list[i] = opt_block_list[i - 1];
|
||||||
|
} else {
|
||||||
|
opt_block_list[i] = str_to_uint64("block-list", str,
|
||||||
|
0, UINT64_MAX);
|
||||||
|
|
||||||
|
// Zero indicates no more new Blocks.
|
||||||
|
if (opt_block_list[i] == 0) {
|
||||||
|
if (i + 1 != count)
|
||||||
|
message_fatal(_("0 can only be used "
|
||||||
|
"as the last element "
|
||||||
|
"in --block-list"));
|
||||||
|
|
||||||
|
opt_block_list[i] = UINT64_MAX;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
str = p + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Terminate the array.
|
||||||
|
opt_block_list[count] = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
parse_real(args_info *args, int argc, char **argv)
|
parse_real(args_info *args, int argc, char **argv)
|
||||||
{
|
{
|
||||||
|
@ -73,6 +134,7 @@ parse_real(args_info *args, int argc, char **argv)
|
||||||
OPT_FILES,
|
OPT_FILES,
|
||||||
OPT_FILES0,
|
OPT_FILES0,
|
||||||
OPT_BLOCK_SIZE,
|
OPT_BLOCK_SIZE,
|
||||||
|
OPT_BLOCK_LIST,
|
||||||
OPT_MEM_COMPRESS,
|
OPT_MEM_COMPRESS,
|
||||||
OPT_MEM_DECOMPRESS,
|
OPT_MEM_DECOMPRESS,
|
||||||
OPT_NO_ADJUST,
|
OPT_NO_ADJUST,
|
||||||
|
@ -107,6 +169,7 @@ parse_real(args_info *args, int argc, char **argv)
|
||||||
{ "format", required_argument, NULL, 'F' },
|
{ "format", required_argument, NULL, 'F' },
|
||||||
{ "check", required_argument, NULL, 'C' },
|
{ "check", required_argument, NULL, 'C' },
|
||||||
{ "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
|
{ "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
|
||||||
|
{ "block-list", required_argument, NULL, OPT_BLOCK_LIST },
|
||||||
{ "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
|
{ "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
|
||||||
{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
|
{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
|
||||||
{ "memlimit", required_argument, NULL, 'M' },
|
{ "memlimit", required_argument, NULL, 'M' },
|
||||||
|
@ -378,6 +441,11 @@ parse_real(args_info *args, int argc, char **argv)
|
||||||
0, LZMA_VLI_MAX);
|
0, LZMA_VLI_MAX);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OPT_BLOCK_LIST: {
|
||||||
|
parse_block_list(optarg);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case OPT_SINGLE_STREAM:
|
case OPT_SINGLE_STREAM:
|
||||||
opt_single_stream = true;
|
opt_single_stream = true;
|
||||||
break;
|
break;
|
||||||
|
@ -590,3 +658,13 @@ args_parse(args_info *args, int argc, char **argv)
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
extern void
|
||||||
|
args_free(void)
|
||||||
|
{
|
||||||
|
free(opt_block_list);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -40,3 +40,4 @@ extern bool opt_robot;
|
||||||
extern const char stdin_filename[];
|
extern const char stdin_filename[];
|
||||||
|
|
||||||
extern void args_parse(args_info *args, int argc, char **argv);
|
extern void args_parse(args_info *args, int argc, char **argv);
|
||||||
|
extern void args_free(void);
|
||||||
|
|
|
@ -26,6 +26,7 @@ enum format_type opt_format = FORMAT_AUTO;
|
||||||
bool opt_auto_adjust = true;
|
bool opt_auto_adjust = true;
|
||||||
bool opt_single_stream = false;
|
bool opt_single_stream = false;
|
||||||
uint64_t opt_block_size = 0;
|
uint64_t opt_block_size = 0;
|
||||||
|
uint64_t *opt_block_list = NULL;
|
||||||
|
|
||||||
|
|
||||||
/// Stream used to communicate with liblzma
|
/// Stream used to communicate with liblzma
|
||||||
|
@ -522,15 +523,36 @@ coder_normal(file_pair *pair)
|
||||||
// Assume that something goes wrong.
|
// Assume that something goes wrong.
|
||||||
bool success = false;
|
bool success = false;
|
||||||
|
|
||||||
// block_remaining indicates how many input bytes to encode until
|
// block_remaining indicates how many input bytes to encode before
|
||||||
// finishing the current .xz Block. The Block size is set with
|
// finishing the current .xz Block. The Block size is set with
|
||||||
// --block-size=SIZE. It has an effect only when compressing
|
// --block-size=SIZE and --block-list. They have an effect only when
|
||||||
// to the .xz format. If block_remaining == UINT64_MAX, only
|
// compressing to the .xz format. If block_remaining == UINT64_MAX,
|
||||||
// a single block is created.
|
// only a single block is created.
|
||||||
uint64_t block_remaining = UINT64_MAX;
|
uint64_t block_remaining = UINT64_MAX;
|
||||||
if (hardware_threads_get() == 1 && opt_mode == MODE_COMPRESS
|
|
||||||
&& opt_format == FORMAT_XZ && opt_block_size > 0)
|
// Position in opt_block_list. Unused if --block-list wasn't used.
|
||||||
block_remaining = opt_block_size;
|
size_t list_pos = 0;
|
||||||
|
|
||||||
|
// Handle --block-size for single-threaded mode and the first step
|
||||||
|
// of --block-list.
|
||||||
|
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
|
||||||
|
// --block-size doesn't do anything here in threaded mode,
|
||||||
|
// because the threaded encoder will take care of splitting
|
||||||
|
// to fixed-sized Blocks.
|
||||||
|
if (hardware_threads_get() == 1 && opt_block_size > 0)
|
||||||
|
block_remaining = opt_block_size;
|
||||||
|
|
||||||
|
// If --block-list was used, start with the first size.
|
||||||
|
//
|
||||||
|
// FIXME: Currently this overrides --block-size but this isn't
|
||||||
|
// good. For threaded case, we want --block-size to specify
|
||||||
|
// how big Blocks the encoder needs to be prepared to create
|
||||||
|
// at maximum and --block-list will simultaneously cause new
|
||||||
|
// Blocks to be started at specified intervals. To keep things
|
||||||
|
// logical, the same should be done in single-threaded mode.
|
||||||
|
if (opt_block_list != NULL)
|
||||||
|
block_remaining = opt_block_list[list_pos];
|
||||||
|
}
|
||||||
|
|
||||||
strm.next_out = out_buf.u8;
|
strm.next_out = out_buf.u8;
|
||||||
strm.avail_out = IO_BUFFER_SIZE;
|
strm.avail_out = IO_BUFFER_SIZE;
|
||||||
|
@ -575,7 +597,17 @@ coder_normal(file_pair *pair)
|
||||||
if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) {
|
if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) {
|
||||||
// Start a new Block.
|
// Start a new Block.
|
||||||
action = LZMA_RUN;
|
action = LZMA_RUN;
|
||||||
block_remaining = opt_block_size;
|
|
||||||
|
if (opt_block_list == NULL) {
|
||||||
|
block_remaining = opt_block_size;
|
||||||
|
} else {
|
||||||
|
// FIXME: Make it work together with
|
||||||
|
// --block-size.
|
||||||
|
if (opt_block_list[list_pos + 1] != 0)
|
||||||
|
++list_pos;
|
||||||
|
|
||||||
|
block_remaining = opt_block_list[list_pos];
|
||||||
|
}
|
||||||
|
|
||||||
} else if (ret != LZMA_OK) {
|
} else if (ret != LZMA_OK) {
|
||||||
// Determine if the return value indicates that we
|
// Determine if the return value indicates that we
|
||||||
|
|
|
@ -48,6 +48,10 @@ extern bool opt_single_stream;
|
||||||
/// of input. This has an effect only when compressing to the .xz format.
|
/// of input. This has an effect only when compressing to the .xz format.
|
||||||
extern uint64_t opt_block_size;
|
extern uint64_t opt_block_size;
|
||||||
|
|
||||||
|
/// This is non-NULL if --block-list was used. This contains the Block sizes
|
||||||
|
/// as an array that is terminated with 0.
|
||||||
|
extern uint64_t *opt_block_list;
|
||||||
|
|
||||||
/// Set the integrity check type used when compressing
|
/// Set the integrity check type used when compressing
|
||||||
extern void coder_set_check(lzma_check check);
|
extern void coder_set_check(lzma_check check);
|
||||||
|
|
||||||
|
|
|
@ -277,6 +277,7 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
coder_free();
|
coder_free();
|
||||||
|
args_free();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// If we have got a signal, raise it to kill the program instead
|
// If we have got a signal, raise it to kill the program instead
|
||||||
|
|
|
@ -1153,10 +1153,16 @@ message_help(bool long_help)
|
||||||
" does not affect decompressor memory requirements"));
|
" does not affect decompressor memory requirements"));
|
||||||
|
|
||||||
if (long_help) {
|
if (long_help) {
|
||||||
|
// FIXME? Mention something about threading?
|
||||||
puts(_(
|
puts(_(
|
||||||
" --block-size=SIZE\n"
|
" --block-size=SIZE\n"
|
||||||
" when compressing to the .xz format, start a new block\n"
|
" when compressing to the .xz format, start a new block\n"
|
||||||
" after every SIZE bytes of input; 0=disabled (default)"));
|
" after every SIZE bytes of input; 0=disabled (default)"));
|
||||||
|
// FIXME
|
||||||
|
puts(_(
|
||||||
|
" --block-list=SIZES\n"
|
||||||
|
" when compressing to the .xz format, start a new block\n"
|
||||||
|
" after the given intervals of uncompressed data"));
|
||||||
puts(_( // xgettext:no-c-format
|
puts(_( // xgettext:no-c-format
|
||||||
" --memlimit-compress=LIMIT\n"
|
" --memlimit-compress=LIMIT\n"
|
||||||
" --memlimit-decompress=LIMIT\n"
|
" --memlimit-decompress=LIMIT\n"
|
||||||
|
|
23
src/xz/xz.1
23
src/xz/xz.1
|
@ -5,7 +5,7 @@
|
||||||
.\" This file has been put into the public domain.
|
.\" This file has been put into the public domain.
|
||||||
.\" You can do whatever you want with this file.
|
.\" You can do whatever you want with this file.
|
||||||
.\"
|
.\"
|
||||||
.TH XZ 1 "2012-07-01" "Tukaani" "XZ Utils"
|
.TH XZ 1 "2012-07-03" "Tukaani" "XZ Utils"
|
||||||
.
|
.
|
||||||
.SH NAME
|
.SH NAME
|
||||||
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
|
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
|
||||||
|
@ -807,7 +807,26 @@ format, split the input data into blocks of
|
||||||
.I size
|
.I size
|
||||||
bytes.
|
bytes.
|
||||||
The blocks are compressed independently from each other.
|
The blocks are compressed independently from each other.
|
||||||
.\" FIXME: Explain how to his can be used for random access and threading.
|
.\" FIXME: Explain how these can be used for random access and threading.
|
||||||
|
.TP
|
||||||
|
.BI \-\-block\-list= sizes
|
||||||
|
When compressing to the
|
||||||
|
.B .xz
|
||||||
|
format, start a new block after
|
||||||
|
the given intervals of uncompressed data.
|
||||||
|
.IP ""
|
||||||
|
The uncompressed
|
||||||
|
.I sizes
|
||||||
|
of the blocks are specified as a comma-separated list.
|
||||||
|
Omitting a size (two or more consecutive commas) is a shorthand
|
||||||
|
to use the size of the previous block.
|
||||||
|
A special value of
|
||||||
|
.B 0
|
||||||
|
may be used as the last value to indicate that
|
||||||
|
the rest of the file should be encoded as a single block.
|
||||||
|
.IP ""
|
||||||
|
.B "Currently this option is badly broken if used together with"
|
||||||
|
.B "\-\-block\-size or with multithreading."
|
||||||
.TP
|
.TP
|
||||||
.BI \-\-memlimit\-compress= limit
|
.BI \-\-memlimit\-compress= limit
|
||||||
Set a memory usage limit for compression.
|
Set a memory usage limit for compression.
|
||||||
|
|
Loading…
Reference in a new issue