1
0
Fork 0
mirror of https://git.tukaani.org/xz.git synced 2024-04-04 12:36:23 +02:00

Oh well, big messy commit again. Some highlights:

- Updated to the latest, probably final file format version.
  - Command line tool reworked to not use threads anymore.
    Threading will probably go into liblzma anyway.
  - Memory usage limit is now about 30 % for uncompression
    and about 90 % for compression.
  - Progress indicator with --verbose
  - Simplified --help and full --long-help
  - Upgraded to the last LGPLv2.1+ getopt_long from gnulib.
  - Some bug fixes
This commit is contained in:
Lasse Collin 2008-11-19 20:46:52 +02:00
parent 3c3905b534
commit e114502b2b
112 changed files with 3255 additions and 2739 deletions

1
THANKS
View file

@ -16,6 +16,7 @@ In alphabetical order:
- Jim Meyering
- Igor Pavlov
- Mikko Pouru
- Bernhard Reutner-Fischer
- Alexandre Sauvé
- Andreas Schwab
- Julian Seward

View file

@ -54,7 +54,7 @@ AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug], [Enable debugging code.]),
if test "x$enable_debug" = xyes; then
AC_MSG_RESULT([yes])
else
AC_DEFINE(NDEBUG, 1, [Define to disable debugging code.])
AC_DEFINE(NDEBUG, 1, [Define to 1 to disable debugging code.])
AC_MSG_RESULT([no])
fi
@ -440,13 +440,34 @@ AC_CHECK_HEADERS([fcntl.h limits.h sys/time.h],
[AC_MSG_ERROR([Required header file(s) are missing.])])
# If any of these headers are missing, things should still work correctly:
AC_CHECK_HEADERS([assert.h errno.h byteswap.h sys/param.h sys/sysctl.h],
AC_CHECK_HEADERS([sys/param.h sys/sysctl.h byteswap.h],
[], [], [
#ifdef HAVE_SYS_PARAM_H
# include <sys/param.h>
#endif
])
# Even if we have byteswap.h, we may lack the specific macros/functions.
if test x$ac_cv_header_byteswap_h = xyes ; then
m4_foreach([FUNC], [bswap_16,bswap_32,bswap_64], [
AC_MSG_CHECKING([if FUNC is available])
AC_LINK_IFELSE([AC_LANG_SOURCE([
#include <byteswap.h>
int
main(void)
{
FUNC[](42);
return 0;
}
])], [
AC_DEFINE(HAVE_[]m4_toupper(FUNC), [1],
[Define to 1 if] FUNC [is available.])
AC_MSG_RESULT([yes])
], [AC_MSG_RESULT([no])])
])dnl
fi
###############################################################################
# Checks for typedefs, structures, and compiler characteristics.
@ -469,9 +490,13 @@ AC_CHECK_SIZEOF([size_t])
# The command line tool can copy high resolution timestamps if such
# information is available in struct stat. Otherwise one second accuracy
# is used. Most systems seem to have st_xtim but BSDs have st_xtimespec.
AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec, struct stat.st_mtim.tv_nsec,
struct stat.st_atimespec.tv_nsec, struct stat.st_mtimespec.tv_nsec])
# is used.
AC_CHECK_MEMBERS([
struct stat.st_atim.tv_nsec,
struct stat.st_atimespec.tv_nsec,
struct stat.st_atimensec,
struct stat.st_uatime,
struct stat.st_atim.st__tim.tv_nsec])
AC_SYS_LARGEFILE
AC_C_BIGENDIAN
@ -484,16 +509,15 @@ AC_C_BIGENDIAN
# Gnulib replacements as needed
gl_GETOPT
# Functions that are not mandatory i.e. we have alternatives for them
# or we can just drop some functionality:
AC_CHECK_FUNCS([futimes futimesat])
# Find the best function to set timestamps.
AC_CHECK_FUNCS([futimens futimes futimesat utimes utime], [break])
# Check how to find out the amount of physical memory in the system. The
# lzma command line tool uses this to automatically limit its memory usage.
# - sysconf() gives all the needed info on GNU+Linux and Solaris.
# - BSDs use sysctl().
AC_MSG_CHECKING([how to detect the amount of physical memory])
AC_COMPILE_IFELSE([
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#include <unistd.h>
int
main()
@ -503,7 +527,7 @@ main()
i = sysconf(_SC_PHYS_PAGES);
return 0;
}
], [
]])], [
AC_DEFINE([HAVE_PHYSMEM_SYSCONF], 1,
[Define to 1 if the amount of physical memory can be detected
with sysconf(_SC_PAGESIZE) and sysconf(_SC_PHYS_PAGES).])
@ -537,7 +561,7 @@ main()
# sysconf(_SC_NPROCESSORS_ONLN) works on most systems, except that BSDs
# use sysctl().
AC_MSG_CHECKING([how to detect the number of available CPU cores])
AC_COMPILE_IFELSE([
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#include <unistd.h>
int
main()
@ -546,7 +570,7 @@ main()
i = sysconf(_SC_NPROCESSORS_ONLN);
return 0;
}
], [
]])], [
AC_DEFINE([HAVE_NCPU_SYSCONF], 1,
[Define to 1 if the number of available CPU cores can be
detected with sysconf(_SC_NPROCESSORS_ONLN).])

View file

@ -75,17 +75,17 @@ main(int argc, char **argv)
// Config
lzma_options_lzma opt_lzma;
if (lzma_lzma_preset(&opt_lzma, 0)) {
if (lzma_lzma_preset(&opt_lzma, 1)) {
fprintf(stderr, "preset failed\n");
exit(1);
}
lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1];
lzma_filter filters[LZMA_FILTERS_MAX + 1];
filters[0].id = LZMA_FILTER_LZMA2;
filters[0].options = &opt_lzma;
filters[1].id = LZMA_VLI_UNKNOWN;
// Init
if (lzma_stream_encoder(&strm, filters, LZMA_CHECK_SHA256) != LZMA_OK) {
if (lzma_stream_encoder(&strm, filters, LZMA_CHECK_CRC32) != LZMA_OK) {
fprintf(stderr, "init failed\n");
exit(1);
}

View file

@ -48,7 +48,7 @@ main(void)
// Filter setup
lzma_options_lzma opt_lzma;
if (lzma_lzma_preset(&opt_lzma, 0))
if (lzma_lzma_preset(&opt_lzma, 1))
return 1;
lzma_filter filters[] = {

View file

@ -26,7 +26,7 @@ main(void)
lzma_init();
lzma_options_lzma lzma = {
.dict_size = (1U << 27) + (1U << 26),
.dict_size = (1U << 30) + (1U << 29),
.lc = 3,
.lp = 0,
.pb = 2,

View file

@ -87,7 +87,7 @@ main(int argc, char **argv)
};
lzma_options_delta opt_delta = {
.distance = 16
.dist = 16
};
lzma_options_subblock opt_subblock = {
@ -102,7 +102,7 @@ main(int argc, char **argv)
opt_subblock.subfilter_options.id = LZMA_FILTER_DELTA;
opt_subblock.subfilter_options.options = &opt_delta;
lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1];
lzma_filter filters[LZMA_FILTERS_MAX + 1];
filters[0].id = LZMA_FILTER_LZMA2;
filters[0].options = &opt_lzma;
filters[1].id = LZMA_VLI_UNKNOWN;
@ -114,20 +114,20 @@ main(int argc, char **argv)
}
// Encoding
/*
encode(0, LZMA_SYNC_FLUSH);
encode(6, LZMA_SYNC_FLUSH);
encode(0, LZMA_SYNC_FLUSH);
encode(7, LZMA_SYNC_FLUSH);
encode(0, LZMA_SYNC_FLUSH);
encode(0, LZMA_FINISH);
*/
/*
encode(53, LZMA_SYNC_FLUSH);
// opt_lzma.literal_context_bits = 2;
// opt_lzma.literal_pos_bits = 1;
// opt_lzma.pos_bits = 0;
encode(404, LZMA_FINISH);
*/
// Clean up
lzma_end(&strm);

View file

@ -30,12 +30,13 @@ The .xz File Format
3.1.6. Header Padding
3.1.7. CRC32
3.2. Compressed Data
3.3. Check
3.3. Block Padding
3.4. Check
4. Index
4.1. Index Indicator
4.2. Number of Records
4.3. List of Records
4.3.1. Total Size
4.3.1. Unpadded Size
4.3.2. Uncompressed Size
4.4. Index Padding
4.5. CRC32
@ -56,7 +57,7 @@ The .xz File Format
0. Preface
This document describes the .xz file format (filename suffix
`.xz', MIME type `application/x-xz'). It is intended that this
".xz", MIME type "application/x-xz"). It is intended that this
format replace the old .lzma format used by LZMA SDK and
LZMA Utils.
@ -80,12 +81,12 @@ The .xz File Format
Special thanks for helping with this document go to
Igor Pavlov. Thanks for helping with this document go to
Mark Adler, H. Peter Anvin, and Mikko Pouru.
Mark Adler, H. Peter Anvin, Mikko Pouru, and Lars Wirzenius.
0.2. Changes
Last modified: 2008-09-24 21:05+0300
Last modified: 2008-11-03 00:35+0200
(A changelog will be kept once the first official version
is made.)
@ -93,20 +94,19 @@ The .xz File Format
1. Conventions
The keywords `must', `must not', `required', `should',
`should not', `recommended', `may', and `optional' in this
The key words "MUST", "MUST NOT", "REQUIRED", "SHOULD",
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in [RFC-2119].
These words are not capitalized in this document.
Indicating a warning means displaying a message, returning
appropriate exit status, or something else to let the user
know that something worth warning occurred. The operation
should still finish if a warning is indicated.
appropriate exit status, or doing something else to let the
user know that something worth warning occurred. The operation
SHOULD still finish if a warning is indicated.
Indicating an error means displaying a message, returning
appropriate exit status, or something else to let the user
know that something prevented successfully finishing the
operation. The operation must be aborted once an error has
appropriate exit status, or doing something else to let the
user know that something prevented successfully finishing the
operation. The operation MUST be aborted once an error has
been indicated.
@ -114,7 +114,7 @@ The .xz File Format
In this document, byte is always 8 bits.
A `nul byte' has all bits unset. That is, the value of a nul
A "null byte" has all bits unset. That is, the value of a null
byte is 0x00.
To represent byte blocks, this document uses notation that
@ -133,8 +133,25 @@ The .xz File Format
+=======+
In this document, a boxed byte or a byte sequence declared
using this notation is called `a field'. The example field
above would be called `the Foo field' or plain `Foo'.
using this notation is called "a field". The example field
above would be called "the Foo field" or plain "Foo".
If there are many fields, they may be split to multiple lines.
This is indicated with an arrow ("--->"):
+=====+
| Foo |
+=====+
+=====+
---> | Bar |
+=====+
The above is equivalent to this:
+=====+=====+
| Foo | Bar |
+=====+=====+
1.2. Multibyte Integers
@ -166,7 +183,7 @@ The .xz File Format
size_t
encode(uint8_t buf[static 9], uint64_t num)
{
if (num >= UINT64_MAX / 2)
if (num > UINT64_MAX / 2)
return 0;
size_t i = 0;
@ -194,7 +211,7 @@ The .xz File Format
size_t i = 0;
while (buf[i++] & 0x80) {
if (i > size_max || buf[i] == 0x00)
if (i >= size_max || buf[i] == 0x00)
return 0;
*num |= (uint64_t)(buf[i] & 0x7F) << (i * 7);
@ -206,15 +223,22 @@ The .xz File Format
2. Overall Structure of .xz File
A standalone .xz file consists of one or more Streams which may
have Stream Padding between or after them:
+========+================+========+================+
| Stream | Stream Padding | Stream | Stream Padding | ...
+========+================+========+================+
A file contains usually only one Stream. However, it is
possible to concatenate multiple Streams together with no
additional processing. It is up to the implementation to
decide if the decoder will continue decoding from the next
Stream once the end of the first Stream has been reached.
While a typical file contains only one Stream and no Stream
Padding, a decoder handling standalone .xz files SHOULD support
files that have more than one Stream or Stream Padding.
In contrast to standalone .xz files, when the .xz file format
is used as an internal part of some other file format or
communication protocol, it usually is expected that the decoder
stops after the first Stream, and doesn't look for Stream
Padding or possibly other Streams.
2.1. Stream
@ -229,7 +253,7 @@ The .xz File Format
All the above fields have a size that is a multiple of four. If
Stream is used as an internal part of another file format, it
is recommended to make the Stream start at an offset that is
is RECOMMENDED to make the Stream start at an offset that is
a multiple of four bytes.
Stream Header, Index, and Stream Footer are always present in
@ -238,12 +262,12 @@ The .xz File Format
There are zero or more Blocks. The maximum number of Blocks is
limited only by the maximum size of the Index field.
Total size of a Stream must be less than 8 EiB (2^63 bytes).
Total size of a Stream MUST be less than 8 EiB (2^63 bytes).
The same limit applies to the total amount of uncompressed
data stored in a Stream.
If an implementation supports handling .xz files with multiple
concatenated Streams, it may apply the above limits to the file
concatenated Streams, it MAY apply the above limits to the file
as a whole instead of limiting on a per-Stream basis.
@ -273,20 +297,20 @@ The .xz File Format
- The sixth byte (0x00) was chosen to prevent applications
from misdetecting the file as a text file.
If the Header Magic Bytes don't match, the decoder must
If the Header Magic Bytes don't match, the decoder MUST
indicate an error.
2.1.1.2. Stream Flags
The first byte of Stream Flags is always a nul byte. In future
The first byte of Stream Flags is always a null byte. In future
this byte may be used to indicate a new Stream version or other
Stream properties.
The second byte of Stream Flags is a bit field:
Bit(s) Mask Description
0-3 0x0F Type of Check (see Section 3.3):
0-3 0x0F Type of Check (see Section 3.4):
ID Size Check name
0x00 0 bytes None
0x01 4 bytes CRC32
@ -304,14 +328,14 @@ The .xz File Format
0x0D 64 bytes (Reserved)
0x0E 64 bytes (Reserved)
0x0F 64 bytes (Reserved)
4-7 0xF0 Reserved for future use; must be zero for now.
4-7 0xF0 Reserved for future use; MUST be zero for now.
Implementations must support at least the Check IDs 0x00 (None)
and 0x01 (CRC32). Supporting other Check IDs is optional. If
an unsupported Check is used, the decoder should indicate a
warning or error.
Implementations SHOULD support at least the Check IDs 0x00
(None) and 0x01 (CRC32). Supporting other Check IDs is
OPTIONAL. If an unsupported Check is used, the decoder SHOULD
indicate a warning or error.
If any reserved bit is set, the decoder must indicate an error.
If any reserved bit is set, the decoder MUST indicate an error.
It is possible that there is a new field present which the
decoder is not aware of, and can thus parse the Stream Header
incorrectly.
@ -322,7 +346,7 @@ The .xz File Format
The CRC32 is calculated from the Stream Flags field. It is
stored as an unsigned 32-bit little endian integer. If the
calculated value does not match the stored one, the decoder
must indicate an error.
MUST indicate an error.
The idea is that Stream Flags would always be two bytes, even
if new features are needed. This way old decoders will be able
@ -344,7 +368,7 @@ The .xz File Format
The CRC32 is calculated from the Backward Size and Stream Flags
fields. It is stored as an unsigned 32-bit little endian
integer. If the calculated value does not match the stored one,
the decoder must indicate an error.
the decoder MUST indicate an error.
The reason to have the CRC32 field before the Backward Size and
Stream Flags fields is to keep the four-byte fields aligned to
@ -359,8 +383,11 @@ The .xz File Format
real_backward_size = (stored_backward_size + 1) * 4;
Using a fixed-size integer to store this value makes it
slightly simpler to parse the Stream Footer when the
If the stored value does not match the real size of the Index
field, the decoder MUST indicate an error.
Using a fixed-size integer to store Backward Size makes
it slightly simpler to parse the Stream Footer when the
application needs to parse the Stream backwards.
@ -368,16 +395,16 @@ The .xz File Format
This is a copy of the Stream Flags field from the Stream
Header. The information stored to Stream Flags is needed
when parsing the Stream backwards. The decoder must compare
when parsing the Stream backwards. The decoder MUST compare
the Stream Flags fields in both Stream Header and Stream
Footer, and indicate an error if they are not identical.
2.1.2.4. Footer Magic Bytes
As the last step of the decoding process, the decoder must
As the last step of the decoding process, the decoder MUST
verify the existence of Footer Magic Bytes. If they don't
match, an error must be indicated.
match, an error MUST be indicated.
Using a C array and ASCII:
const uint8_t FOOTER_MAGIC[2] = { 'Y', 'Z' };
@ -396,28 +423,28 @@ The .xz File Format
2.2. Stream Padding
Only the decoders that support decoding of concatenated Streams
must support Stream Padding.
MUST support Stream Padding.
Stream Padding must contain only nul bytes. Any non-nul byte
should be considered as the beginning of a new Stream. To
preserve the four-byte alignment of consecutive Streams, the
size of Stream Padding must be a multiple of four bytes. Empty
Stream Padding is allowed.
Stream Padding MUST contain only null bytes. To preserve the
four-byte alignment of consecutive Streams, the size of Stream
Padding MUST be a multiple of four bytes. Empty Stream Padding
is allowed.
Note that non-empty Stream Padding is allowed at the end of the
file; there doesn't need to be a new Stream after non-empty
Stream Padding. This can be convenient in certain situations
[GNU-tar].
The possibility of Padding should be taken into account when
designing an application that parses the Stream backwards.
The possibility of Padding MUST be taken into account when
designing an application that parses Streams backwards, and
the application supports concatenated Streams.
3. Block
+==============+=================+=======+
| Block Header | Compressed Data | Check |
+==============+=================+=======+
+==============+=================+===============+=======+
| Block Header | Compressed Data | Block Padding | Check |
+==============+=================+===============+=======+
3.1. Block Header
@ -460,11 +487,11 @@ The .xz File Format
Bit(s) Mask Description
0-1 0x03 Number of filters (1-4)
2-5 0x3C Reserved for future use; must be zero for now.
2-5 0x3C Reserved for future use; MUST be zero for now.
6 0x40 The Compressed Size field is present.
7 0x80 The Uncompressed Size field is present.
If any reserved bit is set, the decoder must indicate an error.
If any reserved bit is set, the decoder MUST indicate an error.
It is possible that there is a new field present which the
decoder is not aware of, and can thus parse the Block Header
incorrectly.
@ -475,14 +502,11 @@ The .xz File Format
This field is present only if the appropriate bit is set in
the Block Flags field (see Section 3.1.2).
This field contains the size of the Compressed Data field as
multiple of four bytes, minimum value being four bytes:
real_compressed_size = (stored_compressed_size + 1) * 4;
The size is stored using the encoding described in Section 1.2.
If the Compressed Size does not match the real size of the
Compressed Data field, the decoder must indicate an error.
The Compressed Size field contains the size of the Compressed
Data field, which MUST be non-zero. Compressed Size is stored
using the encoding described in Section 1.2. If the Compressed
Size doesn't match the size of the Compressed Data field, the
decoder MUST indicate an error.
3.1.4. Uncompressed Size
@ -493,7 +517,7 @@ The .xz File Format
The Uncompressed Size field contains the size of the Block
after uncompressing. Uncompressed Size is stored using the
encoding described in Section 1.2. If the Uncompressed Size
does not match the real uncompressed size, the decoder must
does not match the real uncompressed size, the decoder MUST
indicate an error.
Storing the Compressed Size and Uncompressed Size fields serves
@ -532,14 +556,14 @@ The .xz File Format
Filter IDs greater than or equal to 0x4000_0000_0000_0000
(2^62) are reserved for implementation-specific internal use.
These Filter IDs must never be used in List of Filter Flags.
These Filter IDs MUST never be used in List of Filter Flags.
3.1.6. Header Padding
This field contains as many nul byte as it is needed to make
This field contains as many null bytes as are needed to make
the Block Header have the size specified in Block Header Size.
If any of the bytes are not nul bytes, the decoder must
If any of the bytes are not null bytes, the decoder MUST
indicate an error. It is possible that there is a new field
present which the decoder is not aware of, and can thus parse
the Block Header incorrectly.
@ -550,7 +574,7 @@ The .xz File Format
The CRC32 is calculated over everything in the Block Header
field except the CRC32 field itself. It is stored as an
unsigned 32-bit little endian integer. If the calculated
value does not match the stored one, the decoder must indicate
value does not match the stored one, the decoder MUST indicate
an error.
By verifying the CRC32 of the Block Header before parsing the
@ -565,20 +589,23 @@ The .xz File Format
filters in Section 5.3, the format of the filter-specific
encoded data is out of scope of this document.
If the natural size of Compressed Data is not a multiple of
four bytes, it must be padded with 1-3 nul bytes to make it
a multiple of four bytes.
3.3. Block Padding
Block Padding MUST contain 0-3 null bytes to make the size of
the Block a multiple of four bytes. This can be needed when
the size of Compressed Data is not a multiple of four.
3.3. Check
3.4. Check
The type and size of the Check field depends on which bits
are set in the Stream Flags field (see Section 2.1.1.2).
The Check, when used, is calculated from the original
uncompressed data. If the calculated Check does not match the
stored one, the decoder must indicate an error. If the selected
type of Check is not supported by the decoder, it must indicate
stored one, the decoder MUST indicate an error. If the selected
type of Check is not supported by the decoder, it MUST indicate
a warning or error.
@ -611,7 +638,7 @@ The .xz File Format
Stream. The value is stored using the encoding described in
Section 1.2. If the decoder has decoded all the Blocks of the
Stream, and then notices that the Number of Records doesn't
match the real number of Blocks, the decoder must indicate an
match the real number of Blocks, the decoder MUST indicate an
error.
@ -624,39 +651,49 @@ The .xz File Format
| Record | Record | ...
+========+========+
Each Record contains two fields:
Each Record contains information about one Block:
+============+===================+
| Total Size | Uncompressed Size |
+============+===================+
+===============+===================+
| Unpadded Size | Uncompressed Size |
+===============+===================+
If the decoder has decoded all the Blocks of the Stream, it
must verify that the contents of the Records match the real
Total Size and Uncompressed Size of the respective Blocks.
MUST verify that the contents of the Records match the real
Unpadded Size and Uncompressed Size of the respective Blocks.
Implementation hint: It is possible to verify the Index with
constant memory usage by calculating for example SHA256 of both
the real size values and the List of Records, then comparing
the check values. Implementing this using non-cryptographic
check like CRC32 should be avoided unless small code size is
check like CRC32 SHOULD be avoided unless small code size is
important.
If the decoder supports random-access reading, it must verify
that Total Size and Uncompressed Size of every completely
If the decoder supports random-access reading, it MUST verify
that Unpadded Size and Uncompressed Size of every completely
decoded Block match the sizes stored in the Index. If only
partial Block is decoded, the decoder must verify that the
a partial Block is decoded, the decoder MUST verify that the
processed sizes don't exceed the sizes stored in the Index.
4.3.1. Total Size
4.3.1. Unpadded Size
This field indicates the encoded size of the respective Block
as multiples of four bytes, minimum value being four bytes:
This field indicates the size of the Block excluding the Block
Padding field. That is, Unpadded Size is the size of the Block
Header, Compressed Data, and Check fields. Unpadded Size is
stored using the encoding described in Section 1.2. The value
MUST never be zero; with the current structure of Blocks, the
actual minimum value for Unpadded Size is five.
real_total_size = (stored_total_size + 1) * 4;
Implementation note: Because the size of the Block Padding
field is not included in Unpadded Size, calculating the total
size of a Stream or doing random-access reading requires
calculating the actual size of the Blocks by rounding Unpadded
Sizes up to the next multiple of four.
The value is stored using the encoding described in Section
1.2.
The reason to exclude Block Padding from Unpadded Size is to
ease making a raw copy of Compressed Data without Block
Padding. This can be useful, for example, if someone wants
to convert Streams to some other file format quickly.
4.3.2. Uncompressed Size
@ -668,7 +705,7 @@ The .xz File Format
4.4. Index Padding
This field must contain 0-3 nul bytes to pad the Index to
This field MUST contain 0-3 null bytes to pad the Index to
a multiple of four bytes.
@ -677,7 +714,7 @@ The .xz File Format
The CRC32 is calculated over everything in the Index field
except the CRC32 field itself. The CRC32 is stored as an
unsigned 32-bit little endian integer. If the calculated
value does not match the stored one, the decoder must indicate
value does not match the stored one, the decoder MUST indicate
an error.
@ -748,7 +785,7 @@ The .xz File Format
gets very little work done.
To prevent these kinds of slow files, there are restrictions on
how the filters can be chained. These restrictions must be
how the filters can be chained. These restrictions MUST be
taken into account when designing new filters.
The maximum number of filters in the chain has been limited to
@ -756,11 +793,11 @@ The .xz File Format
Of these three non-last filters, only two are allowed to change
the size of the data.
The non-last filters, that change the size of the data, must
The non-last filters, that change the size of the data, MUST
have a limit on how much the decoder can compress the data: the
decoder should produce at least n bytes of output when the
decoder SHOULD produce at least n bytes of output when the
filter is given 2n bytes of input. This limit is not
absolute, but significant deviations must be avoided.
absolute, but significant deviations MUST be avoided.
The above limitations guarantee that if the last filter in the
chain produces 4n bytes of output, the chain as a whole will
@ -797,7 +834,7 @@ The .xz File Format
Bits Mask Description
0-5 0x3F Dictionary Size
6-7 0xC0 Reserved for future use; must be zero for now.
6-7 0xC0 Reserved for future use; MUST be zero for now.
Dictionary Size is encoded with one-bit mantissa and five-bit
exponent. The smallest dictionary size is 4 KiB and the biggest
@ -847,11 +884,6 @@ The .xz File Format
Allow as a non-last filter: Yes
Allow as the last filter: No
Detecting when all of the data has been decoded:
Uncompressed size: Yes
End of Payload Marker: No
End of Input: Yes
Below is the list of filters in this category. The alignment
is the same for both input and output data.
@ -968,7 +1000,7 @@ The .xz File Format
There are several incompatible variations to calculate CRC32
and CRC64. For simplicity and clarity, complete examples are
provided to calculate the checks as they are used in this file
format. Implementations may use different code as long as it
format. Implementations MAY use different code as long as it
gives identical results.
The program below reads data from standard input, calculates
@ -1069,19 +1101,19 @@ The .xz File Format
[RFC-1952]
GZIP file format specification version 4.3
http://www.ietf.org/rfc/rfc1952.txt
- Notation of byte boxes in section `2.1. Overall conventions'
- Notation of byte boxes in section "2.1. Overall conventions"
[RFC-2119]
Key words for use in RFCs to Indicate Requirement Levels
http://www.ietf.org/rfc/rfc2119.txt
[GNU-tar]
GNU tar 1.16.1 manual
GNU tar 1.20 manual
http://www.gnu.org/software/tar/manual/html_node/Blocking-Factor.html
- Node 9.4.2 `Blocking Factor', paragraph that begins
`gzip will complain about trailing garbage'
- Node 9.4.2 "Blocking Factor", paragraph that begins
"gzip will complain about trailing garbage"
- Note that this URL points to the latest version of the
manual, and may some day not contain the note which is in
1.16.1. For the exact version of the manual, download GNU
tar 1.16.1: ftp://ftp.gnu.org/pub/gnu/tar/tar-1.16.1.tar.gz
1.20. For the exact version of the manual, download GNU
tar 1.20: ftp://ftp.gnu.org/pub/gnu/tar/tar-1.20.tar.gz

View file

@ -1,6 +1,5 @@
##
## Copyright (C) 2004-2007 Free Software Foundation, Inc.
## Copyright (C) 2007 Lasse Collin
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@ -13,7 +12,8 @@
## GNU General Public License for more details.
##
## Not using gnulib-tool, at least for now. Less mess this way.
## Not using gnulib-tool, at least for now. It is likely that we won't
## need anything else from Gnulib than getopt_long().
noinst_LIBRARIES = libgnu.a
@ -21,12 +21,12 @@ libgnu_a_SOURCES =
libgnu_a_DEPENDENCIES = $(LIBOBJS)
libgnu_a_LIBADD = $(LIBOBJS)
EXTRA_DIST = gettext.h getopt_.h getopt.c getopt1.c getopt_int.h
EXTRA_DIST = getopt.in.h getopt.c getopt1.c getopt_int.h
BUILT_SOURCES = $(GETOPT_H)
MOSTLYCLEANFILES = getopt.h getopt.h-t
getopt.h: getopt_.h
getopt.h: getopt.in.h
{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \
cat $(srcdir)/getopt_.h; \
cat $(srcdir)/getopt.in.h; \
} > $@-t
mv -f $@-t $@

View file

@ -7,16 +7,16 @@
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
GNU Lesser General Public License for more details.
You should have received a copy of the GNU General Public License along
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
@ -35,12 +35,18 @@
# include <unixlib.h>
#endif
/* Completely disable NLS for getopt. We won't include translations for it
anyway. If the system lacks getopt_long, missing translations probably
aren't a problem. */
/*
#ifdef _LIBC
# include <libintl.h>
#else
# include "gettext.h"
# define _(msgid) gettext (msgid)
#endif
*/
#define _(msgid) (msgid)
#if defined _LIBC && defined USE_IN_LIBIO
# include <wchar.h>

View file

@ -4,16 +4,16 @@
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
GNU Lesser General Public License for more details.
You should have received a copy of the GNU General Public License along
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */

View file

@ -4,16 +4,16 @@
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
GNU Lesser General Public License for more details.
You should have received a copy of the GNU General Public License along
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */

View file

@ -1,240 +0,0 @@
/* Convenience header for conditional use of GNU <libintl.h>.
Copyright (C) 1995-1998, 2000-2002, 2004-2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Library General Public License as published
by the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA. */
#ifndef _LIBGETTEXT_H
#define _LIBGETTEXT_H 1
/* NLS can be disabled through the configure --disable-nls option.
*
* Extra hack in LZMA Utils: if DISABLE_NLS is defined, NLS is disabled
* even if ENABLE_NLS is true. See Makefile.am for more information.
*/
#if ENABLE_NLS && !defined(DISABLE_NLS)
/* Get declarations of GNU message catalog functions. */
# include <libintl.h>
/* You can set the DEFAULT_TEXT_DOMAIN macro to specify the domain used by
the gettext() and ngettext() macros. This is an alternative to calling
textdomain(), and is useful for libraries. */
# ifdef DEFAULT_TEXT_DOMAIN
# undef gettext
# define gettext(Msgid) \
dgettext (DEFAULT_TEXT_DOMAIN, Msgid)
# undef ngettext
# define ngettext(Msgid1, Msgid2, N) \
dngettext (DEFAULT_TEXT_DOMAIN, Msgid1, Msgid2, N)
# endif
#else
/* Solaris /usr/include/locale.h includes /usr/include/libintl.h, which
chokes if dcgettext is defined as a macro. So include it now, to make
later inclusions of <locale.h> a NOP. We don't include <libintl.h>
as well because people using "gettext.h" will not include <libintl.h>,
and also including <libintl.h> would fail on SunOS 4, whereas <locale.h>
is OK. */
#if defined(__sun)
# include <locale.h>
#endif
/* Many header files from the libstdc++ coming with g++ 3.3 or newer include
<libintl.h>, which chokes if dcgettext is defined as a macro. So include
it now, to make later inclusions of <libintl.h> a NOP. */
#if defined(__cplusplus) && defined(__GNUG__) && (__GNUC__ >= 3)
# include <cstdlib>
# if (__GLIBC__ >= 2) || _GLIBCXX_HAVE_LIBINTL_H
# include <libintl.h>
# endif
#endif
/* Disabled NLS.
The casts to 'const char *' serve the purpose of producing warnings
for invalid uses of the value returned from these functions.
On pre-ANSI systems without 'const', the config.h file is supposed to
contain "#define const". */
# define gettext(Msgid) ((const char *) (Msgid))
# define dgettext(Domainname, Msgid) ((const char *) (Msgid))
# define dcgettext(Domainname, Msgid, Category) ((const char *) (Msgid))
# define ngettext(Msgid1, Msgid2, N) \
((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
# define dngettext(Domainname, Msgid1, Msgid2, N) \
((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
# define dcngettext(Domainname, Msgid1, Msgid2, N, Category) \
((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
# define textdomain(Domainname) ((const char *) (Domainname))
# define bindtextdomain(Domainname, Dirname) ((const char *) (Dirname))
# define bind_textdomain_codeset(Domainname, Codeset) ((const char *) (Codeset))
#endif
/* A pseudo function call that serves as a marker for the automated
extraction of messages, but does not call gettext(). The run-time
translation is done at a different place in the code.
The argument, String, should be a literal string. Concatenated strings
and other string expressions won't work.
The macro's expansion is not parenthesized, so that it is suitable as
initializer for static 'char[]' or 'const char[]' variables. */
#define gettext_noop(String) String
/* The separator between msgctxt and msgid in a .mo file. */
#define GETTEXT_CONTEXT_GLUE "\004"
/* Pseudo function calls, taking a MSGCTXT and a MSGID instead of just a
MSGID. MSGCTXT and MSGID must be string literals. MSGCTXT should be
short and rarely need to change.
The letter 'p' stands for 'particular' or 'special'.

Every macro below builds its lookup key by writing the three string
literals Msgctxt, GETTEXT_CONTEXT_GLUE and Msgid next to each other, so
the compiler concatenates them into one literal; this is why the
arguments must be literals. The plain Msgid is passed as a separate
argument in addition to the key, and the *_aux helper receives both. */

/* pgettext: the domain argument is DEFAULT_TEXT_DOMAIN when that macro is
defined and NULL otherwise; the category is LC_MESSAGES. */
#ifdef DEFAULT_TEXT_DOMAIN
# define pgettext(Msgctxt, Msgid) \
pgettext_aux (DEFAULT_TEXT_DOMAIN, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES)
#else
# define pgettext(Msgctxt, Msgid) \
pgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES)
#endif
/* dpgettext: like pgettext, but the caller supplies the domain. */
#define dpgettext(Domainname, Msgctxt, Msgid) \
pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES)
/* dcpgettext: the caller supplies both the domain and the category. */
#define dcpgettext(Domainname, Msgctxt, Msgid, Category) \
pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, Category)
/* npgettext: plural-aware counterpart of pgettext; forwards MsgidPlural
and the count N to npgettext_aux. Domain selection is the same as for
pgettext. */
#ifdef DEFAULT_TEXT_DOMAIN
# define npgettext(Msgctxt, Msgid, MsgidPlural, N) \
npgettext_aux (DEFAULT_TEXT_DOMAIN, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES)
#else
# define npgettext(Msgctxt, Msgid, MsgidPlural, N) \
npgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES)
#endif
/* dnpgettext: like npgettext, but the caller supplies the domain. */
#define dnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N) \
npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES)
/* dcnpgettext: the caller supplies both the domain and the category. */
#define dcnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N, Category) \
npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, Category)
/* Helper behind the pgettext() family of macros.

   DOMAIN and CATEGORY are forwarded unchanged to dcgettext().
   MSG_CTXT_ID is the lookup key (context, glue character and message id
   joined into one string); MSGID is the message id on its own.

   Returns whatever dcgettext() produced for the key, except that a result
   which is the key pointer itself is replaced by MSGID, so the caller
   never sees the context-prefixed key.  */
static inline const char *
pgettext_aux (const char *domain,
              const char *msg_ctxt_id, const char *msgid,
              int category)
{
  const char *result = dcgettext (domain, msg_ctxt_id, category);

  /* Pointer comparison, not strcmp: only the very same key object counts
     as "lookup handed the key back".  */
  return (result != msg_ctxt_id) ? result : msgid;
}
/* Helper behind the npgettext() family of macros (plural-aware lookup).

   DOMAIN, N and CATEGORY are forwarded unchanged to dcngettext(), with
   MSG_CTXT_ID (context, glue character and singular message id joined
   into one string) as the singular key and MSGID_PLURAL as the plural.

   Returns the dcngettext() result, unless that result is the key pointer
   or the MSGID_PLURAL pointer itself; in that case MSGID is returned when
   N is 1 and MSGID_PLURAL otherwise.  */
static inline const char *
npgettext_aux (const char *domain,
               const char *msg_ctxt_id, const char *msgid,
               const char *msgid_plural, unsigned long int n,
               int category)
{
  const char *result =
    dcngettext (domain, msg_ctxt_id, msgid_plural, n, category);

  /* Anything other than one of our own input pointers is passed on.  */
  if (result != msg_ctxt_id && result != msgid_plural)
    return result;

  /* Pick the form ourselves, without the context prefix.  */
  if (n == 1)
    return msgid;
  return msgid_plural;
}
/* The same thing extended for non-constant arguments. Here MSGCTXT and MSGID
can be arbitrary expressions. But for string literals these macros are
less efficient than those above. */
#include <string.h>
#define _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS 1
#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
#include <stdlib.h>
#endif
#define pgettext_expr(Msgctxt, Msgid) \
dcpgettext_expr (NULL, Msgctxt, Msgid, LC_MESSAGES)
#define dpgettext_expr(Domainname, Msgctxt, Msgid) \
dcpgettext_expr (Domainname, Msgctxt, Msgid, LC_MESSAGES)
/* Context-aware lookup for a MSGCTXT and MSGID that are arbitrary string
   expressions rather than literals.

   The lookup key (MSGCTXT, the '\004' separator, MSGID and its terminating
   NUL) is assembled at run time in a temporary buffer: a variable-length
   array when _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS is nonzero, otherwise a
   1024-byte stack buffer with a malloc() fallback for longer keys.

   DOMAIN and CATEGORY are forwarded unchanged to dcgettext().

   Returns the dcgettext() result when it is not the key buffer itself;
   otherwise, and also when the fallback allocation fails, returns MSGID.
   The temporary key is never returned to the caller.  */
static inline const char *
dcpgettext_expr (const char *domain,
                 const char *msgctxt, const char *msgid,
                 int category)
{
  /* Both lengths include the terminating NUL; the NUL slot of MSGCTXT is
     reused for the separator.  */
  size_t msgctxt_len = strlen (msgctxt) + 1;
  size_t msgid_len = strlen (msgid) + 1;
  const char *translation;
#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
  char msg_ctxt_id[msgctxt_len + msgid_len];
#else
  char buf[1024];
  /* The cast on malloc() is kept so the header stays usable from C++.  */
  char *msg_ctxt_id =
    (msgctxt_len + msgid_len <= sizeof (buf)
     ? buf
     : (char *) malloc (msgctxt_len + msgid_len));
  if (msg_ctxt_id != NULL)
#endif
    {
      int found_translation;

      memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1);
      msg_ctxt_id[msgctxt_len - 1] = '\004';
      memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len);
      translation = dcgettext (domain, msg_ctxt_id, category);

      /* Decide while the key buffer is still alive: once it has been
         freed, the value of msg_ctxt_id is indeterminate and must not
         be compared.  */
      found_translation = (translation != msg_ctxt_id);
#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
      if (msg_ctxt_id != buf)
        free (msg_ctxt_id);
#endif
      if (found_translation)
        return translation;
    }
  return msgid;
}
#define npgettext_expr(Msgctxt, Msgid, MsgidPlural, N) \
dcnpgettext_expr (NULL, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES)
#define dnpgettext_expr(Domainname, Msgctxt, Msgid, MsgidPlural, N) \
dcnpgettext_expr (Domainname, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES)
/* Plural-aware, context-aware lookup for a MSGCTXT and MSGID that are
   arbitrary string expressions rather than literals.

   The singular lookup key (MSGCTXT, the '\004' separator, MSGID and its
   terminating NUL) is assembled at run time in a temporary buffer: a
   variable-length array when _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS is
   nonzero, otherwise a 1024-byte stack buffer with a malloc() fallback
   for longer keys.

   DOMAIN, MSGID_PLURAL, N and CATEGORY are forwarded unchanged to
   dcngettext().

   Returns the dcngettext() result when it is neither the key buffer nor
   MSGID_PLURAL itself; otherwise, and also when the fallback allocation
   fails, returns MSGID if N is 1 and MSGID_PLURAL if not.  The temporary
   key is never returned to the caller.  */
static inline const char *
dcnpgettext_expr (const char *domain,
                  const char *msgctxt, const char *msgid,
                  const char *msgid_plural, unsigned long int n,
                  int category)
{
  /* Both lengths include the terminating NUL; the NUL slot of MSGCTXT is
     reused for the separator.  */
  size_t msgctxt_len = strlen (msgctxt) + 1;
  size_t msgid_len = strlen (msgid) + 1;
  const char *translation;
#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
  char msg_ctxt_id[msgctxt_len + msgid_len];
#else
  char buf[1024];
  /* The cast on malloc() is kept so the header stays usable from C++.  */
  char *msg_ctxt_id =
    (msgctxt_len + msgid_len <= sizeof (buf)
     ? buf
     : (char *) malloc (msgctxt_len + msgid_len));
  if (msg_ctxt_id != NULL)
#endif
    {
      int found_translation;

      memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1);
      msg_ctxt_id[msgctxt_len - 1] = '\004';
      memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len);
      translation = dcngettext (domain, msg_ctxt_id, msgid_plural, n, category);

      /* Decide while the key buffer is still alive: once it has been
         freed, the value of msg_ctxt_id is indeterminate and must not
         be compared.  */
      found_translation = !(translation == msg_ctxt_id
                            || translation == msgid_plural);
#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
      if (msg_ctxt_id != buf)
        free (msg_ctxt_id);
#endif
      if (found_translation)
        return translation;
    }
  return (n == 1 ? msgid : msgid_plural);
}
#endif /* _LIBGETTEXT_H */

View file

@ -1,5 +1,5 @@
# getopt.m4 serial 13
dnl Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
# getopt.m4 serial 14 (modified version)
dnl Copyright (C) 2002-2006, 2008 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
@ -13,7 +13,6 @@ AC_DEFUN([gl_GETOPT_SUBSTITUTE],
AC_LIBOBJ([getopt])
AC_LIBOBJ([getopt1])
gl_GETOPT_SUBSTITUTE_HEADER
gl_PREREQ_GETOPT
])
AC_DEFUN([gl_GETOPT_SUBSTITUTE_HEADER],
@ -31,41 +30,32 @@ AC_DEFUN([gl_GETOPT_CHECK_HEADERS],
AC_CHECK_HEADERS([getopt.h], [], [GETOPT_H=getopt.h])
fi
dnl BSD getopt_long uses a way to reset option processing, that is different
dnl from GNU and Solaris (which copied the GNU behavior). We support both
dnl GNU and BSD style resetting of getopt_long(), so there's no need to use
dnl GNU getopt_long() on BSD due to different resetting style.
dnl
dnl With getopt_long(), some BSD versions have a bug in handling optional
dnl arguments. This bug appears only if the environment variable
dnl POSIXLY_CORRECT has been set, so it shouldn't be too bad in most
dnl cases; probably most don't have that variable set. But if we actually
dnl hit this bug, it is a real problem due to our heavy use of optional
dnl arguments.
dnl
dnl According to CVS logs, the bug was introduced in OpenBSD in 2003-09-22
dnl and copied to FreeBSD in 2004-02-24. It was fixed in both in 2006-09-22,
dnl so the affected versions shouldn't be popular anymore anyway. NetBSD
dnl never had this bug. TODO: What about Darwin and others?
if test -z "$GETOPT_H"; then
AC_CHECK_FUNCS([getopt_long_only], [], [GETOPT_H=getopt.h])
fi
dnl BSD getopt_long uses an incompatible method to reset option processing,
dnl and (as of 2004-10-15) mishandles optional option-arguments.
if test -z "$GETOPT_H"; then
AC_CHECK_DECL([optreset], [GETOPT_H=getopt.h], [], [#include <getopt.h>])
AC_CHECK_DECL([optreset],
[AC_DEFINE([HAVE_OPTRESET], 1,
[Define to 1 if getopt.h declares extern int optreset.])],
[], [#include <getopt.h>])
fi
dnl Solaris 10 getopt doesn't handle `+' as a leading character in an
dnl option string (as of 2005-05-05).
if test -z "$GETOPT_H"; then
AC_CACHE_CHECK([for working GNU getopt function], [gl_cv_func_gnu_getopt],
[AC_RUN_IFELSE(
[AC_LANG_PROGRAM([#include <getopt.h>],
[[
char *myargv[3];
myargv[0] = "conftest";
myargv[1] = "-+";
myargv[2] = 0;
return getopt (2, myargv, "+a") != '?';
]])],
[gl_cv_func_gnu_getopt=yes],
[gl_cv_func_gnu_getopt=no],
[dnl cross compiling - pessimistically guess based on decls
dnl Solaris 10 getopt doesn't handle `+' as a leading character in an
dnl option string (as of 2005-05-05).
AC_CHECK_DECL([getopt_clip],
[gl_cv_func_gnu_getopt=no], [gl_cv_func_gnu_getopt=yes],
[#include <getopt.h>])])])
if test "$gl_cv_func_gnu_getopt" = "no"; then
GETOPT_H=getopt.h
fi
fi
dnl option string (as of 2005-05-05). We don't use that feature, so this
dnl is not a problem for us. Thus, the respective test was removed here.
])
AC_DEFUN([gl_GETOPT_IFELSE],
@ -75,9 +65,3 @@ AC_DEFUN([gl_GETOPT_IFELSE],
])
AC_DEFUN([gl_GETOPT], [gl_GETOPT_IFELSE([gl_GETOPT_SUBSTITUTE])])
# Prerequisites of lib/getopt*.
AC_DEFUN([gl_PREREQ_GETOPT],
[
AC_CHECK_DECLS_ONCE([getenv])
])

View file

@ -16,20 +16,29 @@
// NOTE: We assume that config.h is already #included.
// byteswap.h is a GNU extension. It contains inline assembly versions
// for byteswapping. When byteswap.h is not available, we use generic code.
// At least glibc has byteswap.h which contains inline assembly code for
// byteswapping. Some systems have byteswap.h but lack one or more of the
// bswap_xx macros/functions, which is why we check them separately even
// if byteswap.h is available.
#ifdef HAVE_BYTESWAP_H
# include <byteswap.h>
#else
#endif
#ifndef HAVE_BSWAP_16
# define bswap_16(num) \
(((num) << 8) | ((num) >> 8))
#endif
#ifndef HAVE_BSWAP_32
# define bswap_32(num) \
( (((num) << 24) ) \
| (((num) << 8) & UINT32_C(0x00FF0000)) \
| (((num) >> 8) & UINT32_C(0x0000FF00)) \
| (((num) >> 24) ) )
#endif
#ifndef HAVE_BSWAP_64
# define bswap_64(num) \
( (((num) << 56) ) \
| (((num) << 40) & UINT64_C(0x00FF000000000000)) \

View file

@ -23,6 +23,10 @@
# endif
#endif
#if defined(HAVE_PHYSMEM_SYSCONF) || defined(HAVE_NCPU_SYSCONF)
# include <unistd.h>
#endif
/// \brief Get the amount of physical memory in bytes
///

View file

@ -111,6 +111,7 @@
#endif
#include <stdlib.h>
#include <assert.h>
// Pre-C99 systems lack stdbool.h. All the code in LZMA Utils must be written
// so that it works with fake bool type, for example:
@ -134,17 +135,6 @@ typedef unsigned char _Bool;
# define __bool_true_false_are_defined 1
#endif
#ifdef HAVE_ASSERT_H
# include <assert.h>
#else
# ifdef NDEBUG
# define assert(x)
# else
// TODO: Pretty bad assert macro.
# define assert(x) (!(x) && abort())
# endif
#endif
// string.h should be enough but let's include strings.h and memory.h too if
// they exist, since that shouldn't do any harm, but may improve portability.
#ifdef HAVE_STRING_H

View file

@ -1,6 +1,6 @@
/**
* \file lzma/block.h
* \brief .lzma Block handling
* \brief .xz Block handling
*
* \author Copyright (C) 1999-2006 Igor Pavlov
* \author Copyright (C) 2007 Lasse Collin
@ -131,11 +131,10 @@ typedef struct {
*
* \note Because the array is terminated with
* .id = LZMA_VLI_UNKNOWN, the actual array must
* have LZMA_BLOCK_FILTERS_MAX + 1 members or the Block
* have LZMA_FILTERS_MAX + 1 members or the Block
* Header decoder will overflow the buffer.
*/
lzma_filter *filters;
# define LZMA_BLOCK_FILTERS_MAX 4
} lzma_block;
@ -148,6 +147,8 @@ typedef struct {
* The size can be calculated from the first byte of a Block using this macro.
* Note that if the first byte is 0x00, it indicates beginning of Index; use
* this macro only when the byte is not 0x00.
*
* There is no encoding macro, because Block Header encoder is enough for that.
*/
#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4)
@ -211,38 +212,50 @@ extern lzma_ret lzma_block_header_decode(lzma_block *options,
/**
* \brief Sets Compressed Size according to Total Size
* \brief Sets Compressed Size according to Unpadded Size
*
* Block Header stores Compressed Size, but Index has Total Size. If the
* Block Header stores Compressed Size, but Index has Unpadded Size. If the
* application has already parsed the Index and is now decoding Blocks,
* it can calculate Compressed Size from Total Size. This function does
* it can calculate Compressed Size from Unpadded Size. This function does
* exactly that with error checking, so application doesn't need to check,
* for example, if the value in Index is too small to contain even the
* Block Header. Note that you need to call this function after decoding
* Block Header. Note that you need to call this function _after_ decoding
* the Block Header field.
*
* \return - LZMA_OK: options->compressed_size was set successfully.
* - LZMA_DATA_ERROR: total_size is too small compared to
* - LZMA_DATA_ERROR: unpadded_size is too small compared to
* options->header_size and lzma_check_sizes[options->check].
* - LZMA_PROG_ERROR: Some values are invalid. For example,
* total_size and options->header_size must be multiples
* of four, total_size must be at least 12, and
* options->header_size must be a multiple of four, and
* options->header_size between 8 and 1024 inclusive.
*/
extern lzma_ret lzma_block_total_size_set(
lzma_block *options, lzma_vli total_size)
extern lzma_ret lzma_block_compressed_size(
lzma_block *options, lzma_vli unpadded_size)
lzma_attr_warn_unused_result;
/**
* \brief Calculates Total Size
* \brief Calculates Unpadded Size
*
* This function can be useful after decoding a Block to get Total Size
* This function can be useful after decoding a Block to get Unpadded Size
* that is stored in Index.
*
* \return Total Size on success, or zero on error.
* \return Unpadded Size on success, or zero on error.
*/
extern lzma_vli lzma_block_total_size_get(const lzma_block *options)
extern lzma_vli lzma_block_unpadded_size(const lzma_block *options)
lzma_attr_pure;
/**
* \brief Calculates the total encoded size of a Block
*
* This is equivalent to lzma_block_unpadded_size() except that the returned
* value includes the size of the Block Padding field.
*
* \return On success, total encoded size of the Block. On error,
* zero is returned.
*/
extern lzma_vli lzma_block_total_size(const lzma_block *options)
lzma_attr_pure;
@ -255,8 +268,6 @@ extern lzma_vli lzma_block_total_size_get(const lzma_block *options)
* \return - LZMA_OK: All good, continue with lzma_code().
* - LZMA_MEM_ERROR
* - LZMA_OPTIONS_ERROR
* - LZMA_DATA_ERROR: Limits (total_limit and uncompressed_limit)
* have been reached already.
* - LZMA_UNSUPPORTED_CHECK: options->check specifies a Check
* that is not supported by this build of liblzma. Initializing
* the encoder failed.

View file

@ -54,6 +54,14 @@ typedef struct {
} lzma_filter;
/**
* \brief Maximum number of filters in a chain
*
* FIXME desc
*/
#define LZMA_FILTERS_MAX 4
/**
* \brief Test if the given Filter ID is supported for encoding
*

View file

@ -32,12 +32,24 @@ typedef struct lzma_index_s lzma_index;
*/
typedef struct {
/**
* Total Size of a Block.
* \brief Total encoded size of a Block including Block Padding
*
* This value is useful if you need to know the actual size of the
* Block that the Block decoder will read.
*/
lzma_vli total_size;
/**
* Uncompressed Size of a Block
* \brief Encoded size of a Block excluding Block Padding
*
* This value is stored in the Index. When doing random-access
* reading, you should give this value to the Block decoder along
* with uncompressed_size.
*/
lzma_vli unpadded_size;
/**
* \brief Uncompressed Size of a Block
*/
lzma_vli uncompressed_size;
@ -80,7 +92,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator);
* \brief Add a new Record to an Index
*
* \param index Pointer to a lzma_index structure
* \param total_size Total Size of a Block
* \param unpadded_size Unpadded Size of a Block
* \param uncompressed_size Uncompressed Size of a Block, or
* LZMA_VLI_UNKNOWN to indicate padding.
*
@ -92,7 +104,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator);
* - LZMA_PROG_ERROR
*/
extern lzma_ret lzma_index_append(lzma_index *i, lzma_allocator *allocator,
lzma_vli total_size, lzma_vli uncompressed_size)
lzma_vli unpadded_size, lzma_vli uncompressed_size)
lzma_attr_warn_unused_result;

View file

@ -57,7 +57,7 @@ extern void lzma_index_hash_end(
* \brief Add a new Record to an Index hash
*
* \param index Pointer to a lzma_index_hash structure
* \param total_size Total Size of a Block
* \param unpadded_size Unpadded Size of a Block
* \param uncompressed_size Uncompressed Size of a Block
*
* \return - LZMA_OK
@ -67,7 +67,7 @@ extern void lzma_index_hash_end(
* used when lzma_index_hash_decode() has already been used.
*/
extern lzma_ret lzma_index_hash_append(lzma_index_hash *index_hash,
lzma_vli total_size, lzma_vli uncompressed_size)
lzma_vli unpadded_size, lzma_vli uncompressed_size)
lzma_attr_warn_unused_result;

View file

@ -33,13 +33,13 @@ struct lzma_coder_s {
lzma_next_coder next;
/// Decoding options; we also write Compressed Size and Uncompressed
/// Size back to this structure when the encoding has been finished.
/// Size back to this structure when the decoding has been finished.
lzma_block *options;
/// Compressed Size calculated while encoding
/// Compressed Size calculated while decoding
lzma_vli compressed_size;
/// Uncompressed Size calculated while encoding
/// Uncompressed Size calculated while decoding
lzma_vli uncompressed_size;
/// Maximum allowed Compressed Size; this takes into account the
@ -110,25 +110,6 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator,
if (ret != LZMA_STREAM_END)
return ret;
coder->sequence = SEQ_PADDING;
}
// Fall through
case SEQ_PADDING:
// Compressed Data is padded to a multiple of four bytes.
while (coder->compressed_size & 3) {
if (*in_pos >= in_size)
return LZMA_OK;
if (in[(*in_pos)++] != 0x00)
return LZMA_DATA_ERROR;
if (update_size(&coder->compressed_size, 1,
coder->compressed_limit))
return LZMA_DATA_ERROR;
}
// Compressed and Uncompressed Sizes are now at their final
// values. Verify that they match the values given to us.
if (!is_size_valid(coder->compressed_size,
@ -142,6 +123,27 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator,
coder->options->compressed_size = coder->compressed_size;
coder->options->uncompressed_size = coder->uncompressed_size;
coder->sequence = SEQ_PADDING;
}
// Fall through
case SEQ_PADDING:
// Compressed Data is padded to a multiple of four bytes.
while (coder->compressed_size & 3) {
// We use compressed_size here just to get the Padding
// right. The actual Compressed Size was stored to
// coder->options already, and won't be modified by
// us anymore.
++coder->compressed_size;
if (*in_pos >= in_size)
return LZMA_OK;
if (in[(*in_pos)++] != 0x00)
return LZMA_DATA_ERROR;
}
if (coder->options->check == LZMA_CHECK_NONE)
return LZMA_STREAM_END;
@ -193,14 +195,11 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
{
lzma_next_coder_init(lzma_block_decoder_init, next, allocator);
// While lzma_block_total_size_get() is meant to calculate the Total
// Size, it also validates the options excluding the filters.
if (lzma_block_total_size_get(options) == 0)
return LZMA_PROG_ERROR;
// options->check is used for array indexing so we need to know that
// it is in the valid range.
if ((unsigned)(options->check) > LZMA_CHECK_ID_MAX)
// Validate the options. lzma_block_unpadded_size() does that for us
// except for Uncompressed Size and filters. Filters are validated
// by the raw decoder.
if (lzma_block_unpadded_size(options) == 0
|| !lzma_vli_is_valid(options->uncompressed_size))
return LZMA_PROG_ERROR;
// Allocate and initialize *next->coder if needed.
@ -221,8 +220,8 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
next->coder->uncompressed_size = 0;
// If Compressed Size is not known, we calculate the maximum allowed
// value so that Total Size of the Block still is a valid VLI and
// a multiple of four.
// value so that encoded size of the Block (including Block Padding)
// is still a valid VLI and a multiple of four.
next->coder->compressed_limit
= options->compressed_size == LZMA_VLI_UNKNOWN
? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))

View file

@ -27,8 +27,8 @@
/// take into account the headers etc. to determine the exact maximum size
/// of the Compressed Data field, but the complexity would give us nothing
/// useful. Instead, limit the size of Compressed Data so that even with
/// biggest possible Block Header and Check fields the total size of the
/// Block stays as valid VLI. This way we don't produce incorrect output
/// biggest possible Block Header and Check fields the total encoded size of
/// the Block stays as valid VLI. This way we don't produce incorrect output
/// if someone will really try creating a Block of 8 EiB.
///
/// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of
@ -41,9 +41,9 @@ struct lzma_coder_s {
/// The filters in the chain; initialized with lzma_raw_decoder_init().
lzma_next_coder next;
/// Encoding options; we also write Total Size, Compressed Size, and
/// Uncompressed Size back to this structure when the encoding has
/// been finished.
/// Encoding options; we also write Unpadded Size, Compressed Size,
/// and Uncompressed Size back to this structure when the encoding
/// has been finished.
lzma_block *options;
enum {
@ -58,8 +58,8 @@ struct lzma_coder_s {
/// Uncompressed Size calculated while encoding
lzma_vli uncompressed_size;
/// Position when writing out the Check field
size_t check_pos;
/// Position in Block Padding and the Check fields
size_t pos;
/// Check of the uncompressed data
lzma_check_state check;
@ -106,6 +106,11 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator,
assert(*in_pos == in_size);
assert(action == LZMA_FINISH);
// Copy the values into coder->options. The caller
// may use this information to construct Index.
coder->options->compressed_size = coder->compressed_size;
coder->options->uncompressed_size = coder->uncompressed_size;
coder->sequence = SEQ_PADDING;
}
@ -113,28 +118,21 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator,
case SEQ_PADDING:
// Pad Compressed Data to a multiple of four bytes.
while (coder->compressed_size & 3) {
while ((coder->compressed_size + coder->pos) & 3) {
if (*out_pos >= out_size)
return LZMA_OK;
out[*out_pos] = 0x00;
++*out_pos;
// No need to use check for overflow here since we
// have already checked in SEQ_CODE that Compressed
// Size will stay in proper limits.
++coder->compressed_size;
++coder->pos;
}
// Copy the values into coder->options. The caller
// may use this information to construct Index.
coder->options->compressed_size = coder->compressed_size;
coder->options->uncompressed_size = coder->uncompressed_size;
if (coder->options->check == LZMA_CHECK_NONE)
return LZMA_STREAM_END;
lzma_check_finish(&coder->check, coder->options->check);
coder->pos = 0;
coder->sequence = SEQ_CHECK;
// Fall through
@ -144,11 +142,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator,
= lzma_check_size(coder->options->check);
while (*out_pos < out_size) {
out[*out_pos] = coder->check.buffer.u8[
coder->check_pos];
out[*out_pos] = coder->check.buffer.u8[coder->pos];
++*out_pos;
if (++coder->check_pos == check_size)
if (++coder->pos == check_size)
return LZMA_STREAM_END;
}
@ -199,9 +196,9 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
next->coder->options = options;
next->coder->compressed_size = 0;
next->coder->uncompressed_size = 0;
next->coder->pos = 0;
// Initialize the check
next->coder->check_pos = 0;
lzma_check_init(&next->coder->check, options->check);
// Initialize the requested filters.

View file

@ -27,7 +27,7 @@ free_properties(lzma_block *options, lzma_allocator *allocator)
// Free allocated filter options. The last array member is not
// touched after the initialization in the beginning of
// lzma_block_header_decode(), so we don't need to touch that here.
for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) {
for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) {
lzma_free(options->filters[i].options, allocator);
options->filters[i].id = LZMA_VLI_UNKNOWN;
options->filters[i].options = NULL;
@ -48,24 +48,19 @@ lzma_block_header_decode(lzma_block *options,
// Initialize the filter options array. This way the caller can
// safely free() the options even if an error occurs in this function.
for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) {
for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) {
options->filters[i].id = LZMA_VLI_UNKNOWN;
options->filters[i].options = NULL;
}
size_t in_size = options->header_size;
// Validate. The caller must have set options->header_size with
// lzma_block_header_size_decode() macro, so it is a programming error
// if these tests fail.
if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN
|| in_size > LZMA_BLOCK_HEADER_SIZE_MAX
|| (in_size & 3)
|| lzma_block_header_size_decode(in[0]) != in_size)
// Validate Block Header Size and Check type. The caller must have
// already set these, so it is a programming error if this test fails.
if (lzma_block_header_size_decode(in[0]) != options->header_size
|| (unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
return LZMA_PROG_ERROR;
// Exclude the CRC32 field.
in_size -= 4;
const size_t in_size = options->header_size - 4;
// Verify CRC32
if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size))
@ -83,15 +78,9 @@ lzma_block_header_decode(lzma_block *options,
return_if_error(lzma_vli_decode(&options->compressed_size,
NULL, in, &in_pos, in_size));
if (options->compressed_size > LZMA_VLI_MAX / 4 - 1)
return LZMA_DATA_ERROR;
options->compressed_size = (options->compressed_size + 1) * 4;
// Check that Total Size (that is, size of
// Block Header + Compressed Data + Check) is
// representable as a VLI.
if (lzma_block_total_size_get(options) == 0)
// Validate Compressed Size. This checks that it isn't zero
// and that the total size of the Block is a valid VLI.
if (lzma_block_unpadded_size(options) == 0)
return LZMA_DATA_ERROR;
} else {
options->compressed_size = LZMA_VLI_UNKNOWN;

View file

@ -25,21 +25,20 @@ extern LZMA_API lzma_ret
lzma_block_header_size(lzma_block *options)
{
// Block Header Size + Block Flags + CRC32.
size_t size = 1 + 1 + 4;
uint32_t size = 1 + 1 + 4;
// Compressed Size
if (options->compressed_size != LZMA_VLI_UNKNOWN) {
if (options->compressed_size > LZMA_VLI_MAX / 4 - 1
|| options->compressed_size == 0
|| (options->compressed_size & 3))
const uint32_t add = lzma_vli_size(options->compressed_size);
if (add == 0 || options->compressed_size == 0)
return LZMA_PROG_ERROR;
size += lzma_vli_size(options->compressed_size / 4 - 1);
size += add;
}
// Uncompressed Size
if (options->uncompressed_size != LZMA_VLI_UNKNOWN) {
const size_t add = lzma_vli_size(options->uncompressed_size);
const uint32_t add = lzma_vli_size(options->uncompressed_size);
if (add == 0)
return LZMA_PROG_ERROR;
@ -51,10 +50,9 @@ lzma_block_header_size(lzma_block *options)
|| options->filters[0].id == LZMA_VLI_UNKNOWN)
return LZMA_PROG_ERROR;
for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN;
++i) {
for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
// Don't allow too many filters.
if (i == 4)
if (i == LZMA_FILTERS_MAX)
return LZMA_PROG_ERROR;
uint32_t add;
@ -65,12 +63,13 @@ lzma_block_header_size(lzma_block *options)
}
// Pad to a multiple of four bytes.
options->header_size = (size + 3) & ~(size_t)(3);
options->header_size = (size + 3) & ~UINT32_C(3);
// NOTE: We don't verify that Total Size of the Block stays within
// limits. This is because it is possible that we are called with
// exaggerated values to reserve space for Block Header, and later
// called again with lower, real values.
// NOTE: We don't verify that the encoded size of the Block stays
// within limits. This is because it is possible that we are called
// with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve
// space for Block Header, and later called again with lower,
// real values.
return LZMA_OK;
}
@ -79,9 +78,9 @@ lzma_block_header_size(lzma_block *options)
extern LZMA_API lzma_ret
lzma_block_header_encode(const lzma_block *options, uint8_t *out)
{
if ((options->header_size & 3)
|| options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
|| options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX)
// Validate everything but filters.
if (lzma_block_unpadded_size(options) == 0
|| !lzma_vli_is_valid(options->uncompressed_size))
return LZMA_PROG_ERROR;
// Indicate the size of the buffer _excluding_ the CRC32 field.
@ -90,32 +89,28 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out)
// Store the Block Header Size.
out[0] = out_size / 4;
// We write Block Flags a little later.
// We write Block Flags in pieces.
out[1] = 0x00;
size_t out_pos = 2;
// Compressed Size
if (options->compressed_size != LZMA_VLI_UNKNOWN) {
// Compressed Size must be non-zero, fit into a 63-bit
// integer and be a multiple of four. Also the Total Size
// of the Block must fit into 63-bit integer.
if (options->compressed_size == 0
|| (options->compressed_size & 3)
|| options->compressed_size
> LZMA_VLI_MAX
|| lzma_block_total_size_get(options) == 0)
return LZMA_PROG_ERROR;
return_if_error(lzma_vli_encode(
options->compressed_size / 4 - 1, NULL,
options->compressed_size, NULL,
out, &out_pos, out_size));
out[1] |= 0x40;
}
// Uncompressed Size
if (options->uncompressed_size != LZMA_VLI_UNKNOWN)
if (options->uncompressed_size != LZMA_VLI_UNKNOWN) {
return_if_error(lzma_vli_encode(
options->uncompressed_size, NULL,
out, &out_pos, out_size));
out[1] |= 0x80;
}
// Filter Flags
if (options->filters == NULL
|| options->filters[0].id == LZMA_VLI_UNKNOWN)
@ -124,24 +119,16 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out)
size_t filter_count = 0;
do {
// There can be a maximum of four filters.
if (filter_count == 4)
if (filter_count == LZMA_FILTERS_MAX)
return LZMA_PROG_ERROR;
return_if_error(lzma_filter_flags_encode(
options->filters + filter_count,
out, &out_pos, out_size));
} while (options->filters[++filter_count].id
!= LZMA_VLI_UNKNOWN);
} while (options->filters[++filter_count].id != LZMA_VLI_UNKNOWN);
// Block Flags
out[1] = filter_count - 1;
if (options->compressed_size != LZMA_VLI_UNKNOWN)
out[1] |= 0x40;
if (options->uncompressed_size != LZMA_VLI_UNKNOWN)
out[1] |= 0x80;
out[1] |= filter_count - 1;
// Padding
memzero(out + out_pos, out_size - out_pos);

View file

@ -18,10 +18,11 @@
///////////////////////////////////////////////////////////////////////////////
#include "common.h"
#include "index.h"
extern LZMA_API lzma_ret
lzma_block_total_size_set(lzma_block *options, lzma_vli total_size)
lzma_block_compressed_size(lzma_block *options, lzma_vli total_size)
{
// Validate.
if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
@ -45,29 +46,47 @@ lzma_block_total_size_set(lzma_block *options, lzma_vli total_size)
extern LZMA_API lzma_vli
lzma_block_total_size_get(const lzma_block *options)
lzma_block_unpadded_size(const lzma_block *options)
{
// Validate the values that we are interested in.
// Validate the values that we are interested in i.e. all but
// Uncompressed Size and the filters.
//
// NOTE: This function is used for validation too, so it is
// essential that these checks are always done even if
// Compressed Size is unknown.
if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
|| options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX
|| (options->header_size & 3)
|| (unsigned)(options->check) > LZMA_CHECK_ID_MAX)
|| !lzma_vli_is_valid(options->compressed_size)
|| options->compressed_size == 0
|| (unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
return 0;
// If Compressed Size is unknown, return that we cannot know
// Total Size either.
// size of the Block either.
if (options->compressed_size == LZMA_VLI_UNKNOWN)
return LZMA_VLI_UNKNOWN;
const lzma_vli total_size = options->compressed_size
// Calculate Unpadded Size and validate it.
const lzma_vli unpadded_size = options->compressed_size
+ options->header_size
+ lzma_check_size(options->check);
// Validate the calculated Total Size.
if (options->compressed_size > LZMA_VLI_MAX
|| (options->compressed_size & 3)
|| total_size > LZMA_VLI_MAX)
assert(unpadded_size >= UNPADDED_SIZE_MIN);
if (unpadded_size > UNPADDED_SIZE_MAX)
return 0;
return total_size;
return unpadded_size;
}
extern LZMA_API lzma_vli
lzma_block_total_size(const lzma_block *options)
{
	// lzma_block_unpadded_size() returns 0 when validation fails and
	// LZMA_VLI_UNKNOWN when Compressed Size is unknown. Both of these
	// special values are passed through to the caller unmodified.
	const lzma_vli unpadded_size = lzma_block_unpadded_size(options);
	if (unpadded_size == 0 || unpadded_size == LZMA_VLI_UNKNOWN)
		return unpadded_size;

	// A real size: round it up to the next multiple of four.
	return vli_ceil4(unpadded_size);
}

View file

@ -66,10 +66,6 @@
| LZMA_CONCATENATED )
///////////
// Types //
///////////
/// Type of encoder/decoder specific data; the actual structure is defined
/// differently in different coders.
typedef struct lzma_coder_s lzma_coder;
@ -187,10 +183,6 @@ struct lzma_internal_s {
};
///////////////
// Functions //
///////////////
/// Allocates memory
extern void *lzma_alloc(size_t size, lzma_allocator *allocator)
lzma_attribute((malloc));

View file

@ -164,7 +164,7 @@ validate_chain(const lzma_filter *filters, size_t *count)
// There must be 1-4 filters. The last filter must be usable as
// the last filter in the chain. A maximum of three filters are
// allowed to change the size of the data.
if (i > LZMA_BLOCK_FILTERS_MAX || !last_ok || changes_size_count > 3)
if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3)
return LZMA_OPTIONS_ERROR;
*count = i;
@ -182,7 +182,7 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
return_if_error(validate_chain(options, &count));
// Set the filter functions and copy the options pointer.
lzma_filter_info filters[LZMA_BLOCK_FILTERS_MAX + 1];
lzma_filter_info filters[LZMA_FILTERS_MAX + 1];
if (is_encoder) {
for (size_t i = 0; i < count; ++i) {
// The order of the filters is reversed in the

View file

@ -20,24 +20,34 @@
#include "index.h"
/// Number of Records to allocate at once.
/// Number of Records to allocate at once in the unrolled list.
#define INDEX_GROUP_SIZE 256
typedef struct lzma_index_group_s lzma_index_group;
struct lzma_index_group_s {
/// Next group
/// Previous group
lzma_index_group *prev;
/// Previous group
/// Next group
lzma_index_group *next;
/// Index of the last Record in this group
size_t last;
/// Total Size fields as cumulative sum relative to the beginning
/// of the group. The total size of the group is total_sums[last].
lzma_vli total_sums[INDEX_GROUP_SIZE];
/// Unpadded Size fields as special cumulative sum relative to the
/// beginning of the group. It's special in the sense that the previous
/// value is rounded up to the next multiple of four before
/// calculating the new value. The total encoded size of the Blocks
/// in the group is unpadded_sums[last] rounded up to the next
/// multiple of four.
///
/// For example, if the Unpadded Sizes are 39, 57, and 81, the stored
/// values are 39, 97 (40 + 57), and 181 (100 + 81). The total
/// encoded size of these Blocks is 184.
///
/// This encoding is nice from point of view of lzma_index_locate().
lzma_vli unpadded_sums[INDEX_GROUP_SIZE];
/// Uncompressed Size fields as cumulative sum relative to the
/// beginning of the group. The uncompressed size of the group is
@ -56,19 +66,13 @@ struct lzma_index_s {
/// Uncompressed size of the Stream
lzma_vli uncompressed_size;
/// Number of non-padding records. This is needed by Index encoder.
/// Number of non-padding records. This is needed for Index encoder.
lzma_vli count;
/// Size of the List of Records field; this is updated every time
/// a new non-padding Record is added.
lzma_vli index_list_size;
/// This is zero if no Indexes have been combined with
/// lzma_index_cat(). With combined Indexes, this contains the sizes
/// of all but latest the Streams, including possible Stream Padding
/// fields.
lzma_vli padding_size;
/// First group of Records
lzma_index_group *head;
@ -80,8 +84,8 @@ struct lzma_index_s {
/// Group where the current read position is.
lzma_index_group *group;
/// The most recently read record in *group
lzma_vli record;
/// The most recently read Record in *group
size_t record;
/// Uncompressed offset of the beginning of *group relative
/// to the beginning of the Stream
@ -102,6 +106,10 @@ struct lzma_index_s {
/// Stream. This is needed when a new Index is concatenated
/// to this lzma_index structure.
lzma_vli index_list_size;
/// Total size of all but the last Stream and all Stream
/// Padding fields.
lzma_vli streams_size;
} old;
};
@ -136,12 +144,12 @@ lzma_index_init(lzma_index *i, lzma_allocator *allocator)
i->uncompressed_size = 0;
i->count = 0;
i->index_list_size = 0;
i->padding_size = 0;
i->head = NULL;
i->tail = NULL;
i->current.group = NULL;
i->old.count = 0;
i->old.index_list_size = 0;
i->old.streams_size = 0;
return i;
}
@ -195,12 +203,12 @@ lzma_index_file_size(const lzma_index *i)
{
// If multiple Streams are concatenated, the Stream Header, Index,
// and Stream Footer fields of all but the last Stream are already
// included in padding_size. Thus, we need to calculate only the
// included in old.streams_size. Thus, we need to calculate only the
// size of the last Index, not all Indexes.
return i->total_size + i->padding_size
return i->old.streams_size + LZMA_STREAM_HEADER_SIZE + i->total_size
+ index_size(i->count - i->old.count,
i->index_list_size - i->old.index_list_size)
+ LZMA_STREAM_HEADER_SIZE * 2;
+ LZMA_STREAM_HEADER_SIZE;
}
@ -219,10 +227,11 @@ lzma_index_padding_size(const lzma_index *i)
}
/// Helper function for index_append()
/// Appends a new Record to the Index. If needed, this allocates a new
/// Record group.
static lzma_ret
index_append_real(lzma_index *i, lzma_allocator *allocator,
lzma_vli total_size, lzma_vli uncompressed_size,
lzma_vli unpadded_size, lzma_vli uncompressed_size,
bool is_padding)
{
// Add the new record.
@ -237,7 +246,7 @@ index_append_real(lzma_index *i, lzma_allocator *allocator,
g->prev = i->tail;
g->next = NULL;
g->last = 0;
g->total_sums[0] = total_size;
g->unpadded_sums[0] = unpadded_size;
g->uncompressed_sums[0] = uncompressed_size;
g->paddings[0] = is_padding;
@ -252,9 +261,9 @@ index_append_real(lzma_index *i, lzma_allocator *allocator,
} else {
// i->tail has space left for at least one record.
i->tail->total_sums[i->tail->last + 1]
= i->tail->total_sums[i->tail->last]
+ total_size;
i->tail->unpadded_sums[i->tail->last + 1]
= unpadded_size + vli_ceil4(
i->tail->unpadded_sums[i->tail->last]);
i->tail->uncompressed_sums[i->tail->last + 1]
= i->tail->uncompressed_sums[i->tail->last]
+ uncompressed_size;
@ -266,13 +275,14 @@ index_append_real(lzma_index *i, lzma_allocator *allocator,
}
static lzma_ret
index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
lzma_vli uncompressed_size, bool is_padding)
extern LZMA_API lzma_ret
lzma_index_append(lzma_index *i, lzma_allocator *allocator,
lzma_vli unpadded_size, lzma_vli uncompressed_size)
{
if (total_size > LZMA_VLI_MAX
if (unpadded_size < UNPADDED_SIZE_MIN
|| unpadded_size > UNPADDED_SIZE_MAX
|| uncompressed_size > LZMA_VLI_MAX)
return LZMA_DATA_ERROR;
return LZMA_PROG_ERROR;
// This looks a bit ugly. We want to first validate that the Index
// and Stream stay in valid limits after adding this Record. After
@ -280,29 +290,12 @@ index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
// slightly more correct to validate before allocating, YMMV).
lzma_ret ret;
if (is_padding) {
assert(uncompressed_size == 0);
// First update the info so we can validate it.
i->padding_size += total_size;
if (i->padding_size > LZMA_VLI_MAX
|| lzma_index_file_size(i) > LZMA_VLI_MAX)
ret = LZMA_DATA_ERROR; // Would grow past the limits.
else
ret = index_append_real(i, allocator,
total_size, uncompressed_size, true);
// If something went wrong, undo the updated value.
if (ret != LZMA_OK)
i->padding_size -= total_size;
} else {
// First update the overall info so we can validate it.
const lzma_vli index_list_size_add
= lzma_vli_size(total_size / 4 - 1)
const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size)
+ lzma_vli_size(uncompressed_size);
const lzma_vli total_size = vli_ceil4(unpadded_size);
i->total_size += total_size;
i->uncompressed_size += uncompressed_size;
++i->count;
@ -314,8 +307,8 @@ index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
|| lzma_index_file_size(i) > LZMA_VLI_MAX)
ret = LZMA_DATA_ERROR; // Would grow past the limits.
else
ret = index_append_real(i, allocator,
total_size, uncompressed_size, false);
ret = index_append_real(i, allocator, unpadded_size,
uncompressed_size, false);
if (ret != LZMA_OK) {
// Something went wrong. Undo the updates.
@ -324,21 +317,11 @@ index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
--i->count;
i->index_list_size -= index_list_size_add;
}
}
return ret;
}
extern LZMA_API lzma_ret
lzma_index_append(lzma_index *i, lzma_allocator *allocator,
lzma_vli total_size, lzma_vli uncompressed_size)
{
return index_append(i, allocator,
total_size, uncompressed_size, false);
}
/// Initialize i->current to point to the first Record.
static bool
init_current(lzma_index *i)
@ -370,10 +353,10 @@ previous_group(lzma_index *i)
i->current.record = i->current.group->last;
// Then update the offsets.
i->current.stream_offset -= i->current.group
->total_sums[i->current.group->last];
i->current.uncompressed_offset -= i->current.group
->uncompressed_sums[i->current.group->last];
i->current.stream_offset -= vli_ceil4(i->current.group->unpadded_sums[
i->current.group->last]);
i->current.uncompressed_offset -= i->current.group->uncompressed_sums[
i->current.group->last];
return;
}
@ -386,8 +369,8 @@ next_group(lzma_index *i)
assert(i->current.group->next != NULL);
// Update the offsets first.
i->current.stream_offset += i->current.group
->total_sums[i->current.group->last];
i->current.stream_offset += vli_ceil4(i->current.group->unpadded_sums[
i->current.group->last]);
i->current.uncompressed_offset += i->current.group
->uncompressed_sums[i->current.group->last];
@ -403,30 +386,39 @@ next_group(lzma_index *i)
static void
set_info(const lzma_index *i, lzma_index_record *info)
{
info->total_size = i->current.group->total_sums[i->current.record];
// First copy the cumulative sizes from the current Record of the
// current group.
info->unpadded_size
= i->current.group->unpadded_sums[i->current.record];
info->total_size = vli_ceil4(info->unpadded_size);
info->uncompressed_size = i->current.group->uncompressed_sums[
i->current.record];
// Copy the start offsets of this group.
info->stream_offset = i->current.stream_offset;
info->uncompressed_offset = i->current.uncompressed_offset;
// If it's not the first Record in this group, we need to do some
// adjustments.
if (i->current.record > 0) {
// _sums[] are cumulative, thus we need to substract the
// _previous _sums[] to get the sizes of this Record.
info->total_size -= i->current.group
->total_sums[i->current.record - 1];
info->uncompressed_size -= i->current.group
// Since the _sums[] are cumulative, we subtract the sums of
// the previous Record to get the sizes of the current Record,
// and add the sums of the previous Record to the offsets.
// With unpadded_sums[] we need to take into account that it
// uses a bit weird way to do the cumulative summing.
const lzma_vli total_sum
= vli_ceil4(i->current.group->unpadded_sums[
i->current.record - 1]);
const lzma_vli uncompressed_sum = i->current.group
->uncompressed_sums[i->current.record - 1];
// i->current.{total,uncompressed}_offsets have the offset
// of the beginning of the group, thus we need to add the
// appropriate amount to get the offsetes of this Record.
info->stream_offset += i->current.group
->total_sums[i->current.record - 1];
info->uncompressed_offset += i->current.group
->uncompressed_sums[i->current.record - 1];
info->total_size -= total_sum;
info->unpadded_size -= total_sum;
info->uncompressed_size -= uncompressed_sum;
info->stream_offset += total_sum;
info->uncompressed_offset += uncompressed_sum;
}
return;
@ -547,12 +539,23 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
return LZMA_PROG_ERROR;
// Check that the combined size of the Indexes stays within limits.
{
const lzma_vli dest_size = index_size_unpadded(
dest->count, dest->index_list_size);
const lzma_vli src_size = index_size_unpadded(
src->count, src->index_list_size);
if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
return LZMA_DATA_ERROR;
}
// Check that the combined size of the "files" (combined total
// encoded sizes) stays within limits.
{
const lzma_vli dest_size = lzma_index_file_size(dest);
const lzma_vli src_size = lzma_index_file_size(src);
if (dest_size + src_size > LZMA_VLI_UNKNOWN
if (dest_size + src_size > LZMA_VLI_MAX
|| dest_size + src_size + padding
> LZMA_VLI_UNKNOWN)
> LZMA_VLI_MAX)
return LZMA_DATA_ERROR;
}
@ -561,17 +564,37 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
//
// NOTE: This cannot overflow, because Index Size is always
// far smaller than LZMA_VLI_MAX, and adding two VLIs
// (Index Size and padding) doesn't overflow. It may become
// an invalid VLI if padding is huge, but that is caught by
// index_append().
// (Index Size and padding) doesn't overflow.
padding += index_size(dest->count - dest->old.count,
dest->index_list_size
- dest->old.index_list_size)
+ LZMA_STREAM_HEADER_SIZE * 2;
// The above cannot overflow, but it may become an invalid VLI.
if (padding > LZMA_VLI_MAX)
return LZMA_DATA_ERROR;
// Add the padding Record.
return_if_error(index_append(
dest, allocator, padding, 0, true));
{
lzma_ret ret;
// First update the info so we can validate it.
dest->old.streams_size += padding;
if (dest->old.streams_size > LZMA_VLI_MAX
|| lzma_index_file_size(dest) > LZMA_VLI_MAX)
ret = LZMA_DATA_ERROR; // Would grow past the limits.
else
ret = index_append_real(dest, allocator,
padding, 0, true);
// If something went wrong, undo the updated value and return
// the error.
if (ret != LZMA_OK) {
dest->old.streams_size -= padding;
return ret;
}
}
// Avoid wasting lots of memory if src->head has only a few records
// that fit into dest->tail. That is, combine two groups if possible.
@ -581,9 +604,10 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
if (src->head != NULL && src->head->last + 1
<= INDEX_GROUP_SIZE - dest->tail->last - 1) {
// Copy the first Record.
dest->tail->total_sums[dest->tail->last + 1]
= dest->tail->total_sums[dest->tail->last]
+ src->head->total_sums[0];
dest->tail->unpadded_sums[dest->tail->last + 1]
= vli_ceil4(dest->tail->unpadded_sums[
dest->tail->last])
+ src->head->unpadded_sums[0];
dest->tail->uncompressed_sums[dest->tail->last + 1]
= dest->tail->uncompressed_sums[dest->tail->last]
@ -596,10 +620,11 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
// Copy the rest.
for (size_t i = 1; i < src->head->last; ++i) {
dest->tail->total_sums[dest->tail->last + 1]
= dest->tail->total_sums[dest->tail->last]
+ src->head->total_sums[i + 1]
- src->head->total_sums[i];
dest->tail->unpadded_sums[dest->tail->last + 1]
= vli_ceil4(dest->tail->unpadded_sums[
dest->tail->last])
+ src->head->unpadded_sums[i + 1]
- src->head->unpadded_sums[i];
dest->tail->uncompressed_sums[dest->tail->last + 1]
= dest->tail->uncompressed_sums[
@ -636,13 +661,13 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
dest->old.count = dest->count + src->old.count;
dest->old.index_list_size
= dest->index_list_size + src->old.index_list_size;
dest->old.streams_size += src->old.streams_size;
// Update overall information.
dest->total_size += src->total_size;
dest->uncompressed_size += src->uncompressed_size;
dest->count += src->count;
dest->index_list_size += src->index_list_size;
dest->padding_size += src->padding_size;
// *src has nothing left but the base structure.
lzma_free(src, allocator);
@ -690,7 +715,7 @@ lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
// Copy the arrays so that we don't read uninitialized memory.
const size_t count = src_group->last + 1;
memcpy(dest_group->total_sums, src_group->total_sums,
memcpy(dest_group->unpadded_sums, src_group->unpadded_sums,
sizeof(lzma_vli) * count);
memcpy(dest_group->uncompressed_sums,
src_group->uncompressed_sums,
@ -729,8 +754,8 @@ lzma_index_equal(const lzma_index *a, const lzma_index *b)
while (ag != NULL && bg != NULL) {
const size_t count = ag->last + 1;
if (ag->last != bg->last
|| memcmp(ag->total_sums,
bg->total_sums,
|| memcmp(ag->unpadded_sums,
bg->unpadded_sums,
sizeof(lzma_vli) * count) != 0
|| memcmp(ag->uncompressed_sums,
bg->uncompressed_sums,

View file

@ -23,14 +23,11 @@
#include "common.h"
/// Maximum encoded value of Total Size.
#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_MAX / 4 - 1)
/// Minimum Unpadded Size
#define UNPADDED_SIZE_MIN LZMA_VLI_C(5)
/// Convert the real Total Size value to a value that is stored to the Index.
#define total_size_encode(size) ((size) / 4 - 1)
/// Convert the encoded Total Size value from Index to the real Total Size.
#define total_size_decode(size) (((size) + 1) * 4)
/// Maximum Unpadded Size
#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
/// Get the size of the Index Padding field. This is needed by Index encoder
@ -38,6 +35,16 @@
extern uint32_t lzma_index_padding_size(const lzma_index *i);
/// Round a variable-length integer up to the nearest multiple of four.
/// A value that is already a multiple of four is returned unchanged.
static inline lzma_vli
vli_ceil4(lzma_vli vli)
{
	assert(vli <= LZMA_VLI_MAX);

	// Adding the low-bit mask and then clearing those bits rounds up.
	const lzma_vli low_bits = LZMA_VLI_C(3);
	return (vli + low_bits) & ~low_bits;
}
/// Calculate the size of the Index field excluding Index Padding
static inline lzma_vli
index_size_unpadded(lzma_vli count, lzma_vli index_list_size)
{
@ -46,20 +53,20 @@ index_size_unpadded(lzma_vli count, lzma_vli index_list_size)
}
/// Calculate the size of the Index field including Index Padding
static inline lzma_vli
index_size(lzma_vli count, lzma_vli index_list_size)
{
// Round up to a multiple of four.
return (index_size_unpadded(count, index_list_size) + 3)
& ~LZMA_VLI_C(3);
return vli_ceil4(index_size_unpadded(count, index_list_size));
}
/// Calculate the total size of the Stream
static inline lzma_vli
index_stream_size(
lzma_vli total_size, lzma_vli count, lzma_vli index_list_size)
index_stream_size(lzma_vli blocks_size,
lzma_vli count, lzma_vli index_list_size)
{
return LZMA_STREAM_HEADER_SIZE + total_size
return LZMA_STREAM_HEADER_SIZE + blocks_size
+ index_size(count, index_list_size)
+ LZMA_STREAM_HEADER_SIZE;
}

View file

@ -25,7 +25,7 @@ struct lzma_coder_s {
enum {
SEQ_INDICATOR,
SEQ_COUNT,
SEQ_TOTAL,
SEQ_UNPADDED,
SEQ_UNCOMPRESSED,
SEQ_PADDING_INIT,
SEQ_PADDING,
@ -38,8 +38,8 @@ struct lzma_coder_s {
/// Number of Records left to decode.
lzma_vli count;
/// The most recent Total Size field
lzma_vli total_size;
/// The most recent Unpadded Size field
lzma_vli unpadded_size;
/// The most recent Uncompressed Size field
lzma_vli uncompressed_size;
@ -91,14 +91,14 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator,
ret = LZMA_OK;
coder->pos = 0;
coder->sequence = coder->count == 0
? SEQ_PADDING_INIT : SEQ_TOTAL;
? SEQ_PADDING_INIT : SEQ_UNPADDED;
break;
}
case SEQ_TOTAL:
case SEQ_UNPADDED:
case SEQ_UNCOMPRESSED: {
lzma_vli *size = coder->sequence == SEQ_TOTAL
? &coder->total_size
lzma_vli *size = coder->sequence == SEQ_UNPADDED
? &coder->unpadded_size
: &coder->uncompressed_size;
ret = lzma_vli_decode(size, &coder->pos,
@ -109,27 +109,26 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator,
ret = LZMA_OK;
coder->pos = 0;
if (coder->sequence == SEQ_TOTAL) {
// Validate that encoded Total Size isn't too big.
if (coder->total_size > TOTAL_SIZE_ENCODED_MAX)
if (coder->sequence == SEQ_UNPADDED) {
// Validate that encoded Unpadded Size isn't too small
// or too big.
if (coder->unpadded_size < UNPADDED_SIZE_MIN
|| coder->unpadded_size
> UNPADDED_SIZE_MAX)
return LZMA_DATA_ERROR;
// Convert the encoded Total Size to the real
// Total Size.
coder->total_size = total_size_decode(
coder->total_size);
coder->sequence = SEQ_UNCOMPRESSED;
} else {
// Add the decoded Record to the Index.
return_if_error(lzma_index_append(
coder->index, allocator,
coder->total_size,
coder->unpadded_size,
coder->uncompressed_size));
// Check if this was the last Record.
coder->sequence = --coder->count == 0
? SEQ_PADDING_INIT
: SEQ_TOTAL;
: SEQ_UNPADDED;
}
break;

View file

@ -26,7 +26,7 @@ struct lzma_coder_s {
enum {
SEQ_INDICATOR,
SEQ_COUNT,
SEQ_TOTAL,
SEQ_UNPADDED,
SEQ_UNCOMPRESSED,
SEQ_NEXT,
SEQ_PADDING,
@ -97,18 +97,20 @@ index_encode(lzma_coder *coder,
break;
}
// Total Size must be a multiple of four.
if (coder->record.total_size & 3)
// Unpadded Size must be within valid limits.
if (coder->record.unpadded_size < UNPADDED_SIZE_MIN
|| coder->record.unpadded_size
> UNPADDED_SIZE_MAX)
return LZMA_PROG_ERROR;
coder->sequence = SEQ_TOTAL;
coder->sequence = SEQ_UNPADDED;
// Fall through
case SEQ_TOTAL:
case SEQ_UNPADDED:
case SEQ_UNCOMPRESSED: {
const lzma_vli size = coder->sequence == SEQ_TOTAL
? total_size_encode(coder->record.total_size)
const lzma_vli size = coder->sequence == SEQ_UNPADDED
? coder->record.unpadded_size
: coder->record.uncompressed_size;
ret = lzma_vli_encode(size, &coder->pos,

View file

@ -23,8 +23,8 @@
typedef struct {
/// Sum of the Total Size fields
lzma_vli total_size;
/// Sum of the Block sizes (including Block Padding)
lzma_vli blocks_size;
/// Sum of the Uncompressed Size fields
lzma_vli uncompressed_size;
@ -35,7 +35,7 @@ typedef struct {
/// Size of the List of Index Records as bytes
lzma_vli index_list_size;
/// Check calculated from Total Sizes and Uncompressed Sizes.
/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
lzma_check_state check;
} lzma_index_hash_info;
@ -45,7 +45,7 @@ struct lzma_index_hash_s {
enum {
SEQ_BLOCK,
SEQ_COUNT,
SEQ_TOTAL,
SEQ_UNPADDED,
SEQ_UNCOMPRESSED,
SEQ_PADDING_INIT,
SEQ_PADDING,
@ -61,8 +61,8 @@ struct lzma_index_hash_s {
/// Number of Records not fully decoded
lzma_vli remaining;
/// Total Size currently being read from an Index Record.
lzma_vli total_size;
/// Unpadded Size currently being read from an Index Record.
lzma_vli unpadded_size;
/// Uncompressed Size currently being read from an Index Record.
lzma_vli uncompressed_size;
@ -86,15 +86,15 @@ lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
}
index_hash->sequence = SEQ_BLOCK;
index_hash->blocks.total_size = 0;
index_hash->blocks.blocks_size = 0;
index_hash->blocks.uncompressed_size = 0;
index_hash->blocks.count = 0;
index_hash->blocks.index_list_size = 0;
index_hash->records.total_size = 0;
index_hash->records.blocks_size = 0;
index_hash->records.uncompressed_size = 0;
index_hash->records.count = 0;
index_hash->records.index_list_size = 0;
index_hash->total_size = 0;
index_hash->unpadded_size = 0;
index_hash->uncompressed_size = 0;
index_hash->pos = 0;
index_hash->crc32 = 0;
@ -128,16 +128,16 @@ lzma_index_hash_size(const lzma_index_hash *index_hash)
/// Updates the sizes and the hash without any validation.
static lzma_ret
hash_append(lzma_index_hash_info *info, lzma_vli total_size,
hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
lzma_vli uncompressed_size)
{
info->total_size += total_size;
info->blocks_size += vli_ceil4(unpadded_size);
info->uncompressed_size += uncompressed_size;
info->index_list_size += lzma_vli_size(total_size_encode(total_size))
info->index_list_size += lzma_vli_size(unpadded_size)
+ lzma_vli_size(uncompressed_size);
++info->count;
const lzma_vli sizes[2] = { total_size, uncompressed_size };
const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
lzma_check_update(&info->check, LZMA_CHECK_BEST,
(const uint8_t *)(sizes), sizeof(sizes));
@ -146,26 +146,27 @@ hash_append(lzma_index_hash_info *info, lzma_vli total_size,
extern LZMA_API lzma_ret
lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size,
lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
lzma_vli uncompressed_size)
{
// Validate the arguments.
if (index_hash->sequence != SEQ_BLOCK || total_size == 0
|| total_size > LZMA_VLI_MAX || (total_size & 3)
if (index_hash->sequence != SEQ_BLOCK
|| unpadded_size < UNPADDED_SIZE_MIN
|| unpadded_size > UNPADDED_SIZE_MAX
|| uncompressed_size > LZMA_VLI_MAX)
return LZMA_PROG_ERROR;
// Update the hash.
return_if_error(hash_append(&index_hash->blocks,
total_size, uncompressed_size));
unpadded_size, uncompressed_size));
// Validate the properties of *info are still in allowed limits.
if (index_hash->blocks.total_size > LZMA_VLI_MAX
if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
|| index_size(index_hash->blocks.count,
index_hash->blocks.index_list_size)
> LZMA_BACKWARD_SIZE_MAX
|| index_stream_size(index_hash->blocks.total_size,
|| index_stream_size(index_hash->blocks.blocks_size,
index_hash->blocks.count,
index_hash->blocks.index_list_size)
> LZMA_VLI_MAX)
@ -216,14 +217,14 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
// Handle the special case when there are no Blocks.
index_hash->sequence = index_hash->remaining == 0
? SEQ_PADDING_INIT : SEQ_TOTAL;
? SEQ_PADDING_INIT : SEQ_UNPADDED;
break;
}
case SEQ_TOTAL:
case SEQ_UNPADDED:
case SEQ_UNCOMPRESSED: {
lzma_vli *size = index_hash->sequence == SEQ_TOTAL
? &index_hash->total_size
lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
? &index_hash->unpadded_size
: &index_hash->uncompressed_size;
ret = lzma_vli_decode(size, &index_hash->pos,
@ -234,18 +235,17 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
ret = LZMA_OK;
index_hash->pos = 0;
if (index_hash->sequence == SEQ_TOTAL) {
if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX)
if (index_hash->sequence == SEQ_UNPADDED) {
if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
|| index_hash->unpadded_size
> UNPADDED_SIZE_MAX)
return LZMA_DATA_ERROR;
index_hash->total_size = total_size_decode(
index_hash->total_size);
index_hash->sequence = SEQ_UNCOMPRESSED;
} else {
// Update the hash.
return_if_error(hash_append(&index_hash->records,
index_hash->total_size,
index_hash->unpadded_size,
index_hash->uncompressed_size));
// Verify that we don't go over the known sizes. Note
@ -254,8 +254,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
// that values in index_hash->blocks are already
// validated and we are fine as long as we don't
// exceed them in index_hash->records.
if (index_hash->blocks.total_size
< index_hash->records.total_size
if (index_hash->blocks.blocks_size
< index_hash->records.blocks_size
|| index_hash->blocks.uncompressed_size
< index_hash->records.uncompressed_size
|| index_hash->blocks.index_list_size
@ -264,7 +264,7 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
// Check if this was the last Record.
index_hash->sequence = --index_hash->remaining == 0
? SEQ_PADDING_INIT : SEQ_TOTAL;
? SEQ_PADDING_INIT : SEQ_UNPADDED;
}
break;
@ -288,8 +288,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
}
// Compare the sizes.
if (index_hash->blocks.total_size
!= index_hash->records.total_size
if (index_hash->blocks.blocks_size
!= index_hash->records.blocks_size
|| index_hash->blocks.uncompressed_size
!= index_hash->records.uncompressed_size
|| index_hash->blocks.index_list_size

View file

@ -190,7 +190,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
// Set up a buffer to hold the filter chain. Block Header
// decoder will initialize all members of this array so
// we don't need to do it here.
lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1];
lzma_filter filters[LZMA_FILTERS_MAX + 1];
coder->block_options.filters = filters;
// Decode the Block Header.
@ -216,7 +216,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
// Free the allocated filter options since they are needed
// only to initialize the Block decoder.
for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i)
for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
lzma_free(filters[i].options, allocator);
coder->block_options.filters = NULL;
@ -243,7 +243,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
// Block decoded successfully. Add the new size pair to
// the Index hash.
return_if_error(lzma_index_hash_append(coder->index_hash,
lzma_block_total_size_get(
lzma_block_unpadded_size(
&coder->block_options),
coder->block_options.uncompressed_size));
@ -270,7 +270,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
// Fall through
case SEQ_STREAM_FOOTER:
case SEQ_STREAM_FOOTER: {
// Copy the Stream Footer to the internal buffer.
lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
LZMA_STREAM_HEADER_SIZE);
@ -306,6 +306,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
return LZMA_STREAM_END;
coder->sequence = SEQ_STREAM_PADDING;
}
// Fall through

View file

@ -157,11 +157,11 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator,
return ret;
// Add a new Index Record.
const lzma_vli total_size = lzma_block_total_size_get(
const lzma_vli unpadded_size = lzma_block_unpadded_size(
&coder->block_options);
assert(total_size != 0);
assert(unpadded_size != 0);
return_if_error(lzma_index_append(coder->index, allocator,
total_size,
unpadded_size,
coder->block_options.uncompressed_size));
coder->sequence = SEQ_BLOCK_INIT;

View file

@ -157,14 +157,14 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
uint32_t copy_size = dict->size - copy_pos;
if (copy_size < left) {
memcpy(dict->buf + dict->pos, dict->buf + copy_pos,
memmove(dict->buf + dict->pos, dict->buf + copy_pos,
copy_size);
dict->pos += copy_size;
copy_size = left - copy_size;
memcpy(dict->buf + dict->pos, dict->buf, copy_size);
dict->pos += copy_size;
} else {
memcpy(dict->buf + dict->pos, dict->buf + copy_pos,
memmove(dict->buf + dict->pos, dict->buf + copy_pos,
left);
dict->pos += left;
}

View file

@ -211,7 +211,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
break;
}
case FLAG_END_SUBFILTER:
case FLAG_END_SUBFILTER: {
if (coder->padding != 0 || (in[*in_pos] & 0x0F)
|| coder->subfilter.code == NULL
|| !coder->got_output_with_subfilter)
@ -250,6 +250,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
++*in_pos;
break;
}
default:
return LZMA_DATA_ERROR;

View file

@ -15,19 +15,16 @@
bin_PROGRAMS = lzma
lzma_SOURCES = \
alloc.c \
alloc.h \
args.c \
args.h \
error.c \
error.h \
hardware.c \
hardware.h \
help.c \
help.h \
io.c \
io.h \
main.c \
main.h \
message.c \
message.h \
options.c \
options.h \
private.h \

View file

@ -1,106 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file alloc.c
/// \brief Memory allocation functions
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#include "private.h"
/// \brief      Report the failure described by errno and terminate
///
/// Prints strerror(errno) at V_ERROR level with errmsg() and then calls
/// my_exit(ERROR). The function is declared noreturn.
static void lzma_attribute((noreturn))
xerror(void)
{
	// Resolve the message text first; errmsg() then only formats it.
	const char *reason = strerror(errno);
	errmsg(V_ERROR, "%s", reason);

	my_exit(ERROR);
}
/// \brief      malloc() wrapper that terminates the program on failure
///
/// \param      size    Number of bytes to allocate. Zero is rejected
///                     with errno set to EINVAL.
///
/// \return     Pointer returned by malloc(). If size is zero or malloc()
///             returns NULL, xerror() is called instead of returning.
extern void *
xmalloc(size_t size)
{
	// A zero-byte request is reported as an invalid argument.
	if (size == 0) {
		errno = EINVAL;
		xerror();
	}

	void *const mem = malloc(size);
	if (!mem)
		xerror();

	return mem;
}
/*
extern void *
xrealloc(void *ptr, size_t size)
{
if (size < 1) {
errno = EINVAL;
xerror();
}
ptr = realloc(ptr, size);
if (ptr == NULL)
xerror();
return ptr;
}
*/
/// \brief      Duplicate a string, terminating the program on failure
///
/// \param      src     String to duplicate. NULL is rejected with errno
///                     set to EINVAL.
///
/// \return     Newly allocated copy of src obtained from malloc(). If src
///             is NULL or malloc() returns NULL, xerror() is called
///             instead of returning.
extern char *
xstrdup(const char *src)
{
	if (!src) {
		errno = EINVAL;
		xerror();
	}

	// Include the terminating '\0' in the copy.
	const size_t bytes = strlen(src) + 1;

	char *const copy = malloc(bytes);
	if (!copy)
		xerror();

	return memcpy(copy, src, bytes);
}
/// \brief      Resize *dest to fit src and copy src into it
///
/// \param      dest    Pointer to the destination pointer. *dest is passed
///                     to realloc(), so it has to be NULL or a pointer
///                     that realloc() accepts; it is updated to point to
///                     the resized buffer.
/// \param      src     Null-terminated string to copy.
///
/// If realloc() returns NULL, xerror() is called instead of returning.
extern void
xstrcpy(char **dest, const char *src)
{
	// Size of the string including the terminating '\0'.
	const size_t size = strlen(src) + 1;

	*dest = realloc(*dest, size);
	if (*dest == NULL)
		xerror();

	// Copy exactly the allocated amount. The terminator is already
	// counted in size; copying size + 1 bytes would read one byte past
	// the end of src and write one byte past the end of *dest.
	memcpy(*dest, src, size);
	return;
}
/// \brief      Allocation callback that forwards to xmalloc()
///
/// opaque and nmemb are ignored; only size is passed on to xmalloc().
extern void *
allocator(void *opaque lzma_attribute((unused)),
		size_t nmemb lzma_attribute((unused)), size_t size)
{
	void *const mem = xmalloc(size);
	return mem;
}

View file

@ -1,42 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file alloc.h
/// \brief Memory allocation functions
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef ALLOC_H
#define ALLOC_H
#include "private.h"
/// Safe malloc() that never returns NULL.
extern void *xmalloc(size_t size);
/// Safe realloc() that never returns NULL.
extern void *xrealloc(void *ptr, size_t size);
/// Safe strdup() that never returns NULL.
extern char *xstrdup(const char *src);
/// xrealloc()s *dest to the size needed by src, and copies src to *dest.
extern void xstrcpy(char **dest, const char *src);
/// Function for lzma_allocator.alloc. This uses xmalloc().
extern void *allocator(void *opaque lzma_attribute((unused)),
size_t nmemb lzma_attribute((unused)), size_t size);
#endif

View file

@ -25,38 +25,18 @@
#include <ctype.h>
enum tool_mode opt_mode = MODE_COMPRESS;
enum format_type opt_format = FORMAT_AUTO;
char *opt_suffix = NULL;
char *opt_files_name = NULL;
char opt_files_split = '\0';
FILE *opt_files_file = NULL;
bool opt_stdout = false;
bool opt_force = false;
bool opt_keep_original = false;
bool opt_preserve_name = false;
lzma_check opt_check = LZMA_CHECK_CRC64;
lzma_filter opt_filters[LZMA_BLOCK_FILTERS_MAX + 1];
// We don't modify or free() this, but we need to assign it in some
// non-const pointers.
const char *stdin_filename = "(stdin)";
static size_t preset_number = 7;
static bool preset_default = true;
static size_t filter_count = 0;
/// When compressing, which file format to use if --format=auto or no --format
/// at all has been specified. We need a variable because this depends on
/// the name with which we are called. Any name containing "lz" makes us
/// use the legacy .lzma format.
static enum format_type format_compress_auto = FORMAT_XZ;
static void
parse_real(args_info *args, int argc, char **argv)
{
enum {
OPT_SUBBLOCK = INT_MIN,
OPT_X86,
@ -73,37 +53,40 @@ enum {
OPT_FILES0,
};
static const char short_opts[] = "cC:dfF:hlLkM:qrS:tT:vVz123456789";
static const char short_opts[] = "cC:dfF:hHlLkM:p:qrS:tT:vVz123456789";
static const struct option long_opts[] = {
// gzip-like options
{ "fast", no_argument, NULL, '1' },
{ "best", no_argument, NULL, '9' },
{ "memory", required_argument, NULL, 'M' },
{ "name", no_argument, NULL, 'N' },
{ "suffix", required_argument, NULL, 'S' },
{ "threads", required_argument, NULL, 'T' },
{ "version", no_argument, NULL, 'V' },
{ "stdout", no_argument, NULL, 'c' },
{ "to-stdout", no_argument, NULL, 'c' },
// Operation mode
{ "compress", no_argument, NULL, 'z' },
{ "decompress", no_argument, NULL, 'd' },
{ "uncompress", no_argument, NULL, 'd' },
{ "force", no_argument, NULL, 'f' },
{ "help", no_argument, NULL, 'h' },
{ "test", no_argument, NULL, 't' },
{ "list", no_argument, NULL, 'l' },
{ "info", no_argument, NULL, 'l' },
// Operation modifiers
{ "keep", no_argument, NULL, 'k' },
{ "no-name", no_argument, NULL, 'n' },
{ "quiet", no_argument, NULL, 'q' },
{ "force", no_argument, NULL, 'f' },
{ "stdout", no_argument, NULL, 'c' },
{ "to-stdout", no_argument, NULL, 'c' },
{ "suffix", required_argument, NULL, 'S' },
// { "recursive", no_argument, NULL, 'r' }, // TODO
{ "test", no_argument, NULL, 't' },
{ "verbose", no_argument, NULL, 'v' },
{ "compress", no_argument, NULL, 'z' },
{ "files", optional_argument, NULL, OPT_FILES },
{ "files0", optional_argument, NULL, OPT_FILES0 },
// Basic compression settings
{ "format", required_argument, NULL, 'F' },
{ "check", required_argument, NULL, 'C' },
{ "preset", required_argument, NULL, 'p' },
{ "memory", required_argument, NULL, 'M' },
{ "threads", required_argument, NULL, 'T' },
{ "fast", no_argument, NULL, '1' },
{ "best", no_argument, NULL, '9' },
// Filters
{ "subblock", optional_argument, NULL, OPT_SUBBLOCK },
{ "lzma1", optional_argument, NULL, OPT_LZMA1 },
{ "lzma2", optional_argument, NULL, OPT_LZMA2 },
{ "x86", no_argument, NULL, OPT_X86 },
{ "bcj", no_argument, NULL, OPT_X86 },
{ "powerpc", no_argument, NULL, OPT_POWERPC },
@ -114,61 +97,18 @@ static const struct option long_opts[] = {
{ "armthumb", no_argument, NULL, OPT_ARMTHUMB },
{ "sparc", no_argument, NULL, OPT_SPARC },
{ "delta", optional_argument, NULL, OPT_DELTA },
{ "lzma1", optional_argument, NULL, OPT_LZMA1 },
{ "lzma2", optional_argument, NULL, OPT_LZMA2 },
{ "subblock", optional_argument, NULL, OPT_SUBBLOCK },
// Other
{ "format", required_argument, NULL, 'F' },
{ "check", required_argument, NULL, 'C' },
{ "files", optional_argument, NULL, OPT_FILES },
{ "files0", optional_argument, NULL, OPT_FILES0 },
// Other options
{ "quiet", no_argument, NULL, 'q' },
{ "verbose", no_argument, NULL, 'v' },
{ "help", no_argument, NULL, 'h' },
{ "long-help", no_argument, NULL, 'H' },
{ "version", no_argument, NULL, 'V' },
{ NULL, 0, NULL, 0 }
};
static void
add_filter(lzma_vli id, const char *opt_str)
{
if (filter_count == LZMA_BLOCK_FILTERS_MAX) {
errmsg(V_ERROR, _("Maximum number of filters is seven"));
my_exit(ERROR);
}
opt_filters[filter_count].id = id;
switch (id) {
case LZMA_FILTER_SUBBLOCK:
opt_filters[filter_count].options
= parse_options_subblock(opt_str);
break;
case LZMA_FILTER_DELTA:
opt_filters[filter_count].options
= parse_options_delta(opt_str);
break;
case LZMA_FILTER_LZMA1:
case LZMA_FILTER_LZMA2:
opt_filters[filter_count].options
= parse_options_lzma(opt_str);
break;
default:
assert(opt_str == NULL);
opt_filters[filter_count].options = NULL;
break;
}
++filter_count;
preset_default = false;
return;
}
static void
parse_real(int argc, char **argv)
{
int c;
while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
@ -178,32 +118,28 @@ parse_real(int argc, char **argv)
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
preset_number = c - '0';
preset_default = false;
coder_set_preset(c - '0');
break;
case 'p': {
const uint64_t preset = str_to_uint64(
"preset", optarg, 1, 9);
coder_set_preset(preset);
break;
}
// --memory
case 'M':
opt_memory = str_to_uint64("memory", optarg,
1, SIZE_MAX);
break;
case 'N':
opt_preserve_name = true;
// On 32-bit systems, SIZE_MAX would make more sense
// than UINT64_MAX. But use UINT64_MAX still so that
// scripts that assume > 4 GiB values don't break.
hardware_memlimit_set(str_to_uint64(
"memory", optarg, 0, UINT64_MAX));
break;
// --suffix
case 'S':
// Empty suffix and suffixes having a slash are
// rejected. Such suffixes would break things later.
if (optarg[0] == '\0' || strchr(optarg, '/') != NULL) {
errmsg(V_ERROR, _("%s: Invalid filename "
"suffix"), optarg);
my_exit(ERROR);
}
free(opt_suffix);
opt_suffix = xstrdup(optarg);
suffix_set(optarg);
break;
case 'T':
@ -214,7 +150,7 @@ parse_real(int argc, char **argv)
// --version
case 'V':
// This doesn't return.
show_version();
message_version();
// --stdout
case 'c':
@ -234,7 +170,12 @@ parse_real(int argc, char **argv)
// --help
case 'h':
// This doesn't return.
show_help();
message_help(false);
// --long-help
case 'H':
// This doesn't return.
message_help(true);
// --list
case 'l':
@ -246,15 +187,9 @@ parse_real(int argc, char **argv)
opt_keep_original = true;
break;
case 'n':
opt_preserve_name = false;
break;
// --quiet
case 'q':
if (verbosity > V_SILENT)
--verbosity;
message_verbosity_decrease();
break;
case 't':
@ -263,9 +198,7 @@ parse_real(int argc, char **argv)
// --verbose
case 'v':
if (verbosity < V_DEBUG)
++verbosity;
message_verbosity_increase();
break;
case 'z':
@ -275,43 +208,47 @@ parse_real(int argc, char **argv)
// Filter setup
case OPT_SUBBLOCK:
add_filter(LZMA_FILTER_SUBBLOCK, optarg);
coder_add_filter(LZMA_FILTER_SUBBLOCK,
options_subblock(optarg));
break;
case OPT_X86:
add_filter(LZMA_FILTER_X86, NULL);
coder_add_filter(LZMA_FILTER_X86, NULL);
break;
case OPT_POWERPC:
add_filter(LZMA_FILTER_POWERPC, NULL);
coder_add_filter(LZMA_FILTER_POWERPC, NULL);
break;
case OPT_IA64:
add_filter(LZMA_FILTER_IA64, NULL);
coder_add_filter(LZMA_FILTER_IA64, NULL);
break;
case OPT_ARM:
add_filter(LZMA_FILTER_ARM, NULL);
coder_add_filter(LZMA_FILTER_ARM, NULL);
break;
case OPT_ARMTHUMB:
add_filter(LZMA_FILTER_ARMTHUMB, NULL);
coder_add_filter(LZMA_FILTER_ARMTHUMB, NULL);
break;
case OPT_SPARC:
add_filter(LZMA_FILTER_SPARC, NULL);
coder_add_filter(LZMA_FILTER_SPARC, NULL);
break;
case OPT_DELTA:
add_filter(LZMA_FILTER_DELTA, optarg);
coder_add_filter(LZMA_FILTER_DELTA,
options_delta(optarg));
break;
case OPT_LZMA1:
add_filter(LZMA_FILTER_LZMA1, optarg);
coder_add_filter(LZMA_FILTER_LZMA1,
options_lzma(optarg));
break;
case OPT_LZMA2:
add_filter(LZMA_FILTER_LZMA2, optarg);
coder_add_filter(LZMA_FILTER_LZMA2,
options_lzma(optarg));
break;
// Other
@ -335,14 +272,11 @@ parse_real(int argc, char **argv)
};
size_t i = 0;
while (strcmp(types[i].str, optarg) != 0) {
if (++i == ARRAY_SIZE(types)) {
errmsg(V_ERROR, _("%s: Unknown file "
while (strcmp(types[i].str, optarg) != 0)
if (++i == ARRAY_SIZE(types))
message_fatal(_("%s: Unknown file "
"format type"),
optarg);
my_exit(ERROR);
}
}
opt_format = types[i].format;
break;
@ -362,50 +296,43 @@ parse_real(int argc, char **argv)
size_t i = 0;
while (strcmp(types[i].str, optarg) != 0) {
if (++i == ARRAY_SIZE(types)) {
errmsg(V_ERROR, _("%s: Unknown "
"integrity check "
"type"), optarg);
my_exit(ERROR);
}
if (++i == ARRAY_SIZE(types))
message_fatal(_("%s: Unknown integrity"
"check type"), optarg);
}
opt_check = types[i].check;
coder_set_check(types[i].check);
break;
}
case OPT_FILES:
opt_files_split = '\n';
args->files_delim = '\n';
// Fall through
case OPT_FILES0:
if (opt_files_name != NULL) {
errmsg(V_ERROR, _("Only one file can be "
if (args->files_name != NULL)
message_fatal(_("Only one file can be "
"specified with `--files'"
"or `--files0'."));
my_exit(ERROR);
}
if (optarg == NULL) {
opt_files_name = (char *)stdin_filename;
opt_files_file = stdin;
args->files_name = (char *)stdin_filename;
args->files_file = stdin;
} else {
opt_files_name = optarg;
opt_files_file = fopen(optarg,
args->files_name = optarg;
args->files_file = fopen(optarg,
c == OPT_FILES ? "r" : "rb");
if (opt_files_file == NULL) {
errmsg(V_ERROR, "%s: %s", optarg,
if (args->files_file == NULL)
message_fatal("%s: %s", optarg,
strerror(errno));
my_exit(ERROR);
}
}
break;
default:
show_try_help();
my_exit(ERROR);
message_try_help();
my_exit(E_ERROR);
}
}
@ -414,163 +341,124 @@ parse_real(int argc, char **argv)
static void
parse_environment(void)
parse_environment(args_info *args, char *argv0)
{
char *env = getenv("LZMA_OPT");
char *env = getenv("XZ_OPT");
if (env == NULL)
return;
// We modify the string, so make a copy of it.
env = xstrdup(env);
// Calculate the number of arguments in env.
unsigned int argc = 1;
// Calculate the number of arguments in env. argc starts at one
// to include space for the program name.
int argc = 1;
bool prev_was_space = true;
for (size_t i = 0; env[i] != '\0'; ++i) {
if (isspace(env[i])) {
prev_was_space = true;
} else if (prev_was_space) {
prev_was_space = false;
if (++argc > (unsigned int)(INT_MAX)) {
errmsg(V_ERROR, _("The environment variable "
"LZMA_OPT contains too many "
// Keep argc small enough to fit into a signed int
// and to keep it usable for memory allocation.
if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
message_fatal(_("The environment variable "
"XZ_OPT contains too many "
"arguments"));
my_exit(ERROR);
}
}
}
char **argv = xmalloc((argc + 1) * sizeof(char*));
// Allocate memory to hold pointers to the arguments. Add one to get
// space for the terminating NULL (if some systems happen to need it).
char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
argv[0] = argv0;
argv[argc] = NULL;
// Go through the string again. Split the arguments using '\0'
// characters and add pointers to the resulting strings to argv.
argc = 1;
prev_was_space = true;
for (size_t i = 0; env[i] != '\0'; ++i) {
if (isspace(env[i])) {
prev_was_space = true;
env[i] = '\0';
} else if (prev_was_space) {
prev_was_space = false;
argv[argc++] = env + i;
}
}
parse_real((int)(argc), argv);
// Parse the argument list we got from the environment. All non-option
// arguments i.e. filenames are ignored.
parse_real(args, argc, argv);
// Reset the state of the getopt_long() so that we can parse the
// command line options too. There are two incompatible ways to
// do it.
#ifdef HAVE_OPTRESET
// BSD
optind = 1;
optreset = 1;
#else
// GNU, Solaris
optind = 0;
#endif
// We don't need the argument list from environment anymore.
free(argv);
free(env);
return;
}
static void
set_compression_settings(void)
extern void
args_parse(args_info *args, int argc, char **argv)
{
static lzma_options_lzma opt_lzma;
// Initialize those parts of *args that we need later.
args->files_name = NULL;
args->files_file = NULL;
args->files_delim = '\0';
if (filter_count == 0) {
if (lzma_lzma_preset(&opt_lzma, preset_number)) {
errmsg(V_ERROR, _("Internal error (bug)"));
my_exit(ERROR);
}
// Type of the file format to use when --format=auto or no --format
// was specified.
enum format_type format_compress_auto = FORMAT_XZ;
opt_filters[0].id = opt_format == FORMAT_LZMA
? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
opt_filters[0].options = &opt_lzma;
filter_count = 1;
}
// Terminate the filter options array.
opt_filters[filter_count].id = LZMA_VLI_UNKNOWN;
// If we are using the LZMA_Alone format, allow exactly one filter
// which has to be LZMA.
if (opt_format == FORMAT_LZMA && (filter_count != 1
|| opt_filters[0].id != LZMA_FILTER_LZMA1)) {
errmsg(V_ERROR, _("With --format=lzma only the LZMA1 filter "
"is supported"));
my_exit(ERROR);
}
// TODO: liblzma probably needs an API to validate the filter chain.
// If using --format=raw, we can be decoding.
uint64_t memory_usage = opt_mode == MODE_COMPRESS
? lzma_memusage_encoder(opt_filters)
: lzma_memusage_decoder(opt_filters);
// Don't go over the memory limits when the default
// setting is used.
if (preset_default) {
while (memory_usage > opt_memory) {
if (preset_number == 1) {
errmsg(V_ERROR, _("Memory usage limit is too "
"small for any internal "
"filter preset"));
my_exit(ERROR);
}
if (lzma_lzma_preset(&opt_lzma, --preset_number)) {
errmsg(V_ERROR, _("Internal error (bug)"));
my_exit(ERROR);
}
memory_usage = lzma_memusage_encoder(opt_filters);
}
// TODO: With --format=raw, we should print a warning since
// the presets may change and thus the next version may not
// be able to uncompress the raw stream with the same preset
// number.
} else {
if (memory_usage > opt_memory) {
errmsg(V_ERROR, _("Memory usage limit is too small "
"for the given filter setup"));
my_exit(ERROR);
}
}
// Limit the number of worker threads so that the memory usage
// limit isn't exceeded.
assert(memory_usage > 0);
size_t thread_limit = opt_memory / memory_usage;
if (thread_limit == 0)
thread_limit = 1;
if (opt_threads > thread_limit)
opt_threads = thread_limit;
return;
}
extern char **
parse_args(int argc, char **argv)
{
// Check how we were called.
{
const char *name = str_filename(argv[0]);
if (name != NULL) {
// Default file format
// Remove the leading path name, if any.
const char *name = strrchr(argv[0], '/');
if (name == NULL)
name = argv[0];
else
++name;
// NOTE: It's possible that name[0] is now '\0' if argv[0]
// is weird, but it doesn't matter here.
// The default file format is .lzma if the command name
// contains "lz".
if (strstr(name, "lz") != NULL)
format_compress_auto = FORMAT_LZMA;
// Operation mode
if (strstr(name, "cat") != NULL) {
// Imply --decompress --stdout
opt_mode = MODE_DECOMPRESS;
opt_stdout = true;
} else if (strstr(name, "un") != NULL) {
// Imply --decompress
opt_mode = MODE_DECOMPRESS;
}
}
}
// First the flags from environment
parse_environment();
parse_environment(args, argv[0]);
// Then from the command line
optind = 1;
parse_real(argc, argv);
parse_real(args, argc, argv);
// Never remove the source file when the destination is not on disk.
// In test mode the data is written nowhere, but setting opt_stdout
@ -580,18 +468,33 @@ parse_args(int argc, char **argv)
opt_stdout = true;
}
// If no --format flag was used, or it was --format=auto, we need to
// decide what is the target file format we are going to use. This
// depends on how we were called (checked earlier in this function).
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
opt_format = format_compress_auto;
// Compression settings need to be validated (options themselves and
// their memory usage) when compressing to any file format. It has to
// be done also when uncompressing raw data, since for raw decoding
// the options given on the command line are used to know what kind
// of raw data we are supposed to decode.
if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
set_compression_settings();
coder_set_compression_settings();
// If no filenames are given, use stdin.
if (argv[optind] == NULL && opt_files_name == NULL) {
// We don't modify or free() the "-" constant.
static char *argv_stdin[2] = { (char *)"-", NULL };
return argv_stdin;
if (argv[optind] == NULL && args->files_name == NULL) {
// We don't modify or free() the "-" constant. The caller
// modifies this so don't make the struct itself const.
static char *names_stdin[2] = { (char *)"-", NULL };
args->arg_names = names_stdin;
args->arg_count = 1;
} else {
// We got at least one filename from the command line, or
// --files or --files0 was specified.
args->arg_names = argv + optind;
args->arg_count = argc - optind;
}
return argv + optind;
return;
}

View file

@ -23,42 +23,34 @@
#include "private.h"
enum tool_mode {
MODE_COMPRESS,
MODE_DECOMPRESS,
MODE_TEST,
MODE_LIST,
};
typedef struct {
/// Filenames from command line
char **arg_names;
// NOTE: The order of these is significant in suffix.c.
enum format_type {
FORMAT_AUTO,
FORMAT_XZ,
FORMAT_LZMA,
// HEADER_GZIP,
FORMAT_RAW,
};
/// Number of filenames from command line
size_t arg_count;
/// Name of the file from which to read filenames. This is NULL
/// if --files or --files0 was not used.
char *files_name;
extern char *opt_suffix;
/// File opened for reading from which filenames are read. This is
/// non-NULL only if files_name is non-NULL.
FILE *files_file;
/// Delimiter for filenames read from files_file
char files_delim;
} args_info;
extern char *opt_files_name;
extern char opt_files_split;
extern FILE *opt_files_file;
extern bool opt_stdout;
extern bool opt_force;
extern bool opt_keep_original;
extern bool opt_preserve_name;
// extern bool opt_recursive;
extern enum tool_mode opt_mode;
extern enum format_type opt_format;
extern lzma_check opt_check;
extern lzma_filter opt_filters[LZMA_BLOCK_FILTERS_MAX + 1];
extern const char *stdin_filename;
extern char **parse_args(int argc, char **argv);
extern void args_parse(args_info *args, int argc, char **argv);
#endif

View file

@ -1,162 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file error.c
/// \brief Error message printing
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#include "private.h"
#include <stdarg.h>
exit_status_type exit_status = SUCCESS;
verbosity_type verbosity = V_WARNING;
char *argv0 = NULL;
volatile sig_atomic_t user_abort = 0;
/// \brief      Map a liblzma return code to a message string
///
/// \param      code    Return code to describe.
///
/// \return     Message for the codes handled below (translated with _()
///             except for LZMA_MEM_ERROR, which uses strerror(ENOMEM)),
///             or NULL for any other code.
extern const char *
str_strm_error(lzma_ret code)
{
	// Stays NULL for codes that have no message here.
	const char *msg = NULL;

	switch (code) {
	case LZMA_OK:
		msg = _("Operation successful");
		break;

	case LZMA_STREAM_END:
		msg = _("Operation finished successfully");
		break;

	case LZMA_PROG_ERROR:
		msg = _("Internal error (bug)");
		break;

	case LZMA_DATA_ERROR:
		msg = _("Compressed data is corrupt");
		break;

	case LZMA_MEM_ERROR:
		msg = strerror(ENOMEM);
		break;

	case LZMA_BUF_ERROR:
		msg = _("Unexpected end of input");
		break;

	case LZMA_OPTIONS_ERROR:
		msg = _("Unsupported options");
		break;

	case LZMA_UNSUPPORTED_CHECK:
		msg = _("Unsupported integrity check type");
		break;

	case LZMA_MEMLIMIT_ERROR:
		msg = _("Memory usage limit reached");
		break;

	case LZMA_FORMAT_ERROR:
		msg = _("File format not recognized");
		break;

	default:
		break;
	}

	return msg;
}
/// \brief      Update the global exit_status under a mutex
///
/// \param      new_status  Status to store.
///
/// WARNING is stored only while exit_status is still SUCCESS; any other
/// value is stored unconditionally.
extern void
set_exit_status(exit_status_type new_status)
{
	static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

	pthread_mutex_lock(&mutex);

	// Skip the update only when a WARNING would replace a status that
	// is no longer SUCCESS.
	if (!(new_status == WARNING && exit_status != SUCCESS))
		exit_status = new_status;

	pthread_mutex_unlock(&mutex);
}
/// \brief      Close stdout and stderr, then exit
///
/// \param      status  Exit status requested by the caller. It is replaced
///                     with ERROR if closing stdout or stderr fails or if
///                     either stream has its error indicator set.
///
/// This function doesn't return; it ends with exit(status).
extern void lzma_attribute((noreturn))
my_exit(int status)
{
// Close stdout. If something goes wrong, print an error message
// to stderr.
{
// The error indicator is read before fclose() because the stream
// must not be used after it has been closed.
const int ferror_err = ferror(stdout);
const int fclose_err = fclose(stdout);
if (fclose_err) {
errmsg(V_ERROR, _("Writing to standard output "
"failed: %s"), strerror(errno));
status = ERROR;
} else if (ferror_err) {
// Some error has occurred but we have no clue about
// the reason since fclose() succeeded.
errmsg(V_ERROR, _("Writing to standard output "
"failed: %s"), "Unknown error");
status = ERROR;
}
}
// Close stderr. If something goes wrong, there's nothing where we
// could print an error message. Just set the exit status.
{
const int ferror_err = ferror(stderr);
const int fclose_err = fclose(stderr);
if (fclose_err || ferror_err)
status = ERROR;
}
exit(status);
}
/// \brief      Print a message to stderr and update the exit status
///
/// \param      v       Verbosity level of the message. The message is
///                     printed only if v <= verbosity.
/// \param      fmt     printf-style format string for the message.
///
/// The output has the form "argv0: message\n" and is written while
/// holding a mutex. Independently of whether anything was printed,
/// V_ERROR sets the exit status to ERROR and V_WARNING to WARNING via
/// set_exit_status().
extern void lzma_attribute((format(printf, 2, 3)))
errmsg(verbosity_type v, const char *fmt, ...)
{
	if (v <= verbosity) {
		static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

		va_list args;
		va_start(args, fmt);

		// Hold the lock for the whole line so that the three writes
		// below stay together.
		pthread_mutex_lock(&mutex);
		fprintf(stderr, "%s: ", argv0);
		vfprintf(stderr, fmt, args);
		fprintf(stderr, "\n");
		pthread_mutex_unlock(&mutex);

		va_end(args);
	}

	switch (v) {
	case V_ERROR:
		set_exit_status(ERROR);
		break;

	case V_WARNING:
		set_exit_status(WARNING);
		break;

	default:
		break;
	}
}
/// \brief      Report an out-of-memory condition
///
/// Prints strerror(ENOMEM) at V_ERROR level with errmsg() and then sets
/// user_abort to 1.
extern void
out_of_memory(void)
{
errmsg(V_ERROR, "%s", strerror(ENOMEM));
user_abort = 1;
return;
}
/// \brief      Report an internal error
///
/// Prints the translated "Internal error (bug)" message at V_ERROR level
/// with errmsg() and then sets user_abort to 1.
extern void
internal_error(void)
{
errmsg(V_ERROR, _("Internal error (bug)"));
user_abort = 1;
return;
}

View file

@ -1,67 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file error.h
/// \brief Error message printing
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef ERROR_H
#define ERROR_H
#include "private.h"
typedef enum {
SUCCESS = 0,
ERROR = 1,
WARNING = 2,
} exit_status_type;
typedef enum {
V_SILENT,
V_ERROR,
V_WARNING,
V_VERBOSE,
V_DEBUG,
} verbosity_type;
extern exit_status_type exit_status;
extern verbosity_type verbosity;
/// Like GNU's program_invocation_name but portable
extern char *argv0;
/// Once this is non-zero, all threads must shutdown and clean up incomplete
/// output files from the disk.
extern volatile sig_atomic_t user_abort;
extern const char * str_strm_error(lzma_ret code);
extern void errmsg(verbosity_type v, const char *fmt, ...)
lzma_attribute((format(printf, 2, 3)));
extern void set_exit_status(exit_status_type new_status);
extern void my_exit(int status) lzma_attribute((noreturn));
extern void out_of_memory(void);
extern void internal_error(void);
#endif

View file

@ -26,33 +26,15 @@
size_t opt_threads = 1;
/// Number of bytes of memory to use at maximum (only a rough limit).
/// This can be set with the --memory=NUM command line option.
/// If no better value can be determined, the default is 14 MiB, which
/// should be quite safe even for older systems while still allowing
/// reasonable compression ratio.
size_t opt_memory = 14 * 1024 * 1024;
/// Memory usage limit for encoding
static uint64_t memlimit_encoder;
/// Memory usage limit for decoding
static uint64_t memlimit_decoder;
/// Get the amount of physical memory, and set opt_memory to 1/3 of it.
/// User can then override this with --memory command line option.
static void
hardware_memory(void)
{
uint64_t mem = physmem();
if (mem != 0) {
mem /= 3;
#if UINT64_MAX > SIZE_MAX
if (mem > SIZE_MAX)
mem = SIZE_MAX;
#endif
opt_memory = mem;
}
return;
}
/// Memory usage limit given on the command line or environment variable.
/// Zero indicates the default (memlimit_encoder or memlimit_decoder).
static uint64_t memlimit_custom = 0;
/// Get the number of CPU cores, and set opt_threads to default to that value.
@ -90,10 +72,51 @@ hardware_cores(void)
}
/// Initialize the default memory usage limits (memlimit_encoder and
/// memlimit_decoder) from the amount of RAM reported by physmem().
static void
hardware_memlimit_init(void)
{
uint64_t mem = physmem();
// If we cannot determine the amount of RAM (physmem() returned zero),
// fall back to a small fixed amount. Maybe even that is too much on
// some systems. But on most systems it's far too little, and can be
// annoying.
// NOTE(review): this comment used to say 32 MiB while the assignment
// below uses 16 MiB; confirm which value is intended.
if (mem == 0)
mem = UINT64_C(16) * 1024 * 1024;
// Use at most 90 % of RAM when encoding and one third (about 33 %)
// when decoding.
memlimit_encoder = mem - mem / 10;
memlimit_decoder = mem / 3;
return;
}
/// \brief      Store a custom memory usage limit
///
/// \param      memlimit    New value for memlimit_custom. The getter
///                         functions treat zero as "no custom limit".
extern void
hardware_memlimit_set(uint64_t memlimit)
{
	memlimit_custom = memlimit;
}
/// \brief      Get the memory usage limit for encoding
///
/// \return     memlimit_custom if it is non-zero, otherwise
///             memlimit_encoder.
extern uint64_t
hardware_memlimit_encoder(void)
{
	// A non-zero custom limit overrides the default.
	if (memlimit_custom != 0)
		return memlimit_custom;

	return memlimit_encoder;
}
/// \brief      Get the memory usage limit for decoding
///
/// \return     memlimit_custom if it is non-zero, otherwise
///             memlimit_decoder.
extern uint64_t
hardware_memlimit_decoder(void)
{
	// A non-zero custom limit overrides the default.
	if (memlimit_custom != 0)
		return memlimit_custom;

	return memlimit_decoder;
}
extern void
hardware_init(void)
{
hardware_memory();
hardware_memlimit_init();
hardware_cores();
return;
}

View file

@ -24,8 +24,22 @@
extern size_t opt_threads;
extern size_t opt_memory;
/// Initialize some hardware-specific variables, which are needed by other
/// hardware_* functions.
extern void hardware_init(void);
/// Set custom memory usage limit. This is used for both encoding and
/// decoding. Zero indicates resetting the limit back to defaults.
extern void hardware_memlimit_set(uint64_t memlimit);
/// Get the memory usage limit for encoding. By default this is 90 % of RAM.
extern uint64_t hardware_memlimit_encoder(void);
/// Get the memory usage limit for decoding. By default this is one third
/// (about 33 %) of RAM.
extern uint64_t hardware_memlimit_decoder(void);
#endif

View file

@ -1,170 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file help.c
/// \brief Help messages
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#include "private.h"
/// \brief      Print a hint telling the user to try --help
///
/// The hint is passed to errmsg() with argv0 as the program name.
extern void
show_try_help(void)
{
	// V_WARNING is used instead of V_ERROR on purpose: this keeps the
	// hint from showing up when --quiet has been specified.
	errmsg(V_WARNING, _("Try `%s --help' for more information."), argv0);
}
/// Prints the full usage text to standard output, followed by the memory
/// and thread limits currently in effect and the bug report address, and
/// then exits via my_exit(SUCCESS). This function does not return.
extern void lzma_attribute((noreturn))
show_help(void)
{
// Usage line; argv0 is substituted as the program name.
printf(_("Usage: %s [OPTION]... [FILE]...\n"
"Compress or decompress FILEs in the .lzma format.\n"
"\n"), argv0);
puts(_("Mandatory arguments to long options are mandatory for "
"short options too.\n"));
// The help text is split into one puts() call per option group.
puts(_(
" Operation mode:\n"
"\n"
" -z, --compress force compression\n"
" -d, --decompress force decompression\n"
" -t, --test test compressed file integrity\n"
" -l, --list list information about files\n"
));
puts(_(
" Operation modifiers:\n"
"\n"
" -k, --keep keep (don't delete) input files\n"
" -f, --force force overwrite of output file and (de)compress links\n"
" -c, --stdout write to standard output and don't delete input files\n"
" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n"
" -F, --format=FMT file format to encode or decode; possible values are\n"
" `auto' (default), `xz', `lzma', and `raw'\n"
" --files=[FILE] read filenames to process from FILE; if FILE is\n"
" omitted, filenames are read from the standard input;\n"
" filenames must be terminated with the newline character\n"
" --files0=[FILE] like --files but use the nul byte as terminator\n"
));
puts(_(
" Compression presets and basic compression options:\n"
"\n"
" -1 .. -2 fast compression\n"
" -3 .. -6 good compression\n"
" -7 .. -9 excellent compression, but needs a lot of memory;\n"
" default is -7 if memory limit allows\n"
"\n"
" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n"
" or `sha256'\n"
));
puts(_(
" Custom filter chain for compression (alternative for using presets):\n"
"\n"
" --lzma1=[OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n"
" --lzma2=[OPTS] more of the following options (valid values; default):\n"
" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n"
" lc=NUM number of literal context bits (0-4; 3)\n"
" lp=NUM number of literal position bits (0-4; 0)\n"
" pb=NUM number of position bits (0-4; 2)\n"
" mode=MODE compression mode (fast, normal; normal)\n"
" nice=NUM nice length of a match (2-273; 64)\n"
" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n"
" depth=NUM maximum search depth; 0=automatic (default)\n"
"\n"
" --x86 x86 filter (sometimes called BCJ filter)\n"
" --powerpc PowerPC (big endian) filter\n"
" --ia64 IA64 (Itanium) filter\n"
" --arm ARM filter\n"
" --armthumb ARM-Thumb filter\n"
" --sparc SPARC filter\n"
"\n"
" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n"
" dist=NUM distance between bytes being subtracted\n"
" from each other (1-256; 1)\n"
"\n"
" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n"
" size=NUM number of bytes of data per subblock\n"
" (1 - 256Mi; 4Ki)\n"
" rle=NUM run-length encoder chunk size (0-256; 0)\n"
));
// The --threading option below is commented out, so it is not part of
// the printed help text.
puts(_(
" Resource usage options:\n"
"\n"
" -M, --memory=NUM use roughly NUM bytes of memory at maximum\n"
" -T, --threads=NUM use a maximum of NUM (de)compression threads\n"
// " --threading=STR threading style; possible values are `auto' (default),\n"
// " `files', and `stream'
));
puts(_(
" Other options:\n"
"\n"
" -q, --quiet suppress warnings; specify twice to suppress errors too\n"
" -v, --verbose be verbose; specify twice for even more verbose\n"
"\n"
" -h, --help display this help and exit\n"
" -V, --version display version and license information and exit\n"));
puts(_("With no FILE, or when FILE is -, read standard input.\n"));
// Scale the memory limit down to MiB for display (integer division
// rounds down), but never show it as zero.
size_t mem_limit = opt_memory / (1024 * 1024);
if (mem_limit == 0)
mem_limit = 1;
// We use PRIu64 instead of %zu to support pre-C99 libc.
puts(_("On this system and configuration, the tool will use"));
printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"),
(uint64_t)(mem_limit));
// NOTE(review): N_() presumably picks the singular or plural message
// based on its third argument -- confirm against its definition.
// The singular form has no conversion specifier, so the trailing
// argument is simply unused in that case.
printf(N_(" * at maximum of one thread for (de)compression.\n\n",
" * at maximum of %" PRIu64
" threads for (de)compression.\n\n",
(uint64_t)(opt_threads)), (uint64_t)(opt_threads));
printf(_("Report bugs to <%s> (in English or Finnish).\n"),
PACKAGE_BUGREPORT);
// Showing the help is a successful operation; terminate here.
my_exit(SUCCESS);
}
/// Prints the program name, version, copyright holders and license notice
/// to standard output and then exits via my_exit(SUCCESS). This function
/// does not return.
extern void lzma_attribute((noreturn))
show_version(void)
{
	// The banner is fixed text with no arguments to format, so write it
	// verbatim with fputs(). Passing it to printf() would make the text
	// (including whatever PACKAGE_VERSION expands to) a format string,
	// and a stray '%' in the version would be undefined behavior.
	fputs(
"lzma (LZMA Utils) " PACKAGE_VERSION "\n"
"\n"
"Copyright (C) 1999-2008 Igor Pavlov\n"
"Copyright (C) 2007-2008 Lasse Collin\n"
"\n"
"This program is free software; you can redistribute it and/or modify\n"
"it under the terms of the GNU General Public License as published by\n"
"the Free Software Foundation; either version 2 of the License, or\n"
"(at your option) any later version.\n"
"\n"
"This program is distributed in the hope that it will be useful,\n"
"but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
"GNU General Public License for more details.\n"
"\n", stdout);

	my_exit(SUCCESS);
}

View file

@ -1,32 +0,0 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file help.h
/// \brief Help messages
//
// Copyright (C) 2007 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef HELP_H
#define HELP_H
#include "private.h"
/// Print a one-line hint telling the user to try the --help option.
extern void show_try_help(void);
/// Print the full usage text to standard output and exit; never returns.
extern void show_help(void) lzma_attribute((noreturn));
/// Print version and license information and exit; never returns.
extern void show_version(void) lzma_attribute((noreturn));
#endif

View file

@ -19,131 +19,39 @@
#include "private.h"
#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT)
#include <fcntl.h>
#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
# include <sys/time.h>
#elif defined(HAVE_UTIME)
# include <utime.h>
#endif
#ifndef O_SEARCH
# define O_SEARCH O_RDONLY
#endif
/// \brief Number of open file_pairs
///
/// Once the main() function has requested processing of all files,
/// we wait that open_pairs drops back to zero. Then it is safe to
/// exit from the program.
static size_t open_pairs = 0;
/// \brief mutex for file system operations
///
/// All file system operations are done via the functions in this file.
/// They use fchdir() to avoid some race conditions (more portable than
/// openat() & co.).
///
/// Synchronizing all file system operations shouldn't affect speed notably,
/// since the actual reading from and writing to files is done in parallel.
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/// This condition is signalled when a file is closed and the value of
/// the open_pairs variable has dropped to zero. The only listener for
/// this condition is io_finish() which is called from main().
static pthread_cond_t io_cond = PTHREAD_COND_INITIALIZER;
/// True when stdout is being used by some thread
static bool stdout_in_use = false;
/// This condition is signalled when a thread releases stdout (no longer
/// writes data to it).
static pthread_cond_t stdout_cond = PTHREAD_COND_INITIALIZER;
/// \brief Directory where we were started
///
/// This is needed when a new file, whose name was given on command line,
/// is opened.
static int start_dir;
static uid_t uid;
static gid_t gid;
/// Opens a descriptor for the directory the program was started in and
/// caches the user and group IDs of the process. Exits the program if the
/// current directory cannot be opened.
extern void
io_init(void)
{
	// Keep a handle to the starting directory in start_dir.
	const int cwd_fd = open(".", O_SEARCH | O_NOCTTY);
	start_dir = cwd_fd;

	if (cwd_fd == -1) {
		errmsg(V_ERROR, _("Cannot get file descriptor of the current "
				"directory: %s"), strerror(errno));
		my_exit(ERROR);
	}

	// Look these up once; they are stored in file-scope variables.
	uid = getuid();
	gid = getgid();
}
/// Blocks until the number of open file_pairs has dropped to zero, and
/// then closes the descriptor of the starting directory.
extern void
io_finish(void)
{
	pthread_mutex_lock(&mutex);

	// Re-check the counter after every wakeup of io_cond.
	while (open_pairs > 0)
		pthread_cond_wait(&io_cond, &mutex);

	// start_dir is closed while still holding the mutex.
	(void)close(start_dir);

	pthread_mutex_unlock(&mutex);
}
/// \brief Unlinks a file
///
/// \param dir_fd File descriptor of the directory containing the file
/// \param name Name of the file with or without path
///
/// \return Zero on success. On error, -1 is returned and errno set.
///
/// This tries to verify that the file being unlinked really is the file that
/// we want to unlink by verifying device and inode numbers. There's still
/// a small unavoidable race, but this is much better than nothing (the file
/// could have been moved/replaced even hours earlier).
static void
io_unlink(int dir_fd, const char *name, ino_t ino)
io_unlink(const char *name, const struct stat *known_st)
{
const char *base = str_filename(name);
if (base == NULL) {
// This shouldn't happen.
errmsg(V_ERROR, _("%s: Invalid filename"), name);
return;
}
struct stat new_st;
pthread_mutex_lock(&mutex);
if (fchdir(dir_fd)) {
errmsg(V_ERROR, _("Cannot change directory: %s"),
strerror(errno));
if (lstat(name, &new_st)
|| new_st.st_dev != known_st->st_dev
|| new_st.st_ino != known_st->st_ino) {
message_error(_("%s: File seems to be moved, not removing"),
name);
} else {
struct stat st;
if (lstat(base, &st) || st.st_ino != ino)
errmsg(V_ERROR, _("%s: File seems to be moved, "
"not removing"), name);
// There's a race condition between lstat() and unlink()
// but at least we have tried to avoid removing wrong file.
else if (unlink(base))
errmsg(V_ERROR, _("%s: Cannot remove: %s"),
if (unlink(name))
message_error(_("%s: Cannot remove: %s"),
name, strerror(errno));
}
pthread_mutex_unlock(&mutex);
return;
}
@ -160,14 +68,31 @@ io_copy_attrs(const file_pair *pair)
// destination file who didn't have permission to access the
// source file.
if (uid == 0 && fchown(pair->dest_fd, pair->src_st.st_uid, -1))
errmsg(V_WARNING, _("%s: Cannot set the file owner: %s"),
// Simple cache to avoid repeated calls to geteuid().
static enum {
WARN_FCHOWN_UNKNOWN,
WARN_FCHOWN_NO,
WARN_FCHOWN_YES,
} warn_fchown = WARN_FCHOWN_UNKNOWN;
// Try changing the owner of the file. If we aren't root or the owner
// isn't already us, fchown() probably doesn't succeed. We warn
// about failing fchown() only if we are root.
if (fchown(pair->dest_fd, pair->src_st.st_uid, -1)
&& warn_fchown != WARN_FCHOWN_NO) {
if (warn_fchown == WARN_FCHOWN_UNKNOWN)
warn_fchown = geteuid() == 0
? WARN_FCHOWN_YES : WARN_FCHOWN_NO;
if (warn_fchown == WARN_FCHOWN_YES)
message_warning(_("%s: Cannot set the file owner: %s"),
pair->dest_name, strerror(errno));
}
mode_t mode;
if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
errmsg(V_WARNING, _("%s: Cannot set the file group: %s"),
message_warning(_("%s: Cannot set the file group: %s"),
pair->dest_name, strerror(errno));
// We can still safely copy some additional permissions:
// `group' must be at least as strict as `other' and
@ -186,192 +111,291 @@ io_copy_attrs(const file_pair *pair)
}
if (fchmod(pair->dest_fd, mode))
errmsg(V_WARNING, _("%s: Cannot set the file permissions: %s"),
message_warning(_("%s: Cannot set the file permissions: %s"),
pair->dest_name, strerror(errno));
// Copy the timestamps only if we have a secure function to do it.
#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT)
struct timeval tv[2];
tv[0].tv_sec = pair->src_st.st_atime;
tv[1].tv_sec = pair->src_st.st_mtime;
// Copy the timestamps. We have several possible ways to do this, of
// which some are better in both security and precision.
//
// First, get the nanosecond part of the timestamps. As of writing,
// it's not standardized by POSIX, and there are several names for
// the same thing in struct stat.
long atime_nsec;
long mtime_nsec;
# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
tv[0].tv_usec = pair->src_st.st_atim.tv_nsec / 1000;
// GNU and Solaris
atime_nsec = pair->src_st.st_atim.tv_nsec;
mtime_nsec = pair->src_st.st_mtim.tv_nsec;
# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
tv[0].tv_usec = pair->src_st.st_atimespec.tv_nsec / 1000;
// BSD
atime_nsec = pair->src_st.st_atimespec.tv_nsec;
mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
// GNU and BSD without extensions
atime_nsec = pair->src_st.st_atimensec;
mtime_nsec = pair->src_st.st_mtimensec;
# elif defined(HAVE_STRUCT_STAT_ST_UATIME)
// Tru64
atime_nsec = pair->src_st.st_uatime * 1000;
mtime_nsec = pair->src_st.st_umtime * 1000;
# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
// UnixWare
atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
# else
tv[0].tv_usec = 0;
// Safe fallback
atime_nsec = 0;
mtime_nsec = 0;
# endif
# if defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC)
tv[1].tv_usec = pair->src_st.st_mtim.tv_nsec / 1000;
# elif defined(HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC)
tv[1].tv_usec = pair->src_st.st_mtimespec.tv_nsec / 1000;
# else
tv[1].tv_usec = 0;
# endif
// Construct a structure to hold the timestamps and call appropriate
// function to set the timestamps.
#if defined(HAVE_FUTIMENS)
// Use nanosecond precision.
struct timespec tv[2];
tv[0].tv_sec = pair->src_st.st_atime;
tv[0].tv_nsec = atime_nsec;
tv[1].tv_sec = pair->src_st.st_mtime;
tv[1].tv_nsec = mtime_nsec;
# ifdef HAVE_FUTIMES
(void)futimens(pair->dest_fd, tv);
#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
// Use microsecond precision.
struct timeval tv[2];
tv[0].tv_sec = pair->src_st.st_atime;
tv[0].tv_usec = atime_nsec / 1000;
tv[1].tv_sec = pair->src_st.st_mtime;
tv[1].tv_usec = mtime_nsec / 1000;
# if defined(HAVE_FUTIMES)
(void)futimes(pair->dest_fd, tv);
# else
# elif defined(HAVE_FUTIMESAT)
(void)futimesat(pair->dest_fd, NULL, tv);
# else
// Argh, no function to use a file descriptor to set the timestamp.
// Name the destination file by path instead: the timestamps read from
// the source file must be applied to the file we just wrote.
(void)utimes(pair->dest_name, tv);
# endif
#elif defined(HAVE_UTIME)
// Use one-second precision. utime() doesn't support using file
// descriptor either, so the destination file is named by path.
// Members of a designated initializer are separated by commas.
const struct utimbuf buf = {
	.actime = pair->src_st.st_atime,
	.modtime = pair->src_st.st_mtime,
};
// Avoid warnings.
(void)atime_nsec;
(void)mtime_nsec;
// Apply the source file's timestamps to the destination file.
(void)utime(pair->dest_name, &buf);
#endif
return;
}
/// Changes the working directory to the directory that contains the source
/// file and stores a descriptor for that directory in pair->dir_fd.
///
/// Nothing is done when the source is standard input. Returns zero on
/// success; on failure an error message is printed and -1 is returned.
static int
io_open_dir(file_pair *pair)
{
	if (pair->src_name == stdin_filename)
		return 0;

	// Always start from the directory where the program was started.
	if (fchdir(start_dir) != 0) {
		errmsg(V_ERROR, _("Cannot change directory: %s"),
				strerror(errno));
		return -1;
	}

	const char *last_slash = strrchr(pair->src_name, '/');

	// No directory part: the file is in the starting directory.
	if (last_slash == NULL) {
		pair->dir_fd = start_dir;
		return 0;
	}

	// Copy also the slash. It's needed to support filenames
	// like "/foo" (dirname being "/"), and it never hurts anyway.
	const size_t prefix_len = last_slash - pair->src_name + 1;
	char prefix[prefix_len + 1];
	memcpy(prefix, pair->src_name, prefix_len);
	prefix[prefix_len] = '\0';

	// Open the directory and change into it.
	pair->dir_fd = open(prefix, O_SEARCH | O_NOCTTY);
	if (pair->dir_fd == -1 || fchdir(pair->dir_fd)) {
		errmsg(V_ERROR, _("%s: Cannot open the directory "
				"containing the file: %s"),
				pair->src_name, strerror(errno));
		(void)close(pair->dir_fd);
		return -1;
	}

	return 0;
}
/// Closes the directory descriptor of the file pair, unless it is the
/// shared descriptor of the starting directory.
static void
io_close_dir(file_pair *pair)
{
	// start_dir is not owned by any single file pair; leave it open.
	if (pair->dir_fd == start_dir)
		return;

	(void)close(pair->dir_fd);
}
/// Opens the source file. The file is opened using the plain filename without
/// path, thus the file must be in the current working directory. This is
/// ensured because io_open_dir() is always called before this function.
static int
/// Opens the source file. Returns false on success, true on error.
static bool
io_open_src(file_pair *pair)
{
// There's nothing to open when reading from stdin.
if (pair->src_name == stdin_filename) {
pair->src_fd = STDIN_FILENO;
} else {
// Strip the pathname. Thanks to io_open_dir(), the file
// is now in the current working directory.
const char *filename = str_filename(pair->src_name);
if (filename == NULL)
return -1;
return false;
}
// We accept only regular files if we are writing the output
// to disk too, and if --force was not given.
const bool reg_files_only = !opt_stdout && !opt_force;
// Flags for open()
int flags = O_RDONLY | O_NOCTTY;
// If we accept only regular files, we need to be careful to avoid
// problems with special files like devices and FIFOs. O_NONBLOCK
// prevents blocking when opening such files. When we want to accept
// special files, we must not use O_NONBLOCK, or otherwise we won't
// block waiting for e.g. FIFOs to become readable.
if (reg_files_only)
flags |= O_NONBLOCK;
#ifdef O_NOFOLLOW
if (reg_files_only)
flags |= O_NOFOLLOW;
#else
// Some POSIX-like systems lack O_NOFOLLOW (it's not required
// by POSIX). Check for symlinks with a separate lstat() on
// these systems.
if (reg_files_only) {
struct stat st;
if (lstat(pair->src_name, &st)) {
message_error("%s: %s", pair->src_name,
strerror(errno));
return true;
} else if (S_ISLNK(st.st_mode)) {
message_warning(_("%s: Is a symbolic link, "
"skipping"), pair->src_name);
return true;
}
}
#endif
// Try to open the file. If we are accepting non-regular files,
// unblock the caught signals so that open() can be interrupted
// if it blocks e.g. due to a FIFO file.
if (!reg_files_only)
signals_unblock();
// Maybe this wouldn't need a loop, since all the signal handlers for
// which we don't use SA_RESTART set user_abort to true. But it
// doesn't hurt to have it just in case.
do {
pair->src_fd = open(pair->src_name, flags);
} while (pair->src_fd == -1 && errno == EINTR && !user_abort);
if (!reg_files_only)
signals_block();
// Symlinks are followed if --stdout or --force has been
// specified.
const bool follow_symlinks = opt_stdout || opt_force;
pair->src_fd = open(filename, O_RDONLY | O_NOCTTY
| (follow_symlinks ? 0 : O_NOFOLLOW));
if (pair->src_fd == -1) {
// If we were interrupted, don't display any error message.
if (errno == EINTR) {
// All the signals that don't have SA_RESTART
// set user_abort.
assert(user_abort);
return true;
}
#ifdef O_NOFOLLOW
// Give an understandable error message if the reason
// for failing was that the file was a symbolic link.
// - Linux, OpenBSD, Solaris: ELOOP
// - FreeBSD: EMLINK
// - Tru64: ENOTSUP
// It seems to be safe to check for all these, since
// those errno values aren't used for other purposes
// on any of the listed operating system *when* the
// above flags are used with open().
if (!follow_symlinks
&& (errno == ELOOP
#ifdef EMLINK
|| errno == EMLINK
//
// Note that at least Linux, OpenBSD, Solaris, and Darwin
// use ELOOP to indicate if O_NOFOLLOW was the reason
// that open() failed. Because there may be
// directories in the pathname, ELOOP may occur also
// because of a symlink loop in the directory part.
// So ELOOP doesn't tell us what actually went wrong.
//
// FreeBSD associates EMLINK with O_NOFOLLOW and
// Tru64 uses ENOTSUP. We use these directly here
// and skip the lstat() call and the associated race.
// I want to hear if there are other kernels that
// fail with something else than ELOOP with O_NOFOLLOW.
bool was_symlink = false;
# if defined(__FreeBSD__) || defined(__DragonFly__)
if (errno == EMLINK)
was_symlink = true;
# elif defined(__digital__) && defined(__unix__)
if (errno == ENOTSUP)
was_symlink = true;
# else
if (errno == ELOOP && reg_files_only) {
const int saved_errno = errno;
struct stat st;
if (lstat(pair->src_name, &st) == 0
&& S_ISLNK(st.st_mode))
was_symlink = true;
errno = saved_errno;
}
# endif
#ifdef ENOTSUP
|| errno == ENOTSUP
#endif
)) {
errmsg(V_WARNING, _("%s: Is a symbolic link, "
if (was_symlink)
message_warning(_("%s: Is a symbolic link, "
"skipping"), pair->src_name);
} else {
errmsg(V_ERROR, "%s: %s", pair->src_name,
else
#endif
// Something else than O_NOFOLLOW failing
// (assuming that the race conditions didn't
// confuse us).
message_error("%s: %s", pair->src_name,
strerror(errno));
return true;
}
return -1;
// Drop O_NONBLOCK, which is used only when we are accepting only
// regular files. After the open() call, we want things to block
// instead of giving EAGAIN.
if (reg_files_only) {
flags = fcntl(pair->src_fd, F_GETFL);
if (flags == -1)
goto error_msg;
flags &= ~O_NONBLOCK;
if (fcntl(pair->src_fd, F_SETFL, flags))
goto error_msg;
}
if (fstat(pair->src_fd, &pair->src_st)) {
errmsg(V_ERROR, "%s: %s", pair->src_name,
strerror(errno));
goto error;
}
// Stat the source file. We need the result also when we copy
// the permissions, and when unlinking.
if (fstat(pair->src_fd, &pair->src_st))
goto error_msg;
if (S_ISDIR(pair->src_st.st_mode)) {
errmsg(V_WARNING, _("%s: Is a directory, skipping"),
message_warning(_("%s: Is a directory, skipping"),
pair->src_name);
goto error;
}
if (!opt_stdout) {
if (!opt_force && !S_ISREG(pair->src_st.st_mode)) {
errmsg(V_WARNING, _("%s: Not a regular file, "
if (reg_files_only) {
if (!S_ISREG(pair->src_st.st_mode)) {
message_warning(_("%s: Not a regular file, "
"skipping"), pair->src_name);
goto error;
}
if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
// Setuid and setgid files are rejected even
// with --force. This is good for security
// (hopefully) but it's a bit weird to reject
// file when --force was given. At least this
// matches gzip's behavior.
errmsg(V_WARNING, _("%s: File has setuid or "
// gzip rejects setuid and setgid files even
// when --force was used. bzip2 doesn't check
// for them, but calls fchown() after fchmod(),
// and many systems automatically drop setuid
// and setgid bits there.
//
// We accept setuid and setgid files if
// --force was used. We drop these bits
// explicitly in io_copy_attrs().
message_warning(_("%s: File has setuid or "
"setgid bit set, skipping"),
pair->src_name);
goto error;
}
if (!opt_force && (pair->src_st.st_mode & S_ISVTX)) {
errmsg(V_WARNING, _("%s: File has sticky bit "
if (pair->src_st.st_mode & S_ISVTX) {
message_warning(_("%s: File has sticky bit "
"set, skipping"),
pair->src_name);
goto error;
}
if (pair->src_st.st_nlink > 1) {
errmsg(V_WARNING, _("%s: Input file has more "
message_warning(_("%s: Input file has more "
"than one hard link, "
"skipping"), pair->src_name);
goto error;
}
}
}
return 0;
return false;
error_msg:
message_error("%s: %s", pair->src_name, strerror(errno));
error:
(void)close(pair->src_fd);
return -1;
return true;
}
@ -383,65 +407,73 @@ error:
static void
io_close_src(file_pair *pair, bool success)
{
if (pair->src_fd == STDIN_FILENO || pair->src_fd == -1)
return;
if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
// If we are going to unlink(), do it before closing the file.
// This way there's no risk that someone replaces the file and
// happens to get same inode number, which would make us
// unlink() wrong file.
if (success && !opt_keep_original)
io_unlink(pair->src_name, &pair->src_st);
if (close(pair->src_fd)) {
errmsg(V_ERROR, _("%s: Closing the file failed: %s"),
pair->src_name, strerror(errno));
} else if (success && !opt_keep_original) {
io_unlink(pair->dir_fd, pair->src_name, pair->src_st.st_ino);
(void)close(pair->src_fd);
}
return;
}
static int
static bool
io_open_dest(file_pair *pair)
{
if (opt_stdout || pair->src_fd == STDIN_FILENO) {
// We don't modify or free() this.
pair->dest_name = (char *)"(stdout)";
pair->dest_fd = STDOUT_FILENO;
return false;
}
// Synchronize the order in which files get written to stdout.
// Unlocking the mutex is safe, because opening the file_pair
// can no longer fail.
while (stdout_in_use)
pthread_cond_wait(&stdout_cond, &mutex);
stdout_in_use = true;
} else {
pair->dest_name = get_dest_name(pair->src_name);
pair->dest_name = suffix_get_dest_name(pair->src_name);
if (pair->dest_name == NULL)
return -1;
return true;
// This cannot fail, because get_dest_name() doesn't return
// invalid names.
const char *filename = str_filename(pair->dest_name);
assert(filename != NULL);
// If --force was used, unlink the target file first.
if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
message_error("%s: Cannot unlink: %s",
pair->dest_name, strerror(errno));
free(pair->dest_name);
return true;
}
pair->dest_fd = open(filename, O_WRONLY | O_NOCTTY | O_CREAT
| (opt_force ? O_TRUNC : O_EXCL),
S_IRUSR | S_IWUSR);
if (pair->dest_fd == -1) {
errmsg(V_ERROR, "%s: %s", pair->dest_name,
if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
message_error("%s: Cannot unlink: %s", pair->dest_name,
strerror(errno));
free(pair->dest_name);
return -1;
return true;
}
// Open the file.
const int flags = O_WRONLY | O_NOCTTY | O_CREAT | O_EXCL;
const mode_t mode = S_IRUSR | S_IWUSR;
pair->dest_fd = open(pair->dest_name, flags, mode);
if (pair->dest_fd == -1) {
// Don't bother with error message if user requested
// us to exit anyway.
if (!user_abort)
message_error("%s: %s", pair->dest_name,
strerror(errno));
free(pair->dest_name);
return true;
}
// If this really fails... well, we have a safe fallback.
struct stat st;
if (fstat(pair->dest_fd, &st))
pair->dest_ino = 0;
else
pair->dest_ino = st.st_ino;
if (fstat(pair->dest_fd, &pair->dest_st)) {
pair->dest_st.st_dev = 0;
pair->dest_st.st_ino = 0;
}
return 0;
return false;
}
@ -455,22 +487,16 @@ io_open_dest(file_pair *pair)
static int
io_close_dest(file_pair *pair, bool success)
{
if (pair->dest_fd == -1)
if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
return 0;
if (pair->dest_fd == STDOUT_FILENO) {
stdout_in_use = false;
pthread_cond_signal(&stdout_cond);
return 0;
}
if (close(pair->dest_fd)) {
errmsg(V_ERROR, _("%s: Closing the file failed: %s"),
message_error(_("%s: Closing the file failed: %s"),
pair->dest_name, strerror(errno));
// Closing destination file failed, so we cannot trust its
// contents. Get rid of junk:
io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino);
io_unlink(pair->dest_name, &pair->dest_st);
free(pair->dest_name);
return -1;
}
@ -478,7 +504,7 @@ io_close_dest(file_pair *pair, bool success)
// If the operation using this file wasn't successful, we get rid
// of the junk file.
if (!success)
io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino);
io_unlink(pair->dest_name, &pair->dest_st);
free(pair->dest_name);
@ -492,98 +518,63 @@ io_open(const char *src_name)
if (is_empty_filename(src_name))
return NULL;
file_pair *pair = malloc(sizeof(file_pair));
if (pair == NULL) {
out_of_memory();
return NULL;
}
// Since we have only one file open at a time, we can use
// a statically allocated structure.
static file_pair pair;
*pair = (file_pair){
pair = (file_pair){
.src_name = src_name,
.dest_name = NULL,
.dir_fd = -1,
.src_fd = -1,
.dest_fd = -1,
.src_eof = false,
};
pthread_mutex_lock(&mutex);
// Block the signals, for which we have a custom signal handler, so
// that we don't need to worry about EINTR.
signals_block();
++open_pairs;
file_pair *ret = NULL;
if (!io_open_src(&pair)) {
// io_open_src() may have unblocked the signals temporarily,
// and thus user_abort may have got set even if open()
// succeeded.
if (user_abort || io_open_dest(&pair))
io_close_src(&pair, false);
else
ret = &pair;
}
if (io_open_dir(pair))
goto error_dir;
signals_unblock();
if (io_open_src(pair))
goto error_src;
if (user_abort || io_open_dest(pair))
goto error_dest;
pthread_mutex_unlock(&mutex);
return pair;
error_dest:
io_close_src(pair, false);
error_src:
io_close_dir(pair);
error_dir:
--open_pairs;
pthread_mutex_unlock(&mutex);
free(pair);
return NULL;
return ret;
}
/// \brief Closes the file descriptors and frees the structure
extern void
io_close(file_pair *pair, bool success)
{
signals_block();
if (success && pair->dest_fd != STDOUT_FILENO)
io_copy_attrs(pair);
// Close the destination first. If it fails, we must not remove
// the source file!
if (!io_close_dest(pair, success)) {
// Closing destination file succeeded. Remove the source file
// if the operation using this file pair was successful
// and we haven't been requested to keep the source file.
if (io_close_dest(pair, success))
success = false;
// Close the source file, and unlink it if the operation using this
// file pair was successful and we haven't requested to keep the
// source file.
io_close_src(pair, success);
} else {
// We don't care if operation using this file pair was
// successful or not, since closing the destination file
// failed. Don't remove the original file.
io_close_src(pair, false);
}
io_close_dir(pair);
free(pair);
pthread_mutex_lock(&mutex);
if (--open_pairs == 0)
pthread_cond_signal(&io_cond);
pthread_mutex_unlock(&mutex);
signals_unblock();
return;
}
/// \brief Reads from a file to a buffer
///
/// \param pair File pair having the sourcefile open for reading
/// \param buf Destination buffer to hold the read data
/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX
///
/// \return On success, number of bytes read is returned. On end of
/// file zero is returned and pair->src_eof set to true.
/// On error, SIZE_MAX is returned and error message printed.
///
/// \note This does no locking, thus two threads must not read from
/// the same file. This no problem in this program.
extern size_t
io_read(file_pair *pair, uint8_t *buf, size_t size)
{
@ -608,7 +599,7 @@ io_read(file_pair *pair, uint8_t *buf, size_t size)
continue;
}
errmsg(V_ERROR, _("%s: Read error: %s"),
message_error(_("%s: Read error: %s"),
pair->src_name, strerror(errno));
// FIXME Is this needed?
@ -625,18 +616,7 @@ io_read(file_pair *pair, uint8_t *buf, size_t size)
}
/// \brief Writes a buffer to a file
///
/// \param pair File pair having the destination file open for writing
/// \param buf Buffer containing the data to be written
/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX
///
/// \return On success, zero is returned. On error, -1 is returned
/// and error message printed.
///
/// \note This does no locking, thus two threads must not write to
/// the same file. This no problem in this program.
extern int
extern bool
io_write(const file_pair *pair, const uint8_t *buf, size_t size)
{
assert(size < SSIZE_MAX);
@ -660,18 +640,19 @@ io_write(const file_pair *pair, const uint8_t *buf, size_t size)
// GNU bash).
//
// We don't do anything special with --quiet, which
// is what bzip2 does too. However, we print a
// message if --verbose was used (or should that
// only be with double --verbose i.e. debugging?).
errmsg(errno == EPIPE ? V_VERBOSE : V_ERROR,
_("%s: Write error: %s"),
// is what bzip2 does too. If we get SIGPIPE, we
// will handle it like other signals by setting
// user_abort, and get EPIPE here.
if (errno != EPIPE)
message_error(_("%s: Write error: %s"),
pair->dest_name, strerror(errno));
return -1;
return true;
}
buf += (size_t)(amount);
size -= (size_t)(amount);
}
return 0;
return false;
}

View file

@ -22,6 +22,8 @@
#include "private.h"
// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
#if BUFSIZ <= 1024
# define IO_BUFFER_SIZE 8192
#else
@ -30,31 +32,66 @@
typedef struct {
/// Name of the source filename (as given on the command line) or
/// pointer to static "(stdin)" when reading from standard input.
const char *src_name;
/// Destination filename converted from src_name or pointer to static
/// "(stdout)" when writing to standard output.
char *dest_name;
int dir_fd;
/// File descriptor of the source file
int src_fd;
/// File descriptor of the target file
int dest_fd;
/// Stat of the source file.
struct stat src_st;
ino_t dest_ino;
/// Stat of the destination file.
struct stat dest_st;
/// True once end of the source file has been detected.
bool src_eof;
} file_pair;
extern void io_init(void);
extern void io_finish(void);
/// \brief Opens a file pair
extern file_pair *io_open(const char *src_name);
/// \brief Closes the file descriptors and frees possible allocated memory
///
/// The success argument determines if source or destination file gets
/// unlinked:
/// - false: The destination file is unlinked.
/// - true: The source file is unlinked unless writing to stdout or --keep
/// was used.
extern void io_close(file_pair *pair, bool success);
/// \brief Reads from the source file to a buffer
///
/// \param pair File pair having the source file open for reading
/// \param buf Destination buffer to hold the read data
/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX
///
/// \return On success, number of bytes read is returned. On end of
/// file zero is returned and pair->src_eof set to true.
/// On error, SIZE_MAX is returned and error message printed.
extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size);
extern int io_write(const file_pair *pair, const uint8_t *buf, size_t size);
/// \brief Writes a buffer to the destination file
///
/// \param pair File pair having the destination file open for writing
/// \param buf Buffer containing the data to be written
/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX
///
/// \return On success, false is returned. On error, true is returned
/// and error message printed.
extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size);
#endif

View file

@ -21,16 +21,30 @@
#include "open_stdxxx.h"
#include <ctype.h>
static sig_atomic_t exit_signal = 0;
volatile sig_atomic_t user_abort = false;
/// Exit status to use. This can be changed with set_exit_status().
static enum exit_status_type exit_status = E_SUCCESS;
/// If we were interrupted by a signal, we store the signal number so that
/// we can raise that signal to kill the program when all cleanups have
/// been done.
static volatile sig_atomic_t exit_signal = 0;
/// Mask of signals for which we have established a signal handler to set
/// user_abort to true.
static sigset_t hooked_signals;
/// signals_block() and signals_unblock() can be called recursively.
static size_t signals_block_count = 0;
static void
signal_handler(int sig)
{
// FIXME Is this thread-safe together with main()?
exit_signal = sig;
user_abort = 1;
user_abort = true;
return;
}
@ -38,118 +52,228 @@ signal_handler(int sig)
static void
establish_signal_handlers(void)
{
struct sigaction sa;
sa.sa_handler = &signal_handler;
sigfillset(&sa.sa_mask);
sa.sa_flags = 0;
// List of signals for which we establish the signal handler.
static const int sigs[] = {
SIGHUP,
SIGINT,
SIGPIPE,
SIGTERM,
#ifdef SIGHUP
SIGHUP,
#endif
#ifdef SIGPIPE
SIGPIPE,
#endif
#ifdef SIGXCPU
SIGXCPU,
#endif
#ifdef SIGXFSZ
SIGXFSZ,
#endif
};
for (size_t i = 0; i < sizeof(sigs) / sizeof(sigs[0]); ++i) {
if (sigaction(sigs[i], &sa, NULL)) {
errmsg(V_ERROR, _("Cannot establish signal handlers"));
my_exit(ERROR);
}
// Mask of the signals for which we have established a signal handler.
sigemptyset(&hooked_signals);
for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i)
sigaddset(&hooked_signals, sigs[i]);
struct sigaction sa;
// All the signals that we handle are also blocked while the signal
// handler runs.
sa.sa_mask = hooked_signals;
// Don't set SA_RESTART, because we want EINTR so that we can check
// for user_abort and cleanup before exiting. We block the signals
// for which we have established a handler when we don't want EINTR.
sa.sa_flags = 0;
sa.sa_handler = &signal_handler;
for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) {
// If the parent process has left some signals ignored,
// we don't unignore them.
struct sigaction old;
if (sigaction(sigs[i], NULL, &old) == 0
&& old.sa_handler == SIG_IGN)
continue;
// Establish the signal handler.
if (sigaction(sigs[i], &sa, NULL))
message_signal_handler();
}
/*
SIGINFO/SIGUSR1 for status reporting?
*/
return;
}
static bool
is_tty_stdin(void)
extern void
signals_block(void)
{
const bool ret = isatty(STDIN_FILENO);
if (ret) {
// FIXME: Other threads may print between these lines.
// Maybe that should be fixed. Not a big issue in practice.
errmsg(V_ERROR, _("Compressed data not read from "
"a terminal."));
errmsg(V_ERROR, _("Use `--force' to force decompression."));
show_try_help();
if (signals_block_count++ == 0) {
const int saved_errno = errno;
sigprocmask(SIG_BLOCK, &hooked_signals, NULL);
errno = saved_errno;
}
return ret;
return;
}
static bool
is_tty_stdout(void)
extern void
signals_unblock(void)
{
const bool ret = isatty(STDOUT_FILENO);
if (ret) {
errmsg(V_ERROR, _("Compressed data not written to "
"a terminal."));
errmsg(V_ERROR, _("Use `--force' to force compression."));
show_try_help();
assert(signals_block_count > 0);
if (--signals_block_count == 0) {
const int saved_errno = errno;
sigprocmask(SIG_UNBLOCK, &hooked_signals, NULL);
errno = saved_errno;
}
return ret;
return;
}
static char *
read_name(void)
extern void
set_exit_status(enum exit_status_type new_status)
{
size_t size = 256;
assert(new_status == E_WARNING || new_status == E_ERROR);
if (exit_status != E_ERROR)
exit_status = new_status;
return;
}
/// \brief      Close stdout and stderr, then terminate the program
///
/// \param      status  Exit status requested by the caller. It is replaced
///                     with E_ERROR if closing stdout or stderr reveals
///                     a write error.
///
/// If a signal number has been stored in exit_signal, the default action of
/// that signal is restored and the signal is raised, so that the program
/// terminates by the signal instead of a normal exit. This function does
/// not return.
extern void
my_exit(enum exit_status_type status)
{
	// Close stdout. If something goes wrong, print an error message
	// to stderr.
	{
		const int ferror_err = ferror(stdout);
		const int fclose_err = fclose(stdout);
		if (ferror_err || fclose_err) {
			// If it was fclose() that failed, we have the reason
			// in errno. If only ferror() indicated an error,
			// we have no idea what the reason was.
			message(V_ERROR, _("Writing to standard output "
					"failed: %s"),
					fclose_err ? strerror(errno)
						: _("Unknown error"));
			status = E_ERROR;
		}
	}

	// Close stderr. If something goes wrong, there's nothing where we
	// could print an error message. Just set the exit status.
	{
		const int ferror_err = ferror(stderr);
		const int fclose_err = fclose(stderr);
		if (fclose_err || ferror_err)
			status = E_ERROR;
	}

	// If we have got a signal, raise it to kill the program.
	//
	// exit_signal is volatile and may be overwritten by the signal
	// handler at any time, so it is read exactly once. The same value
	// is used both for restoring the default action and for raise();
	// otherwise we could raise a signal whose handler is still hooked.
	const int sig = exit_signal;
	if (sig != 0) {
		struct sigaction sa;
		sa.sa_handler = SIG_DFL;
		sigfillset(&sa.sa_mask);
		sa.sa_flags = 0;
		sigaction(sig, &sa, NULL);
		raise(sig);

		// If, for some weird reason, the signal doesn't kill us,
		// we safely fall to the exit below.
	}

	exit(status);
}
static const char *
read_name(const args_info *args)
{
// FIXME: Maybe we should have some kind of memory usage limit here
// like the tool has for the actual compression and uncompression.
// Giving some huge text file with --files0 makes us to read the
// whole file in RAM.
static char *name = NULL;
static size_t size = 256;
// Allocate the initial buffer. This is never freed, since after it
// is no longer needed, the program exits very soon. It is safe to
// use xmalloc() and xrealloc() in this function, because while
// executing this function, no files are open for writing, and thus
// there's no need to cleanup anything before exiting.
if (name == NULL)
name = xmalloc(size);
// Write position in name
size_t pos = 0;
char *name = malloc(size);
if (name == NULL) {
out_of_memory();
// Read one character at a time into name.
while (!user_abort) {
const int c = fgetc(args->files_file);
if (ferror(args->files_file)) {
// Take care of EINTR since we have established
// the signal handlers already.
if (errno == EINTR)
continue;
message_error(_("%s: Error reading filenames: %s"),
args->files_name, strerror(errno));
return NULL;
}
while (true) {
const int c = fgetc(opt_files_file);
if (c == EOF) {
free(name);
if (ferror(opt_files_file))
errmsg(V_ERROR, _("%s: Error reading "
"filenames: %s"),
opt_files_name,
strerror(errno));
else if (pos != 0)
errmsg(V_ERROR, _("%s: Unexpected end of "
"input when reading "
"filenames"), opt_files_name);
if (feof(args->files_file)) {
if (pos != 0)
message_error(_("%s: Unexpected end of input "
"when reading filenames"),
args->files_name);
return NULL;
}
if (c == '\0' || c == opt_files_split)
break;
if (c == args->files_delim) {
// We allow consecutive newline (--files) or '\0'
// characters (--files0), and ignore such empty
// filenames.
if (pos == 0)
continue;
// A non-empty name was read. Terminate it with '\0'
// and return it.
name[pos] = '\0';
return name;
}
if (c == '\0') {
// A null character was found when using --files,
// which expects plain text input separated with
// newlines.
message_error(_("%s: Null character found when "
"reading filenames; maybe you meant "
"to use `--files0' instead "
"of `--files'?"), args->files_name);
return NULL;
}
name[pos++] = c;
// Allocate more memory if needed. There must always be space
// at least for one character to allow terminating the string
// with '\0'.
if (pos == size) {
size *= 2;
char *tmp = realloc(name, size);
if (tmp == NULL) {
free(name);
out_of_memory();
name = xrealloc(name, size);
}
}
return NULL;
}
name = tmp;
}
}
if (name != NULL)
name[pos] = '\0';
return name;
}
int
main(int argc, char **argv)
@ -158,35 +282,56 @@ main(int argc, char **argv)
// a valid file descriptor. Exit immediatelly with exit code ERROR
// if we cannot make the file descriptors valid. Maybe we should
// print an error message, but our stderr could be screwed anyway.
open_stdxxx(ERROR);
open_stdxxx(E_ERROR);
// Set the program invocation name used in various messages.
argv0 = argv[0];
// This has to be done before calling any liblzma functions.
lzma_init();
setlocale(LC_ALL, "en_US.UTF-8");
// Set up the locale.
setlocale(LC_ALL, "");
#ifdef ENABLE_NLS
// Set up the message translations too.
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
#endif
// Set the program invocation name used in various messages, and
// do other message handling related initializations.
message_init(argv[0]);
// Set hardware-dependent default values. These can be overriden
// on the command line, thus this must be done before parse_args().
hardware_init();
char **files = parse_args(argc, argv);
// Parse the command line arguments and get an array of filenames.
// This doesn't return if something is wrong with the command line
// arguments. If there are no arguments, one filename ("-") is still
// returned to indicate stdin.
args_info args;
args_parse(&args, argc, argv);
if (opt_mode == MODE_COMPRESS && opt_stdout && is_tty_stdout())
return ERROR;
if (opt_mode == MODE_COMPRESS)
lzma_init_encoder();
// Tell the message handling code how many input files there are if
// we know it. This way the progress indicator can show it.
if (args.files_name != NULL)
message_set_files(0);
else
lzma_init_decoder();
message_set_files(args.arg_count);
io_init();
process_init();
// Refuse to write compressed data to standard output if it is
// a terminal and --force wasn't used.
if (opt_mode == MODE_COMPRESS) {
if (opt_stdout || (args.arg_count == 1
&& strcmp(args.arg_names[0], "-") == 0)) {
if (is_tty_stdout()) {
message_try_help();
my_exit(E_ERROR);
}
}
}
if (opt_mode == MODE_LIST) {
errmsg(V_ERROR, "--list is not implemented yet.");
my_exit(ERROR);
message_fatal("--list is not implemented yet.");
}
// Hook the signal handlers. We don't need these before we start
@ -194,60 +339,63 @@ main(int argc, char **argv)
// line arguments.
establish_signal_handlers();
while (*files != NULL && !user_abort) {
if (strcmp("-", *files) == 0) {
// Process the files given on the command line. Note that if no names
// were given, parse_args() gave us a fake "-" filename.
for (size_t i = 0; i < args.arg_count && !user_abort; ++i) {
if (strcmp("-", args.arg_names[i]) == 0) {
// Processing from stdin to stdout. Unless --force
// was used, check that we aren't writing compressed
// data to a terminal or reading it from terminal.
if (!opt_force) {
if (opt_mode == MODE_COMPRESS) {
if (is_tty_stdout()) {
++files;
if (is_tty_stdout())
continue;
}
} else if (is_tty_stdin()) {
++files;
continue;
}
}
if (opt_files_name == stdin_filename) {
errmsg(V_ERROR, _("Cannot read data from "
// It doesn't make sense to compress data from stdin
// if we are supposed to read filenames from stdin
// too (enabled with --files or --files0).
if (args.files_name == stdin_filename) {
message_error(_("Cannot read data from "
"standard input when "
"reading filenames "
"from standard input"));
++files;
continue;
}
*files = (char *)stdin_filename;
// Replace the "-" with a special pointer, which is
// recognized by process_file() and other things.
// This way error messages get a proper filename
// string and the code still knows that it is
// handling the special case of stdin.
args.arg_names[i] = (char *)stdin_filename;
}
process_file(*files++);
// Do the actual compression or uncompression.
process_file(args.arg_names[i]);
}
if (opt_files_name != NULL) {
// If --files or --files0 was used, process the filenames from the
// given file or stdin. Note that here we don't consider "-" to
// indicate stdin like we do with the command line arguments.
if (args.files_name != NULL) {
// read_name() checks for user_abort so we don't need to
// check it as loop termination condition.
while (true) {
char *name = read_name();
const char *name = read_name(&args);
if (name == NULL)
break;
if (name[0] != '\0')
// read_name() doesn't return empty names.
assert(name[0] != '\0');
process_file(name);
free(name);
}
if (opt_files_name != stdin_filename)
(void)fclose(opt_files_file);
}
io_finish();
if (exit_signal != 0) {
struct sigaction sa;
sa.sa_handler = SIG_DFL;
sigfillset(&sa.sa_mask);
sa.sa_flags = 0;
sigaction(exit_signal, &sa, NULL);
raise(exit_signal);
if (args.files_name != stdin_filename)
(void)fclose(args.files_file);
}
my_exit(exit_status);

60
src/lzma/main.h Normal file
View file

@ -0,0 +1,60 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file main.h
/// \brief Miscellaneous declarations
//
// Copyright (C) 2008 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef MAIN_H
#define MAIN_H
/// Possible exit status values. These are the same as used by gzip and bzip2.
enum exit_status_type {
// Initial value of the program's exit status.
E_SUCCESS = 0,
// Set by message_error(); once set, set_exit_status() keeps it.
E_ERROR = 1,
// Set by message_warning(); never replaces an already recorded E_ERROR.
E_WARNING = 2,
};
/// If this is true, we will clean up the possibly incomplete output file and
/// return to main() as soon as practical. That is, the code needs to poll
/// this variable in various places.
extern volatile sig_atomic_t user_abort;
/// Block the signals which don't have SA_RESTART and which would just set
/// user_abort to true. This is handy when we don't want to handle EINTR
/// and don't want SA_RESTART either.
extern void signals_block(void);
/// Unblock the signals blocked by signals_block().
extern void signals_unblock(void);
/// Sets the exit status after a warning or error has occurred. If new_status
/// is E_WARNING and the old exit status was already E_ERROR, the exit
/// status is not changed.
extern void set_exit_status(enum exit_status_type new_status);
/// Exits the program using the given status. This takes care of closing
/// stdout and stderr and catches possible errors. If we had got
/// a signal, this function will raise it so that to the parent process it
/// appears that we were killed by the signal sent by the user.
extern void my_exit(enum exit_status_type status) lzma_attribute((noreturn));
#endif

892
src/lzma/message.c Normal file
View file

@ -0,0 +1,892 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file message.c
/// \brief Printing messages to stderr
//
// Copyright (C) 2007-2008 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#include "private.h"
#if defined(HAVE_SYS_TIME_H)
# include <sys/time.h>
#elif defined(SIGALRM)
// FIXME
#endif
#include <stdarg.h>
/// Name of the program which is prefixed to the error messages.
/// Set by message_init().
static const char *argv0;
/// Number of the current file. message_progress_start() increments this
/// before use, so the first file is number one.
static unsigned int files_pos = 0;
/// Total number of input files; zero if unknown.
/// Set by message_set_files().
static unsigned int files_total;
/// Verbosity level. Adjusted with message_verbosity_increase() and
/// message_verbosity_decrease().
static enum message_verbosity verbosity = V_WARNING;
/// Filename which we will print with the verbose messages.
/// Set by message_progress_start().
static const char *filename;
/// True once a filename has been printed to stderr as part of progress
/// message. If automatic progress updating isn't enabled, this becomes true
/// after the first progress message has been printed due to user sending
/// SIGALRM. Once this variable is true, we will print an empty line before
/// the next filename to make the output more readable.
static bool first_filename_printed = false;
/// This is set to true when we have printed the current filename to stderr
/// as part of a progress message. This variable is useful only if not
/// updating progress automatically: if user sends many SIGALRM signals,
/// we won't print the name of the same file multiple times.
static bool current_filename_printed = false;
/// True if we should print progress indicator and update it automatically.
/// message_init() sets this to the result of isatty(STDERR_FILENO).
static bool progress_automatic;
/// This is true when a progress message was printed and the cursor is still
/// on the same line with the progress message. In that case, a newline has
/// to be printed before any error messages.
static bool progress_active = false;
/// Expected size of the input stream is needed to show completion percentage
/// and estimate remaining time. Zero is treated as "size unknown".
static uint64_t expected_in_size;
/// Time when we started processing the file, as returned by my_time().
static double start_time;
/// The signal handler for SIGALRM sets this to true. It is set back to false
/// once the progress message has been updated.
static volatile sig_atomic_t progress_needs_updating = false;
/// SIGALRM handler. It only records that a progress update was requested;
/// message_progress_update() does the printing and clears the flag.
static void
progress_signal_handler(int sig lzma_attribute((unused)))
{
	progress_needs_updating = true;
}
/// Get the current time as double
///
/// \return     Seconds reported by gettimeofday() including the fractional
///             part, or -1.0 if gettimeofday() fails.
static double
my_time(void)
{
	struct timeval tv;

	// This really shouldn't fail. I'm not sure what to return if it
	// still fails. It doesn't look so useful to check the return value
	// everywhere. FIXME?
	if (gettimeofday(&tv, NULL))
		return -1.0;

	// tv_usec holds microseconds, so one second is 1.0e6 units.
	// Dividing by 1.0e9 would make the fractional part a thousand
	// times too small and the elapsed-time calculations jumpy.
	return (double)(tv.tv_sec) + (double)(tv.tv_usec) / 1.0e6;
}
/// Append formatted text to a buffer that is being filled piece by piece.
///
/// \param      pos     Pointer to the current write position; advanced past
///                     the written text on success.
/// \param      left    Pointer to the number of bytes still available at
///                     *pos; reduced by the number of characters written.
/// \param      fmt     printf-style format string followed by its arguments
///
/// If formatting fails or the text doesn't fit, *left is set to zero and
/// *pos is left untouched, so later calls cannot write anything more.
static void /* lzma_attribute((format(printf, 3, 4))) */
my_snprintf(char **pos, size_t *left, const char *fmt, ...)
{
	va_list args;
	va_start(args, fmt);
	const int written = vsnprintf(*pos, *left, fmt, args);
	va_end(args);

	if (written >= 0 && (size_t)(written) < *left) {
		// Everything fit, including the terminating '\0'.
		*pos += written;
		*left -= written;
		return;
	}

	// Error or truncation: pretend the buffer is full. We don't need
	// better error handling here.
	*left = 0;
}
/// Initialize the message handling code.
///
/// \param      given_argv0     Program name to prefix to the messages
///
/// Stores the program name, decides whether the progress indicator may be
/// updated automatically, and (where SIGALRM exists) hooks SIGALRM.
extern void
message_init(const char *given_argv0)
{
	// Program name used as the prefix of the messages
	argv0 = given_argv0;

	// With --verbose, the progress indicator is used if and only if
	// stderr is a terminal; otherwise verbose information is printed
	// only after finishing the file. As a special exception, even
	// without --verbose, the user can send SIGALRM to make us print
	// progress information once without automatic updating.
	progress_automatic = isatty(STDERR_FILENO);

/*
	if (progress_automatic) {
		// stderr is a terminal. Check the COLUMNS environment
		// variable to see if the terminal is wide enough. If COLUMNS
		// doesn't exist or it has some unparseable value, we assume
		// that the terminal is wide enough.
		const char *columns_str = getenv("COLUMNS");
		uint64_t columns;
		if (columns_str != NULL
				&& !str_to_uint64_raw(&columns, columns_str)
				&& columns < 80)
			progress_automatic = false;
	}
*/

#ifdef SIGALRM
	// Hook SIGALRM. The signal needs no quick reaction, so SA_RESTART
	// is used to avoid EINTR from interrupted system calls.
	struct sigaction sa;
	sigemptyset(&sa.sa_mask);
	sa.sa_handler = &progress_signal_handler;
	sa.sa_flags = SA_RESTART;

	if (sigaction(SIGALRM, &sa, NULL) != 0)
		message_signal_handler();
#endif
}
/// Raise the verbosity by one level; V_DEBUG is the upper limit.
extern void
message_verbosity_increase(void)
{
	if (verbosity >= V_DEBUG)
		return;

	++verbosity;
}
/// Lower the verbosity by one level; V_SILENT is the lower limit.
extern void
message_verbosity_decrease(void)
{
	if (verbosity <= V_SILENT)
		return;

	--verbosity;
}
/// Tell the message code how many input files there are.
///
/// \param      files   Total number of input files, or zero if unknown
extern void
message_set_files(unsigned int files)
{
	files_total = files;
}
/// Print the name of the current file to stderr unless it has been printed
/// already. Nothing is printed when exactly one stream is processed from
/// stdin: showing "(stdin)" with --verbose in a pipe looks worse than
/// showing no name at all.
static void
print_filename(void)
{
	// Each filename is printed at most once.
	if (current_filename_printed)
		return;

	// The single-stream-from-stdin exception
	if (files_total == 1 && filename == stdin_filename)
		return;

	signals_block();

	// Separate this file from the previous one with an empty line
	// to improve readability.
	if (first_filename_printed)
		fputc('\n', stderr);

	first_filename_printed = true;
	current_filename_printed = true;

	if (files_total == 0) {
		// The total is unknown due to usage of --files or --files0.
		fprintf(stderr, "%s (%u)\n", filename, files_pos);
	} else {
		fprintf(stderr, "%s (%u/%u)\n", filename,
				files_pos, files_total);
	}

	signals_unblock();
}
/// Start the progress tracking of a new file.
///
/// \param      src_name    Name of the file, printed in verbose messages
/// \param      in_size     Expected input size; zero means unknown
extern void
message_progress_start(const char *src_name, uint64_t in_size)
{
	// These are stored even when no statistics are being printed,
	// because the user may still request them with SIGALRM.
	filename = src_name;
	expected_in_size = in_size;
	start_time = my_time();

	// The name of this file hasn't been printed to stderr yet.
	current_filename_printed = false;

	// File numbering starts from one.
	++files_pos;

	// Without a wanted and automatically updating progress indicator
	// there is nothing more to do.
	if (verbosity < V_VERBOSE || !progress_automatic)
		return;

	// Print the filename (and possibly the file count) if that is
	// appropriate with the current settings.
	print_filename();

	// Make progress_needs_updating become true after about one second.
	// Setting it to true right away would also work, but extremely
	// early progress info is pretty much useless.
	alarm(1);
}
/// Make the string indicating completion percentage.
///
/// \param      in_pos  Amount of input processed so far
///
/// \return     Pointer to a static string. It is overwritten by the next
///             call unless the fixed "unknown" string was returned.
static const char *
progress_percentage(uint64_t in_pos)
{
	// The percentage cannot be calculated if the input size is unknown,
	// or if it is clearly wrong because more data has already been
	// processed than the file was supposed to contain.
	const bool size_unusable = expected_in_size == 0
			|| in_pos > expected_in_size;
	if (size_unusable)
		return "--- %";

	static char buf[sizeof("99.9 %")];

	// Scale to 99.9 instead of 100.0: 100.0 % is never shown before we
	// actually are finished (message_progress_end() handles that case).
	const double done = (double)(in_pos) / (double)(expected_in_size);
	snprintf(buf, sizeof(buf), "%.1f %%", done * 99.9);

	return buf;
}
/// Append one size value with a unit to the string being constructed.
///
/// \param      pos     Write position, passed on to my_snprintf()
/// \param      left    Space left, passed on to my_snprintf()
/// \param      value   Size in bytes
/// \param      final   If true, small values are shown as bytes or KiB;
///                     otherwise MiB is always used.
static void
progress_sizes_helper(char **pos, size_t *left, uint64_t value, bool final)
{
	// At maximum of four digits is allowed for exact byte count.
	if (final && value < 10000) {
		my_snprintf(pos, left, "%'" PRIu64 " B", value);
		return;
	}

	// At maximum of five significant digits is allowed for KiB.
	// (A limit of four significant digits would be 1023900.)
	if (final && value < UINT64_C(10239900)) {
		my_snprintf(pos, left, "%'.1f KiB",
				(double)(value) / 1024.0);
		return;
	}

	// Otherwise we use MiB.
	my_snprintf(pos, left, "%'.1f MiB",
			(double)(value) / (1024.0 * 1024.0));
}
/// Make the string containing the compressed size, the uncompressed size,
/// and the compression ratio.
///
/// \param      compressed_pos      Amount of compressed data so far
/// \param      uncompressed_pos    Amount of uncompressed data so far
/// \param      final               True for the final statistics, which
///                                 allows smaller units than MiB
///
/// \return     Pointer to a static buffer overwritten by the next call
static const char *
progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final)
{
	// This is enough to hold sizes up to about 99 TiB if thousand
	// separator is used, or about 1 PiB without thousand separator.
	// After that the progress indicator will look a bit silly, since
	// the compression ratio no longer fits with three decimal places.
	static char buf[44];

	char *cursor = buf;
	size_t room = sizeof(buf);

	// "<compressed> / <uncompressed>"
	progress_sizes_helper(&cursor, &room, compressed_pos, final);
	my_snprintf(&cursor, &room, " / ");
	progress_sizes_helper(&cursor, &room, uncompressed_pos, final);

	// Show the real ratio only if it can be calculated (no division by
	// zero) and it isn't very bad.
	if (uncompressed_pos > 0) {
		const double ratio = (double)(compressed_pos)
				/ (double)(uncompressed_pos);
		if (!(ratio > 9.999)) {
			snprintf(cursor, room, " = %.3f", ratio);
			return buf;
		}
	}

	// Just indicate that the ratio is greater than 9.999. This way
	// the length of the ratio field stays fixed.
	snprintf(cursor, room, " > %.3f", 9.999);
	return buf;
}
/// Make the string containing the processing speed of uncompressed data.
///
/// \param      uncompressed_pos    Amount of uncompressed data so far
/// \param      elapsed             Seconds spent on this file so far
///
/// \return     Pointer to a static buffer overwritten by the next call, or
///             an empty string if no speed is shown.
static const char *
progress_speed(uint64_t uncompressed_pos, double elapsed)
{
	// The early values look somewhat random, so show nothing at first.
	if (elapsed < 3.0)
		return "";

	static const char unit[][8] = {
		"KiB/s",
		"MiB/s",
		"GiB/s",
	};

	// Start from KiB/s and move to a bigger unit for as long as
	// the number doesn't fit into the field.
	double speed = (double)(uncompressed_pos) / (elapsed * 1024.0);
	size_t unit_index = 0;

	while (speed > 999.9) {
		if (unit_index + 1 == ARRAY_SIZE(unit))
			return ""; // Way too fast ;-)

		++unit_index;
		speed /= 1024.0;
	}

	static char buf[sizeof("999.9 GiB/s")];
	snprintf(buf, sizeof(buf), "%.1f %s", speed, unit[unit_index]);
	return buf;
}
/// Make a string indicating elapsed or remaining time.
///
/// \param      seconds     Time in seconds
///
/// \return     "M:SS" for times under an hour and "H:MM:SS" otherwise, in
///             a static buffer overwritten by the next call. An empty
///             string is returned for zero and for times above 9999:59:59.
static const char *
progress_time(uint32_t seconds)
{
	// 9999 hours = 416 days
	static char buf[sizeof("9999:59:59")];

	// The biggest time that still fits in buf
	const uint32_t seconds_max
			= ((UINT32_C(9999) * 60) + 59) * 60 + 59;

	// Don't show anything if the time is zero or ridiculously big.
	if (seconds == 0 || seconds > seconds_max)
		return "";

	const uint32_t hours = seconds / 3600;
	const uint32_t mins = (seconds / 60) % 60;
	const uint32_t secs = seconds % 60;

	if (hours > 0)
		snprintf(buf, sizeof(buf),
				"%" PRIu32 ":%02" PRIu32 ":%02" PRIu32,
				hours, mins, secs);
	else
		snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32,
				mins, secs);

	return buf;
}
/// Make the string to contain the estimated remaining time, or if the amount
/// of input isn't known, how much time has elapsed.
///
/// \param      in_pos      Amount of input processed so far
/// \param      elapsed     Seconds spent on this file so far
///
/// \return     String made by progress_time(), or an empty string if no
///             sensible time can be shown.
static const char *
progress_remaining(uint64_t in_pos, double elapsed)
{
	// If we don't know the size of the input, we indicate the time
	// spent so far.
	if (expected_in_size == 0 || in_pos > expected_in_size) {
		// Converting a double outside the range of uint32_t is
		// undefined behavior. elapsed can be negative (clock set
		// backwards, or my_time() failing), so check the range
		// first. Zero makes progress_time() return an empty string.
		uint32_t spent = 0;
		if (elapsed >= 1.0 && elapsed < (double)(UINT32_MAX))
			spent = (uint32_t)(elapsed);

		return progress_time(spent);
	}

	// If we are at the very beginning of the file or the file is very
	// small, don't give any estimate to avoid far too wrong estimations.
	if (in_pos < (UINT64_C(1) << 19) || elapsed < 8.0)
		return "";

	// Calculate the estimate as double first; with a huge expected size
	// the result may not fit into uint32_t.
	const double estimate = (double)(expected_in_size - in_pos)
			* elapsed / (double)(in_pos);

	// progress_time() shows nothing for ridiculously big times anyway,
	// so an estimate that doesn't fit into uint32_t is simply not shown.
	if (!(estimate < (double)(UINT32_MAX)))
		return "";

	// Don't give an estimate of zero seconds, since it is possible that
	// all the input has been already passed to the library, but there
	// is still quite a bit of output pending.
	uint32_t remaining = (uint32_t)(estimate);
	if (remaining == 0)
		remaining = 1;

	return progress_time(remaining);
}
extern void
message_progress_update(uint64_t in_pos, uint64_t out_pos)
{
// If there's nothing to do, return immediatelly.
if (!progress_needs_updating || in_pos == 0)
return;
// Print the filename if it hasn't been printed yet.
print_filename();
// Calculate how long we have been processing this file.
const double elapsed = my_time() - start_time;
// Set compressed_pos and uncompressed_pos.
uint64_t compressed_pos;
uint64_t uncompressed_pos;
if (opt_mode == MODE_COMPRESS) {
compressed_pos = out_pos;
uncompressed_pos = in_pos;
} else {
compressed_pos = in_pos;
uncompressed_pos = out_pos;
}
signals_block();
// Print the actual progress message. The idea is that there is at
// least three spaces between the fields in typical situations, but
// even in rare situations there is at least one space.
fprintf(stderr, " %7s %43s %11s %10s\r",
progress_percentage(in_pos),
progress_sizes(compressed_pos, uncompressed_pos, false),
progress_speed(uncompressed_pos, elapsed),
progress_remaining(in_pos, elapsed));
// Updating the progress info was finished. Reset
// progress_needs_updating to wait for the next SIGALRM.
//
// NOTE: This has to be done before alarm() call or with (very) bad
// luck we could be setting this to false after the alarm has already
// been triggered.
progress_needs_updating = false;
if (progress_automatic) {
// Mark that the progress indicator is active, so if an error
// occurs, the error message gets printed cleanly.
progress_active = true;
// Restart the timer so that progress_needs_updating gets
// set to true after about one second.
alarm(1);
} else {
// The progress message was printed because user had sent us
// SIGALRM. In this case, each progress message is printed
// on its own line.
fputc('\n', stderr);
}
signals_unblock();
return;
}
/// Print the final statistics of a file when running in verbose mode.
///
/// \param      in_pos      Total amount of input processed
/// \param      out_pos     Total amount of output produced
/// \param      success     True if the file was processed successfully
extern void
message_progress_end(uint64_t in_pos, uint64_t out_pos, bool success)
{
	// If we are not in verbose mode, we have nothing to do.
	if (verbosity < V_VERBOSE || user_abort)
		return;

	// Cancel a pending alarm, if any.
	if (progress_automatic) {
		alarm(0);
		progress_active = false;
	}

	const double elapsed = my_time() - start_time;

	uint64_t compressed_pos;
	uint64_t uncompressed_pos;
	if (opt_mode == MODE_COMPRESS) {
		compressed_pos = out_pos;
		uncompressed_pos = in_pos;
	} else {
		compressed_pos = in_pos;
		uncompressed_pos = out_pos;
	}

	// Convert the elapsed time to whole seconds. Converting a double
	// that is outside the range of uint32_t is undefined behavior, and
	// elapsed can be negative (clock set backwards, or my_time()
	// failing), so the range is checked first.
	uint32_t elapsed_seconds = 0;
	if (elapsed >= 1.0 && elapsed < (double)(UINT32_MAX))
		elapsed_seconds = (uint32_t)(elapsed);

	// If it took less than a second, don't display the time.
	const char *elapsed_str = progress_time(elapsed_seconds);

	signals_block();

	// When using the auto-updating progress indicator, the final
	// statistics are printed in the same format as the progress
	// indicator itself.
	if (progress_automatic && in_pos > 0) {
		// Using floating point conversion for the percentage instead
		// of static "100.0 %" string, because the decimal separator
		// isn't a dot in all locales.
		fprintf(stderr, " %5.1f %% %43s %11s %10s\n",
			100.0,
			progress_sizes(compressed_pos, uncompressed_pos, true),
			progress_speed(uncompressed_pos, elapsed),
			elapsed_str);

	// When no automatic progress indicator is used, don't print a verbose
	// message at all if something went wrong and we couldn't produce
	// any output. If we did produce output, then it is sometimes useful
	// to tell that to the user, especially if we detected an error after
	// a time-consuming operation.
	} else if (success || out_pos > 0) {
		// The filename and size information are always printed.
		fprintf(stderr, "%s: %s", filename, progress_sizes(
				compressed_pos, uncompressed_pos, true));

		// The speed and elapsed time aren't always shown.
		const char *speed = progress_speed(uncompressed_pos, elapsed);
		if (speed[0] != '\0')
			fprintf(stderr, ", %s", speed);

		if (elapsed_str[0] != '\0')
			fprintf(stderr, ", %s", elapsed_str);

		fputc('\n', stderr);
	}

	signals_unblock();
}
/// Print a message to stderr, prefixed with the program name, if the
/// current verbosity level allows it.
///
/// \param      v       Verbosity level of the message
/// \param      fmt     printf-style format string
/// \param      ap      Arguments for the format string
static void
vmessage(enum message_verbosity v, const char *fmt, va_list ap)
{
	// Messages above the current verbosity level are dropped.
	if (v > verbosity)
		return;

	signals_block();

	// If a progress message is on the screen, finish that line first.
	// The progress message is left readable, which makes it possible
	// to see where the error occurred. (The alternative would be to
	// clear the progress message and replace it with the error message.)
	if (progress_active) {
		progress_active = false;
		fputc('\n', stderr);
	}

	fprintf(stderr, "%s: ", argv0);
	vfprintf(stderr, fmt, ap);
	fputc('\n', stderr);

	signals_unblock();
}
// Prints a printf-style message if v is within the current verbosity
// level. The exit status is left untouched.
extern void
message(enum message_verbosity v, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vmessage(v, fmt, args);
	va_end(args);
}
// Prints a printf-style warning at the V_WARNING level and then requests
// the E_WARNING exit status. The exit status is requested even when the
// verbosity level suppressed the message itself.
extern void
message_warning(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vmessage(V_WARNING, fmt, args);
	va_end(args);

	set_exit_status(E_WARNING);
}
// Prints a printf-style error message at the V_ERROR level and then
// requests the E_ERROR exit status. The exit status is requested even
// when the verbosity level suppressed the message itself.
extern void
message_error(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vmessage(V_ERROR, fmt, args);
	va_end(args);

	set_exit_status(E_ERROR);
}
// Prints a printf-style error message at the V_ERROR level and then
// terminates the program through my_exit(E_ERROR).
extern void
message_fatal(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vmessage(V_ERROR, fmt, args);
	va_end(args);

	my_exit(E_ERROR);
}
// Reports an internal error (a bug in this program) through
// message_fatal(), which prints the message at the V_ERROR level and
// ends with my_exit(E_ERROR).
extern void
message_bug(void)
{
message_fatal(_("Internal error (bug)"));
}
// Reports that the signal handlers couldn't be established. This goes
// through message_fatal(), which prints the message at the V_ERROR level
// and ends with my_exit(E_ERROR).
extern void
message_signal_handler(void)
{
message_fatal(_("Cannot establish signal handlers"));
}
// Converts a liblzma return code to a translated, human-readable string.
//
// The returned pointer is never NULL, so the result can be passed
// directly to a "%s" conversion. Codes that should never reach an error
// message (LZMA_OK, LZMA_STREAM_END, LZMA_GET_CHECK, LZMA_PROG_ERROR) and
// any value that isn't listed in the switch are reported as an internal
// error.
extern const char *
message_strm(lzma_ret code)
{
	switch (code) {
	case LZMA_NO_CHECK:
		return _("No integrity check; not verifying file integrity");

	case LZMA_UNSUPPORTED_CHECK:
		return _("Unsupported type of integrity check; "
				"not verifying file integrity");

	case LZMA_MEM_ERROR:
		// Use the C library's own out-of-memory message.
		return strerror(ENOMEM);

	case LZMA_MEMLIMIT_ERROR:
		return _("Memory usage limit reached");

	case LZMA_FORMAT_ERROR:
		return _("File format not recognized");

	case LZMA_OPTIONS_ERROR:
		return _("Unsupported options");

	case LZMA_DATA_ERROR:
		return _("Compressed data is corrupt");

	case LZMA_BUF_ERROR:
		return _("Unexpected end of input");

	case LZMA_OK:
	case LZMA_STREAM_END:
	case LZMA_GET_CHECK:
	case LZMA_PROG_ERROR:
		// There is intentionally no "default" label: this way the
		// compiler can warn if a new constant is added to lzma_ret
		// but not to this function.
		break;
	}

	// Fall back to a real string instead of NULL. The callers print the
	// result with "%s", and passing a null pointer to that conversion
	// is undefined behavior.
	return _("Internal error (bug)");
}
// Prints a one-line hint that tells the user to run the program with
// --help. The hint goes through message(), so the exit status isn't
// changed.
extern void
message_try_help(void)
{
// Print this with V_WARNING instead of V_ERROR to prevent it from
// showing up when --quiet has been specified.
message(V_WARNING, _("Try `%s --help' for more information."), argv0);
return;
}
// Prints the version of the command line tool and the version of liblzma
// to stdout, and then exits with E_SUCCESS.
extern void
message_version(void)
{
	// The liblzma in use may be of a different version than this
	// command line tool, so both versions are shown, one per line.
	printf("xz " PACKAGE_VERSION "\n"
			"liblzma %s\n", lzma_version_string());

	my_exit(E_SUCCESS);
}
// Prints the usage message to stdout and exits with E_SUCCESS.
//
// long_help selects between the short help (basic options only) and the
// long help, which adds the section headings, the advanced options, the
// custom filter chain options, and a summary of the resource limits in
// effect on this system.
//
// The order of the calls below is the order of the sections in the
// output, so keep it intact when adding new options.
extern void
message_help(bool long_help)
{
printf(_("Usage: %s [OPTION]... [FILE]...\n"
"Compress or decompress FILEs in the .xz format.\n\n"),
argv0);
puts(_("Mandatory arguments to long options are mandatory for "
"short options too.\n"));
// Section headings are shown only in the long help; the basic options
// of each section are shown in both modes.
if (long_help)
puts(_(" Operation mode:\n"));
puts(_(
" -z, --compress force compression\n"
" -d, --decompress force decompression\n"
" -t, --test test compressed file integrity\n"
" -l, --list list information about files"));
if (long_help)
puts(_("\n Operation modifiers:\n"));
puts(_(
" -k, --keep keep (don't delete) input files\n"
" -f, --force force overwrite of output file and (de)compress links\n"
" -c, --stdout write to standard output and don't delete input files"));
// Less frequently needed operation modifiers (long help only).
if (long_help)
puts(_(
" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n"
" --files=[FILE] read filenames to process from FILE; if FILE is\n"
" omitted, filenames are read from the standard input;\n"
" filenames must be terminated with the newline character\n"
" --files0=[FILE] like --files but use the null character as terminator"));
if (long_help) {
puts(_("\n Basic file format and compression options:\n"));
puts(_(
" -F, --format=FMT file format to encode or decode; possible values are\n"
" `auto' (default), `xz', `lzma', and `raw'\n"
" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n"
" or `sha256'"));
}
// --preset and --memory are listed in both the short and the long help.
puts(_(
" -p, --preset=NUM compression preset: 1-2 fast compression, 3-6 good\n"
" compression, 7-9 excellent compression; default is 7"));
puts(_(
" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n"
" the default setting, which depends on the operation mode\n"
" and the amount of physical memory (RAM)"));
if (long_help) {
puts(_(
"\n Custom filter chain for compression (alternative for using presets):"));
// The help text of LZMA1/LZMA2, Delta, and Subblock is compiled in only
// if the encoder or the decoder of the filter in question was enabled
// at build time. The text of the x86/PowerPC/IA64/ARM/ARM-Thumb/SPARC
// filters has no such condition.
#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \
|| defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
puts(_(
"\n"
" --lzma1=[OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n"
" --lzma2=[OPTS] more of the following options (valid values; default):\n"
" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n"
" lc=NUM number of literal context bits (0-4; 3)\n"
" lp=NUM number of literal position bits (0-4; 0)\n"
" pb=NUM number of position bits (0-4; 2)\n"
" mode=MODE compression mode (fast, normal; normal)\n"
" nice=NUM nice length of a match (2-273; 64)\n"
" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n"
" depth=NUM maximum search depth; 0=automatic (default)"));
#endif
puts(_(
"\n"
" --x86 x86 filter (sometimes called BCJ filter)\n"
" --powerpc PowerPC (big endian) filter\n"
" --ia64 IA64 (Itanium) filter\n"
" --arm ARM filter\n"
" --armthumb ARM-Thumb filter\n"
" --sparc SPARC filter"));
#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
puts(_(
"\n"
" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n"
" dist=NUM distance between bytes being subtracted\n"
" from each other (1-256; 1)"));
#endif
#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK)
puts(_(
"\n"
" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n"
" size=NUM number of bytes of data per subblock\n"
" (1 - 256Mi; 4Ki)\n"
" rle=NUM run-length encoder chunk size (0-256; 0)"));
#endif
}
// The block comment below holds a disabled "Resource usage options"
// section (including -T, --threads); none of it is compiled.
/*
if (long_help)
puts(_(
"\n"
" Resource usage options:\n"
"\n"
" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n"
" the default setting, which depends on the operation mode\n"
" and the amount of physical memory (RAM)\n"
" -T, --threads=NUM use a maximum of NUM (de)compression threads"
// " --threading=STR threading style; possible values are `auto' (default),\n"
// " `files', and `stream'
));
*/
if (long_help)
puts(_("\n Other options:\n"));
puts(_(
" -q, --quiet suppress warnings; specify twice to suppress errors too\n"
" -v, --verbose be verbose; specify twice for even more verbose"));
// The descriptions of --help and --long-help depend on which of the two
// help texts is being printed.
if (long_help)
puts(_(
"\n"
" -h, --help display the short help (lists only the basic options)\n"
" -H, --long-help display this long help"));
else
puts(_(
" -h, --help display this short help\n"
" -H, --long-help display the long help (lists also the advanced options)"));
puts(_(
" -V, --version display the version number"));
puts(_("\nWith no FILE, or when FILE is -, read standard input.\n"));
if (long_help) {
// FIXME !!!
// Scale the encoder memory usage limit down to MiB for display. A limit
// below one MiB would become zero in the division, so it is shown as 1.
size_t mem_limit = hardware_memlimit_encoder() / (1024 * 1024);
if (mem_limit == 0)
mem_limit = 1;
// We use PRIu64 instead of %zu to support pre-C99 libc.
// FIXME: Use ' but avoid warnings.
puts(_("On this system and configuration, the tool will use"));
printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"),
(uint64_t)(mem_limit));
// N_() is given the singular string, the plural string, and the count.
// The singular string has no conversion specification; printf() simply
// ignores the surplus argument in that case.
printf(N_(" * at maximum of one thread for (de)compression.\n\n",
" * at maximum of %" PRIu64
" threads for (de)compression.\n\n",
(uint64_t)(opt_threads)), (uint64_t)(opt_threads));
}
printf(_("Report bugs to <%s> (in English or Finnish).\n"),
PACKAGE_BUGREPORT);
// Printing the help is a successful operation on its own; this function
// never returns to the caller.
my_exit(E_SUCCESS);
}

132
src/lzma/message.h Normal file
View file

@ -0,0 +1,132 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file message.h
/// \brief Printing messages to stderr
//
// Copyright (C) 2007-2008 Lasse Collin
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef MESSAGE_H
#define MESSAGE_H
/// Verbosity levels
///
/// The order of the enumerators is significant: they go from the quietest
/// to the most verbose, and a message is printed when its level is less
/// than or equal to the current verbosity level.
enum message_verbosity {
V_SILENT, ///< No messages
V_ERROR, ///< Only error messages
V_WARNING, ///< Errors and warnings
V_VERBOSE, ///< Errors, warnings, and verbose statistics
V_DEBUG, ///< Debugging, FIXME remove?
};
/// \brief Initializes the message functions
///
/// \param argv0 Name of the program i.e. argv[0] from main()
///
/// If an error occurs, this function doesn't return.
///
extern void message_init(const char *argv0);
/// Increase verbosity level by one step unless it was at maximum.
extern void message_verbosity_increase(void);
/// Decrease verbosity level by one step unless it was at minimum.
extern void message_verbosity_decrease(void);
/// Set the total number of files to be processed (stdin is counted as a file
/// here). The default is one.
extern void message_set_files(unsigned int files);
/// \brief Print a message if verbosity level is at least "verbosity"
///
/// This doesn't touch the exit status.
extern void message(enum message_verbosity verbosity, const char *fmt, ...)
lzma_attribute((format(printf, 2, 3)));
/// \brief Prints a warning and possibly sets exit status
///
/// The message is printed only if verbosity level is at least V_WARNING.
/// The exit status is set to WARNING unless it was already at ERROR.
extern void message_warning(const char *fmt, ...)
lzma_attribute((format(printf, 1, 2)));
/// \brief Prints an error message and sets exit status
///
/// The message is printed only if verbosity level is at least V_ERROR.
/// The exit status is set to ERROR.
extern void message_error(const char *fmt, ...)
lzma_attribute((format(printf, 1, 2)));
/// \brief Prints an error message and exits with exit status E_ERROR
///
/// The message is printed only if verbosity level is at least V_ERROR.
extern void message_fatal(const char *fmt, ...)
lzma_attribute((format(printf, 1, 2)))
lzma_attribute((noreturn));
/// Print an error message that an internal error occurred and exit with
/// exit status E_ERROR.
extern void message_bug(void) lzma_attribute((noreturn));
/// Print a message that establishing signal handlers failed, and exit with
/// exit status ERROR.
extern void message_signal_handler(void) lzma_attribute((noreturn));
/// Converts lzma_ret to a string.
extern const char *message_strm(lzma_ret code);
/// Print a message that user should try --help.
extern void message_try_help(void);
/// Prints the version number to stdout and exits with exit status SUCCESS.
extern void message_version(void) lzma_attribute((noreturn));
/// Print the help message.
extern void message_help(bool long_help) lzma_attribute((noreturn));
/// \brief Starts the progress indicator for a new file
///
/// \param filename Name of the input file
/// \param in_size Size of the input file in bytes. The coder passes
/// zero when the st_size of the source file isn't
/// positive.
///
extern void message_progress_start(const char *filename, uint64_t in_size);
/// \brief Updates the progress information
///
/// The coder calls this after each round of coding. According to the
/// comment at the call site, progress is shown if --verbose was specified
/// and stderr is a terminal.
///
/// \param in_pos Number of input bytes processed so far
/// \param out_pos Number of output bytes produced so far
///
extern void message_progress_update(uint64_t in_pos, uint64_t out_pos);
/// \brief Finishes the progress message if we were in verbose mode
///
/// \param in_pos Final input position i.e. how much input there was.
/// \param out_pos Final output position
/// \param success True if the operation was successful. When the
/// automatic progress indicator isn't in use, the final
/// statistics are printed if the operation was
/// successful or if at least some output was produced.
/// With the automatic progress indicator, they are
/// printed whenever some input was processed.
///
extern void message_progress_end(
uint64_t in_pos, uint64_t out_pos, bool success);
#endif

View file

@ -79,11 +79,9 @@ parse_options(const char *str, const option_map *opts,
if (value != NULL)
*value++ = '\0';
if (value == NULL || value[0] == '\0') {
errmsg(V_ERROR, _("%s: Options must be `name=value' "
if (value == NULL || value[0] == '\0')
message_fatal(_("%s: Options must be `name=value' "
"pairs separated with commas"), str);
my_exit(ERROR);
}
// Look for the option name from the option map.
bool found = false;
@ -106,11 +104,9 @@ parse_options(const char *str, const option_map *opts,
break;
}
if (opts[i].map[j].name == NULL) {
errmsg(V_ERROR, _("%s: Invalid option "
if (opts[i].map[j].name == NULL)
message_fatal(_("%s: Invalid option "
"value"), value);
my_exit(ERROR);
}
set(filter_options, i, opts[i].map[j].id);
}
@ -119,10 +115,8 @@ parse_options(const char *str, const option_map *opts,
break;
}
if (!found) {
errmsg(V_ERROR, _("%s: Invalid option name"), name);
my_exit(ERROR);
}
if (!found)
message_fatal(_("%s: Invalid option name"), name);
if (split == NULL)
break;
@ -168,7 +162,7 @@ set_subblock(void *options, uint32_t key, uint64_t value)
extern lzma_options_subblock *
parse_options_subblock(const char *str)
options_subblock(const char *str)
{
static const option_map opts[] = {
{ "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN,
@ -217,7 +211,7 @@ set_delta(void *options, uint32_t key, uint64_t value)
extern lzma_options_delta *
parse_options_delta(const char *str)
options_delta(const char *str)
{
static const option_map opts[] = {
{ "dist", NULL, LZMA_DELTA_DIST_MIN,
@ -225,7 +219,7 @@ parse_options_delta(const char *str)
{ NULL, NULL, 0, 0 }
};
lzma_options_delta *options = xmalloc(sizeof(lzma_options_subblock));
lzma_options_delta *options = xmalloc(sizeof(lzma_options_delta));
*options = (lzma_options_delta){
// It's hard to give a useful default for this.
.type = LZMA_DELTA_TYPE_BYTE,
@ -296,7 +290,7 @@ set_lzma(void *options, uint32_t key, uint64_t value)
extern lzma_options_lzma *
parse_options_lzma(const char *str)
options_lzma(const char *str)
{
static const name_id_map modes[] = {
{ "fast", LZMA_MODE_FAST },
@ -345,18 +339,14 @@ parse_options_lzma(const char *str)
parse_options(str, opts, &set_lzma, options);
if (options->lc + options->lp > LZMA_LCLP_MAX) {
errmsg(V_ERROR, "The sum of lc and lp must be at "
"maximum of 4");
exit(ERROR);
}
if (options->lc + options->lp > LZMA_LCLP_MAX)
message_fatal(_("The sum of lc and lp must be at "
"maximum of 4"));
const uint32_t nice_len_min = options->mf & 0x0F;
if (options->nice_len < nice_len_min) {
errmsg(V_ERROR, "The selected match finder requires at "
"least nice=%" PRIu32, nice_len_min);
exit(ERROR);
}
if (options->nice_len < nice_len_min)
message_fatal(_("The selected match finder requires at "
"least nice=%" PRIu32), nice_len_min);
return options;
}

View file

@ -27,20 +27,20 @@
///
/// \return Pointer to allocated options structure.
/// Doesn't return on error.
extern lzma_options_subblock *parse_options_subblock(const char *str);
extern lzma_options_subblock *options_subblock(const char *str);
/// \brief Parser for Delta options
///
/// \return Pointer to allocated options structure.
/// Doesn't return on error.
extern lzma_options_delta *parse_options_delta(const char *str);
extern lzma_options_delta *options_delta(const char *str);
/// \brief Parser for LZMA options
///
/// \return Pointer to allocated options structure.
/// Doesn't return on error.
extern lzma_options_lzma *parse_options_lzma(const char *str);
extern lzma_options_lzma *options_lzma(const char *str);
#endif

View file

@ -22,32 +22,30 @@
#include "sysdefs.h"
#ifdef HAVE_ERRNO_H
# include <errno.h>
#else
extern int errno;
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <signal.h>
#include <pthread.h>
#include <locale.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include "gettext.h"
#ifdef ENABLE_NLS
# include <libintl.h>
# define _(msgid) gettext(msgid)
# define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n)
#else
# define _(msgid) (msgid)
# define N_(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2))
#endif
#include "alloc.h"
#include "main.h"
#include "process.h"
#include "message.h"
#include "args.h"
#include "error.h"
#include "hardware.h"
#include "help.h"
#include "io.h"
#include "options.h"
#include "process.h"
#include "suffix.h"
#include "util.h"

View file

@ -20,137 +20,158 @@
#include "private.h"
typedef struct {
lzma_stream strm;
void *options;
enum operation_mode opt_mode = MODE_COMPRESS;
file_pair *pair;
/// We don't need this for *anything* but seems that at least with
/// glibc pthread_create() doesn't allow NULL.
pthread_t thread;
bool in_use;
} thread_data;
enum format_type opt_format = FORMAT_AUTO;
/// Number of available threads
static size_t free_threads;
/// Stream used to communicate with liblzma
static lzma_stream strm = LZMA_STREAM_INIT;
/// Thread-specific data
static thread_data *threads;
/// Filters needed for encoding in all formats, and also for decoding raw data
static lzma_filter filters[LZMA_FILTERS_MAX + 1];
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
/// Number of filters. Zero indicates that we are using a preset.
static size_t filters_count = 0;
/// Attributes of new coder threads. They are created in detached state.
/// Coder threads signal to the service thread themselves when they are done.
static pthread_attr_t thread_attr;
/// Number of the preset (1-9)
static size_t preset_number = 7;
/// Indicate if no preset has been given. In that case, we will auto-adjust
/// the compression preset so that it doesn't use too much RAM.
// FIXME
static bool preset_default = true;
/// Integrity check type
static lzma_check check = LZMA_CHECK_CRC64;
//////////
// Init //
//////////
extern void
process_init(void)
coder_set_check(lzma_check new_check)
{
threads = malloc(sizeof(thread_data) * opt_threads);
if (threads == NULL) {
out_of_memory();
my_exit(ERROR);
check = new_check;
return;
}
for (size_t i = 0; i < opt_threads; ++i)
memzero(&threads[i], sizeof(threads[0]));
if (pthread_attr_init(&thread_attr)
|| pthread_attr_setdetachstate(
&thread_attr, PTHREAD_CREATE_DETACHED)) {
out_of_memory();
my_exit(ERROR);
extern void
coder_set_preset(size_t new_preset)
{
preset_number = new_preset;
preset_default = false;
return;
}
free_threads = opt_threads;
extern void
coder_add_filter(lzma_vli id, void *options)
{
if (filters_count == LZMA_FILTERS_MAX)
message_fatal(_("Maximum number of filters is four"));
filters[filters_count].id = id;
filters[filters_count].options = options;
++filters_count;
return;
}
//////////////////////////
// Thread-specific data //
//////////////////////////
static thread_data *
get_thread_data(void)
extern void
coder_set_compression_settings(void)
{
pthread_mutex_lock(&mutex);
// Options for LZMA1 or LZMA2 in case we are using a preset.
static lzma_options_lzma opt_lzma;
while (free_threads == 0) {
pthread_cond_wait(&cond, &mutex);
if (user_abort) {
pthread_cond_signal(&cond);
pthread_mutex_unlock(&mutex);
return NULL;
}
if (filters_count == 0) {
// We are using a preset. This is not a good idea in raw mode
// except when playing around with things. Different versions
// of this software may use different options in presets, and
// thus make uncompressing the raw data difficult.
if (opt_format == FORMAT_RAW) {
// The message is shown only if warnings are allowed
// but the exit status isn't changed.
message(V_WARNING, _("Using a preset in raw mode "
"is discouraged."));
message(V_WARNING, _("The exact options of the "
"presets may vary between software "
"versions."));
}
thread_data *t = threads;
while (t->in_use)
++t;
// Get the preset for LZMA1 or LZMA2.
if (lzma_lzma_preset(&opt_lzma, preset_number))
message_bug();
t->in_use = true;
--free_threads;
pthread_mutex_unlock(&mutex);
return t;
// Use LZMA2 except with --format=lzma we use LZMA1.
filters[0].id = opt_format == FORMAT_LZMA
? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
filters[0].options = &opt_lzma;
filters_count = 1;
}
// Terminate the filter options array.
filters[filters_count].id = LZMA_VLI_UNKNOWN;
static void
release_thread_data(thread_data *t)
{
pthread_mutex_lock(&mutex);
// If we are using the LZMA_Alone format, allow exactly one filter
// which has to be LZMA.
if (opt_format == FORMAT_LZMA && (filters_count != 1
|| filters[0].id != LZMA_FILTER_LZMA1))
message_fatal(_("With --format=lzma only the LZMA1 filter "
"is supported"));
t->in_use = false;
++free_threads;
// TODO: liblzma probably needs an API to validate the filter chain.
pthread_cond_signal(&cond);
pthread_mutex_unlock(&mutex);
return;
}
static int
create_thread(void *(*func)(thread_data *t), thread_data *t)
{
if (opt_threads == 1) {
func(t);
// If using --format=raw, we can be decoding.
uint64_t memory_usage;
uint64_t memory_limit;
if (opt_mode == MODE_COMPRESS) {
memory_usage = lzma_memusage_encoder(filters);
memory_limit = hardware_memlimit_encoder();
} else {
const int err = pthread_create(&t->thread, &thread_attr,
(void *(*)(void *))(func), t);
if (err) {
errmsg(V_ERROR, _("Cannot create a thread: %s"),
strerror(err));
user_abort = 1;
return -1;
}
memory_usage = lzma_memusage_decoder(filters);
memory_limit = hardware_memlimit_decoder();
}
return 0;
if (memory_usage == UINT64_MAX)
message_bug();
if (preset_default) {
// When no preset was explicitly requested, we use the default
// preset only if the memory usage limit allows. Otherwise we
// select a lower preset automatically.
while (memory_usage > memory_limit) {
if (preset_number == 1)
message_fatal(_("Memory usage limit is too "
"small for any internal "
"filter preset"));
if (lzma_lzma_preset(&opt_lzma, --preset_number))
message_bug();
memory_usage = lzma_memusage_encoder(filters);
}
} else {
if (memory_usage > memory_limit)
message_fatal(_("Memory usage limit is too small "
"for the given filter setup"));
}
// Limit the number of worker threads so that memory usage
// limit isn't exceeded.
assert(memory_usage > 0);
size_t thread_limit = memory_limit / memory_usage;
if (thread_limit == 0)
thread_limit = 1;
if (opt_threads > thread_limit)
opt_threads = thread_limit;
return;
}
/////////////////////////
// One thread per file //
/////////////////////////
static int
single_init(thread_data *t)
static bool
coder_init(void)
{
lzma_ret ret = LZMA_PROG_ERROR;
@ -162,17 +183,15 @@ single_init(thread_data *t)
break;
case FORMAT_XZ:
ret = lzma_stream_encoder(&t->strm,
opt_filters, opt_check);
ret = lzma_stream_encoder(&strm, filters, check);
break;
case FORMAT_LZMA:
ret = lzma_alone_encoder(&t->strm,
opt_filters[0].options);
ret = lzma_alone_encoder(&strm, filters[0].options);
break;
case FORMAT_RAW:
ret = lzma_raw_encoder(&t->strm, opt_filters);
ret = lzma_raw_encoder(&strm, filters);
break;
}
} else {
@ -181,254 +200,192 @@ single_init(thread_data *t)
switch (opt_format) {
case FORMAT_AUTO:
ret = lzma_auto_decoder(&t->strm, opt_memory, flags);
ret = lzma_auto_decoder(&strm,
hardware_memlimit_decoder(), flags);
break;
case FORMAT_XZ:
ret = lzma_stream_decoder(&t->strm, opt_memory, flags);
ret = lzma_stream_decoder(&strm,
hardware_memlimit_decoder(), flags);
break;
case FORMAT_LZMA:
ret = lzma_alone_decoder(&t->strm, opt_memory);
ret = lzma_alone_decoder(&strm,
hardware_memlimit_decoder());
break;
case FORMAT_RAW:
// Memory usage has already been checked in args.c.
ret = lzma_raw_decoder(&t->strm, opt_filters);
// FIXME Comment
ret = lzma_raw_decoder(&strm, filters);
break;
}
}
if (ret != LZMA_OK) {
if (ret == LZMA_MEM_ERROR)
out_of_memory();
message_error("%s", message_strm(LZMA_MEM_ERROR));
else
internal_error();
message_bug();
return -1;
return true;
}
return 0;
return false;
}
static void *
single(thread_data *t)
static bool
coder_run(file_pair *pair)
{
if (single_init(t)) {
io_close(t->pair, false);
release_thread_data(t);
return NULL;
}
// Buffers to hold input and output data.
uint8_t in_buf[IO_BUFFER_SIZE];
uint8_t out_buf[IO_BUFFER_SIZE];
// Initialize the progress indicator.
const uint64_t in_size = pair->src_st.st_size <= (off_t)(0)
? 0 : (uint64_t)(pair->src_st.st_size);
message_progress_start(pair->src_name, in_size);
uint8_t in_buf[BUFSIZ];
uint8_t out_buf[BUFSIZ];
lzma_action action = LZMA_RUN;
lzma_ret ret;
bool success = false;
t->strm.avail_in = 0;
t->strm.next_out = out_buf;
t->strm.avail_out = BUFSIZ;
strm.avail_in = 0;
strm.next_out = out_buf;
strm.avail_out = IO_BUFFER_SIZE;
while (!user_abort) {
if (t->strm.avail_in == 0 && !t->pair->src_eof) {
t->strm.next_in = in_buf;
t->strm.avail_in = io_read(t->pair, in_buf, BUFSIZ);
// Fill the input buffer if it is empty and we haven't reached
// end of file yet.
if (strm.avail_in == 0 && !pair->src_eof) {
strm.next_in = in_buf;
strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE);
if (t->strm.avail_in == SIZE_MAX)
if (strm.avail_in == SIZE_MAX)
break;
if (t->pair->src_eof)
// Encoder needs to know when we have given all the
// input to it. The decoders need to know it too when
// we are using LZMA_CONCATENATED.
if (pair->src_eof)
action = LZMA_FINISH;
}
ret = lzma_code(&t->strm, action);
// Let liblzma do the actual work.
ret = lzma_code(&strm, action);
if ((t->strm.avail_out == 0 || ret != LZMA_OK)
&& opt_mode != MODE_TEST) {
if (io_write(t->pair, out_buf,
BUFSIZ - t->strm.avail_out))
break;
// Write out if the output buffer became full.
if (strm.avail_out == 0) {
if (opt_mode != MODE_TEST && io_write(pair, out_buf,
IO_BUFFER_SIZE - strm.avail_out))
return false;
t->strm.next_out = out_buf;
t->strm.avail_out = BUFSIZ;
strm.next_out = out_buf;
strm.avail_out = IO_BUFFER_SIZE;
}
if (ret != LZMA_OK) {
// Check that there is no trailing garbage. This is
// needed for LZMA_Alone and raw streams.
if (ret == LZMA_STREAM_END && (t->strm.avail_in != 0
|| (!t->pair->src_eof && io_read(
t->pair, in_buf, 1) != 0)))
// Determine if the return value indicates that we
// won't continue coding.
const bool stop = ret != LZMA_NO_CHECK
&& ret != LZMA_UNSUPPORTED_CHECK;
if (stop) {
// First print the final progress info.
// This way the user sees more accurately
// where the error occurred. Note that we
// print this *before* the possible error
// message.
//
// FIXME: What if something goes wrong
// after this?
message_progress_end(strm.total_in,
strm.total_out,
ret == LZMA_STREAM_END);
// Write the remaining bytes even if something
// went wrong, because that way the user gets
// as much data as possible, which can be good
// when trying to get at least some useful
// data out of damaged files.
if (opt_mode != MODE_TEST && io_write(pair,
out_buf, IO_BUFFER_SIZE
- strm.avail_out))
return false;
}
if (ret == LZMA_STREAM_END) {
// Check that there is no trailing garbage.
// This is needed for LZMA_Alone and raw
// streams.
if (strm.avail_in == 0 && (pair->src_eof
|| io_read(pair, in_buf, 1)
== 0)) {
assert(pair->src_eof);
return true;
}
// FIXME: What about io_read() failing?
// We hadn't reached the end of the file.
ret = LZMA_DATA_ERROR;
if (ret != LZMA_STREAM_END) {
errmsg(V_ERROR, "%s: %s", t->pair->src_name,
str_strm_error(ret));
break;
assert(stop);
}
assert(t->pair->src_eof);
success = true;
break;
}
}
io_close(t->pair, success);
release_thread_data(t);
return NULL;
}
///////////////////////////////
// Multiple threads per file //
///////////////////////////////
// TODO
// I'm not sure what would the best way to implement this. Here's one
// possible way:
// - Reader thread would read the input data and control the coders threads.
// - Every coder thread is associated with input and output buffer pools.
// The input buffer pool is filled by reader thread, and the output buffer
// pool is emptied by the writer thread.
// - Writer thread writes the output data of the oldest living coder thread.
//
// The per-file thread started by the application's main thread is used as
// the reader thread. In the beginning, it starts the writer thread and the
// first coder thread. The coder thread would be left waiting for input from
// the reader thread, and the writer thread would be waiting for input from
// the coder thread.
//
// The reader thread reads the input data into a ring buffer, whose size
// depends on the value returned by lzma_chunk_size(). If the ring buffer
// gets full, the buffer is marked "to be finished", which indicates to
// the coder thread that no more input is coming. Then a new coder thread
// would be started.
//
// TODO
/*
typedef struct {
/// Buffers
uint8_t (*buffers)[BUFSIZ];
/// Number of buffers
size_t buffer_count;
/// buffers[read_pos] is the buffer currently being read. Once finish
/// is true and read_pos == write_pos, end of input has been reached.
size_t read_pos;
/// buffers[write_pos] is the buffer into which data is currently
/// being written.
size_t write_pos;
/// This variable matters only when read_pos == write_pos && finish.
/// In that case, this variable will contain the size of the
/// buffers[read_pos].
size_t last_size;
/// True once no more data is being written to the buffer. When this
/// is set, the last_size variable must have been set too.
bool finish;
/// Mutex to protect access to the variables in this structure
pthread_mutex_t mutex;
/// Condition to indicate when another thread can continue
pthread_cond_t cond;
} mem_pool;
static foo
multi_reader(thread_data *t)
{
bool done = false;
do {
const size_t size = io_read(t->pair,
m->buffers + m->write_pos, BUFSIZ);
if (size == SIZE_MAX) {
// TODO
} else if (t->pair->src_eof) {
m->last_size = size;
}
pthread_mutex_lock(&m->mutex);
if (++m->write_pos == m->buffer_count)
m->write_pos = 0;
if (m->write_pos == m->read_pos || t->pair->src_eof)
m->finish = true;
pthread_cond_signal(&m->cond);
pthread_mutex_unlock(&m->mutex);
} while (!m->finish);
return done ? 0 : -1;
}
static foo
multi_code()
{
lzma_action = LZMA_RUN;
while (true) {
pthread_mutex_lock(&m->mutex);
while (m->read_pos == m->write_pos && !m->finish)
pthread_cond_wait(&m->cond, &m->mutex);
pthread_mutex_unlock(&m->mutex);
if (m->finish) {
t->strm.avail_in = m->last_size;
if (opt_mode == MODE_COMPRESS)
action = LZMA_FINISH;
// If we get here and stop is true, something went
// wrong and we print an error. Otherwise it's just
// a warning and coding can continue.
if (stop) {
message_error("%s: %s", pair->src_name,
message_strm(ret));
} else {
t->strm.avail_in = BUFSIZ;
message_warning("%s: %s", pair->src_name,
message_strm(ret));
// When compressing, all possible errors set
// stop to true.
assert(opt_mode != MODE_COMPRESS);
}
t->strm.next_in = m->buffers + m->read_pos;
const lzma_ret ret = lzma_code(&t->strm, action);
}
if (ret == LZMA_MEMLIMIT_ERROR) {
// Figure out how much memory would have
// actually needed.
// TODO
}
*/
if (stop)
return false;
}
// Show progress information if --verbose was specified and
// stderr is a terminal.
message_progress_update(strm.total_in, strm.total_out);
}
return false;
}
///////////////////////
// Starting new file //
///////////////////////
extern void
process_file(const char *filename)
{
thread_data *t = get_thread_data();
if (t == NULL)
return; // User abort
// If this fails, it shows appropriate error messages too.
t->pair = io_open(filename);
if (t->pair == NULL) {
release_thread_data(t);
// First try initializing the coder. If it fails, it's useless to try
// opening the file. Check also for user_abort just in case if we had
// got a signal while initializing the coder.
if (coder_init() || user_abort)
return;
}
// TODO Currently only one-thread-per-file mode is implemented.
// Try to open the input and output files.
file_pair *pair = io_open(filename);
if (pair == NULL)
return;
if (create_thread(&single, t)) {
io_close(t->pair, false);
release_thread_data(t);
}
// Do the actual coding.
const bool success = coder_run(pair);
// Close the file pair. It needs to know if coding was successful to
// know if the source or target file should be unlinked.
io_close(pair, success);
return;
}

View file

@ -23,6 +23,46 @@
#include "private.h"
enum operation_mode {
MODE_COMPRESS,
MODE_DECOMPRESS,
MODE_TEST,
MODE_LIST,
};
// NOTE: The order of these is significant in suffix.c.
enum format_type {
FORMAT_AUTO,
FORMAT_XZ,
FORMAT_LZMA,
// HEADER_GZIP,
FORMAT_RAW,
};
/// Operation mode of the command line tool. This is set in args.c and read
/// in several files.
extern enum operation_mode opt_mode;
/// File format to use when encoding or what format(s) to accept when
/// decoding. This is a global because it's needed also in suffix.c.
/// This is set in args.c.
extern enum format_type opt_format;
/// Set the integrity check type used when compressing
extern void coder_set_check(lzma_check check);
/// Set preset number
extern void coder_set_preset(size_t new_preset);
/// Add a filter to the custom filter chain
extern void coder_add_filter(lzma_vli id, void *options);
///
extern void coder_set_compression_settings(void);
extern void process_init(void);
extern void process_file(const char *filename);

View file

@ -20,6 +20,9 @@
#include "private.h"
static char *custom_suffix = NULL;
struct suffix_pair {
const char *compressed;
const char *uncompressed;
@ -74,8 +77,8 @@ uncompressed_name(const char *src_name, const size_t src_len)
if (opt_format == FORMAT_RAW) {
// Don't check for known suffixes when --format=raw was used.
if (opt_suffix == NULL) {
errmsg(V_ERROR, _("%s: With --format=raw, "
if (custom_suffix == NULL) {
message_error(_("%s: With --format=raw, "
"--suffix=.SUF is required unless "
"writing to stdout"), src_name);
return NULL;
@ -91,21 +94,17 @@ uncompressed_name(const char *src_name, const size_t src_len)
}
}
if (new_len == 0 && opt_suffix != NULL)
new_len = test_suffix(opt_suffix, src_name, src_len);
if (new_len == 0 && custom_suffix != NULL)
new_len = test_suffix(custom_suffix, src_name, src_len);
if (new_len == 0) {
errmsg(V_WARNING, _("%s: Filename has an unknown suffix, "
message_warning(_("%s: Filename has an unknown suffix, "
"skipping"), src_name);
return NULL;
}
const size_t new_suffix_len = strlen(new_suffix);
char *dest_name = malloc(new_len + new_suffix_len + 1);
if (dest_name == NULL) {
out_of_memory();
return NULL;
}
char *dest_name = xmalloc(new_len + new_suffix_len + 1);
memcpy(dest_name, src_name, new_len);
memcpy(dest_name + new_len, new_suffix, new_suffix_len);
@ -154,7 +153,7 @@ compressed_name(const char *src_name, const size_t src_len)
for (size_t i = 0; suffixes[i].compressed != NULL; ++i) {
if (test_suffix(suffixes[i].compressed, src_name, src_len)
!= 0) {
errmsg(V_WARNING, _("%s: File already has `%s' "
message_warning(_("%s: File already has `%s' "
"suffix, skipping"), src_name,
suffixes[i].compressed);
return NULL;
@ -163,22 +162,18 @@ compressed_name(const char *src_name, const size_t src_len)
// TODO: Hmm, maybe it would be better to validate this in args.c,
// since the suffix handling when decoding is weird now.
if (opt_format == FORMAT_RAW && opt_suffix == NULL) {
errmsg(V_ERROR, _("%s: With --format=raw, "
if (opt_format == FORMAT_RAW && custom_suffix == NULL) {
message_error(_("%s: With --format=raw, "
"--suffix=.SUF is required unless "
"writing to stdout"), src_name);
return NULL;
}
const char *suffix = opt_suffix != NULL
? opt_suffix : suffixes[0].compressed;
const char *suffix = custom_suffix != NULL
? custom_suffix : suffixes[0].compressed;
const size_t suffix_len = strlen(suffix);
char *dest_name = malloc(src_len + suffix_len + 1);
if (dest_name == NULL) {
out_of_memory();
return NULL;
}
char *dest_name = xmalloc(src_len + suffix_len + 1);
memcpy(dest_name, src_name, src_len);
memcpy(dest_name + src_len, suffix, suffix_len);
@ -189,7 +184,7 @@ compressed_name(const char *src_name, const size_t src_len)
extern char *
get_dest_name(const char *src_name)
suffix_get_dest_name(const char *src_name)
{
assert(src_name != NULL);
@ -201,3 +196,18 @@ get_dest_name(const char *src_name)
? compressed_name(src_name, src_len)
: uncompressed_name(src_name, src_len);
}
/// \brief      Set a custom filename suffix
///
/// \param      suffix  New suffix. It is duplicated with xstrdup(), so the
///                     caller does not need to keep the string allocated.
///
/// An empty suffix or a suffix containing a slash is reported with
/// message_fatal(). Any previously set custom suffix is freed and replaced.
extern void
suffix_set(const char *suffix)
{
	// Empty suffix and suffixes having a slash are rejected. Such
	// suffixes would break things later.
	//
	// Report the string that was actually validated instead of the getopt
	// global optarg, so the message is correct no matter where the caller
	// got the suffix from.
	if (suffix[0] == '\0' || strchr(suffix, '/') != NULL)
		message_fatal(_("%s: Invalid filename suffix"), suffix);

	// Replace the old custom_suffix (if any) with the new suffix.
	free(custom_suffix);
	custom_suffix = xstrdup(suffix);
	return;
}

View file

@ -20,6 +20,21 @@
#ifndef SUFFIX_H
#define SUFFIX_H
extern char *get_dest_name(const char *src_name);
/// \brief Get the name of the destination file
///
/// Depending on the global variable opt_mode, this tries to find a matching
/// counterpart for src_name. If the name can be constructed, it is allocated
/// and returned (caller must free it). On error, a message is printed and
/// NULL is returned.
extern char *suffix_get_dest_name(const char *src_name);
/// \brief Set a custom filename suffix
///
/// This function calls xstrdup() for the given suffix, thus the caller
/// doesn't need to keep the memory allocated. There can be only one custom
/// suffix, thus if this is called multiple times, the old suffixes are freed
/// and forgotten.
extern void suffix_set(const char *suffix);
#endif

View file

@ -20,17 +20,29 @@
#include "private.h"
/// \brief Fancy version of strtoull()
///
/// \param name Name of the option to show in case of an error
/// \param value String containing the number to be parsed; may
/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi"
/// \param min Minimum valid value
/// \param max Maximum valid value
///
/// \return Parsed value that is in the range [min, max]. Does not return
/// if an error occurs.
///
/// \brief      realloc() wrapper that reports allocation failure
///
/// \param      ptr     Existing allocation, or NULL to allocate a new one
/// \param      size    Requested size in bytes; must be greater than zero
///
/// \return     Pointer returned by realloc(). If realloc() fails,
///             message_fatal() is called with the strerror(errno) text.
extern void *
xrealloc(void *ptr, size_t size)
{
	assert(size > 0);

	void *resized = realloc(ptr, size);
	if (resized == NULL)
		message_fatal("%s", strerror(errno));

	return resized;
}
/// \brief      Duplicate a string into memory obtained from xmalloc()
///
/// \param      src     String to copy; must not be NULL
///
/// \return     Newly allocated copy of src, including the terminating nul.
extern char *
xstrdup(const char *src)
{
	assert(src != NULL);

	// The +1 makes room for, and copies, the terminating nul.
	const size_t bytes = strlen(src) + 1;
	char *copy = xmalloc(bytes);
	memcpy(copy, src, bytes);

	return copy;
}
extern uint64_t
str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
{
@ -40,12 +52,9 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
while (*value == ' ' || *value == '\t')
++value;
if (*value < '0' || *value > '9') {
errmsg(V_ERROR, _("%s: Value is not a non-negative "
"decimal integer"),
value);
my_exit(ERROR);
}
if (*value < '0' || *value > '9')
message_fatal(_("%s: Value is not a non-negative "
"decimal integer"), value);
do {
// Don't overflow.
@ -86,12 +95,11 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
}
if (multiplier == 0) {
errmsg(V_ERROR, _("%s: Invalid multiplier suffix. "
message(V_ERROR, _("%s: Invalid multiplier suffix. "
"Valid suffixes:"), value);
errmsg(V_ERROR, "`k' (10^3), `M' (10^6), `G' (10^9) "
message_fatal("`k' (10^3), `M' (10^6), `G' (10^9) "
"`Ki' (2^10), `Mi' (2^20), "
"`Gi' (2^30)");
my_exit(ERROR);
}
// Don't overflow here either.
@ -107,32 +115,10 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
return result;
error:
errmsg(V_ERROR, _("Value of the option `%s' must be in the range "
message_fatal(_("Value of the option `%s' must be in the range "
"[%llu, %llu]"), name,
(unsigned long long)(min),
(unsigned long long)(max));
my_exit(ERROR);
}
/// \brief Gets filename part from pathname+filename
///
/// \return Pointer in the filename where the actual filename starts.
/// If the last character is a slash, NULL is returned.
///
extern const char *
str_filename(const char *name)
{
const char *base = strrchr(name, '/');
if (base == NULL) {
base = name;
} else if (*++base == '\0') {
base = NULL;
errmsg(V_ERROR, _("%s: Invalid filename"), name);
}
return base;
}
@ -179,9 +165,35 @@ extern bool
is_empty_filename(const char *filename)
{
if (filename[0] == '\0') {
errmsg(V_WARNING, _("Empty filename, skipping"));
message_error(_("Empty filename, skipping"));
return true;
}
return false;
}
/// \brief      Test if stdin is a terminal
///
/// \return     True if isatty() reports that stdin is a terminal, in which
///             case an error message is also printed; false otherwise.
extern bool
is_tty_stdin(void)
{
	if (!isatty(STDIN_FILENO))
		return false;

	message_error(_("Compressed data not read from a terminal "
			"unless `--force' is used."));
	return true;
}
/// \brief      Test if stdout is a terminal
///
/// \return     True if isatty() reports that stdout is a terminal, in which
///             case an error message is also printed; false otherwise.
extern bool
is_tty_stdout(void)
{
	if (!isatty(STDOUT_FILENO))
		return false;

	message_error(_("Compressed data not written to a terminal "
			"unless `--force' is used."));
	return true;
}

View file

@ -20,13 +20,52 @@
#ifndef UTIL_H
#define UTIL_H
#include "private.h"
/// \brief Safe malloc() that never returns NULL
///
/// \note xmalloc(), xrealloc(), and xstrdup() must not be used when
/// there are files open for writing, that should be cleaned up
/// before exiting.
#define xmalloc(size) xrealloc(NULL, size)
/// \brief Safe realloc() that never returns NULL
extern void *xrealloc(void *ptr, size_t size);
/// \brief Safe strdup() that never returns NULL
extern char *xstrdup(const char *src);
/// \brief Fancy version of strtoull()
///
/// \param name Name of the option to show in case of an error
/// \param value String containing the number to be parsed; may
/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi"
/// \param min Minimum valid value
/// \param max Maximum valid value
///
/// \return Parsed value that is in the range [min, max]. Does not return
/// if an error occurs.
///
extern uint64_t str_to_uint64(const char *name, const char *value,
uint64_t min, uint64_t max);
extern const char *str_filename(const char *filename);
/// \brief Check if filename is empty and print an error message
extern bool is_empty_filename(const char *filename);
/// \brief Test if stdin is a terminal
///
/// If stdin is a terminal, an error message is printed and exit status set
/// to EXIT_ERROR.
extern bool is_tty_stdin(void);
/// \brief Test if stdout is a terminal
///
/// If stdout is a terminal, an error message is printed and exit status set
/// to EXIT_ERROR.
extern bool is_tty_stdout(void);
#endif

View file

@ -19,12 +19,7 @@
#include "sysdefs.h"
#ifdef HAVE_ERRNO_H
#include <errno.h>
#else
extern int errno;
#endif
#include <stdio.h>
#include <unistd.h>
@ -65,7 +60,7 @@ static uint8_t out_buf[BUFSIZ];
static lzma_stream strm = LZMA_STREAM_INIT;
/// Maximum number of bytes of memory to use
static size_t memlimit;
static uint64_t memlimit;
/// Program name to be shown in error messages
static const char *argv0;
@ -94,8 +89,8 @@ help(void)
" -d, --decompress (ignored)\n"
" -k, --keep (ignored)\n"
" -f, --force allow reading compressed data from a terminal\n"
" -M, --memory=NUM use NUM bytes of memory at maximum; the suffixes\n"
" k, M, G, Ki, Mi, and Gi are supported.\n"
" -M, --memory=NUM use NUM bytes of memory at maximum (0 means default);\n"
" the suffixes k, M, G, Ki, Mi, and Gi are supported.\n"
" --format=FMT accept only files in the given file format;\n"
" possible FMTs are `auto', `native', and alone',\n"
" -h, --help display this help and exit\n"
@ -141,20 +136,14 @@ version(void)
static void
set_default_memlimit(void)
{
uint64_t mem = physmem();
if (mem != 0) {
mem /= 3;
const uint64_t mem = physmem();
#if UINT64_MAX > SIZE_MAX
if (mem > SIZE_MAX)
mem = SIZE_MAX;
#endif
memlimit = mem / 3;
} else {
if (mem == 0)
// Cannot autodetect, use 10 MiB as the default limit.
memlimit = (1U << 23) + (1U << 21);
}
else
// Limit is 33 % of RAM.
memlimit = mem / 3;
return;
}
@ -165,7 +154,7 @@ set_default_memlimit(void)
/// This is rudely copied from src/lzma/util.c and modified a little. :-(
///
static size_t
str_to_size(const char *value)
str_to_uint64(const char *value)
{
size_t result = 0;
@ -263,7 +252,10 @@ parse_options(int argc, char **argv)
break;
case 'M':
memlimit = str_to_size(optarg);
memlimit = str_to_uint64(optarg);
if (memlimit == 0)
set_default_memlimit();
break;
case 'h':

View file

@ -172,11 +172,14 @@
bad-1-block_header-3.xz has wrong CRC32 in Block Header.
bad-1-block_header-4.xz has too big Compressed Size (2^63 bytes while
maximum is 2^63 - 4 bytes) in Block Header. It's important that the
file gets rejected due to invalid Compressed Size value; the decoder
bad-1-block_header-4.xz has too big Compressed Size in Block Header
(2^64 - 1 bytes while maximum is a little less, because the whole
Block must stay smaller than 2^64). It's important that the file
gets rejected due to invalid Compressed Size value; the decoder
must not try decoding the Compressed Data field.
bad-1-block_header-5.xz has zero as Compressed Size in Block Header.
bad-2-index-1.xz has wrong Total Sizes in Index.
bad-2-index-2.xz has wrong Uncompressed Sizes in Index.
@ -185,6 +188,9 @@
bad-2-index-4.xz has wrong CRC32 in Index.
bad-2-index-5.xz has zero as Total Size. It is important that the file
gets rejected specifically due to Total Size having an invalid value.
bad-2-compressed_data_padding.xz has non-nul byte in the padding of
the Compressed Data field of the first Block.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more