From bd3d93c89a624abf1dfc1f59625025bbf51f1b14 Mon Sep 17 00:00:00 2001 From: "Ryan C. Gordon" Date: Mon, 15 Nov 2010 15:18:44 -0500 Subject: [PATCH] Added support for .xz files, via the XZ Utils liblzma. Eventually I'll be building 7-Zip support on top of liblzma too (the actual 7-Zip LZMA SDK isn't really usable for our purposes, but the heavy lifting it would supply is also done in liblzma, I think). --- CMakeLists.txt | 87 ++ LICENSE.txt | 10 +- fileio.c | 226 ++++- liblzma/README.txt | 13 + liblzma/api/lzma.h | 313 ++++++ liblzma/api/lzma/base.h | 601 ++++++++++++ liblzma/api/lzma/bcj.h | 90 ++ liblzma/api/lzma/block.h | 529 ++++++++++ liblzma/api/lzma/check.h | 150 +++ liblzma/api/lzma/container.h | 412 ++++++++ liblzma/api/lzma/delta.h | 77 ++ liblzma/api/lzma/filter.h | 420 ++++++++ liblzma/api/lzma/hardware.h | 50 + liblzma/api/lzma/index.h | 682 +++++++++++++ liblzma/api/lzma/index_hash.h | 107 ++ liblzma/api/lzma/lzma.h | 417 ++++++++ liblzma/api/lzma/stream_flags.h | 223 +++++ liblzma/api/lzma/version.h | 121 +++ liblzma/api/lzma/vli.h | 166 ++++ liblzma/check/check.c | 185 ++++ liblzma/check/check.h | 98 ++ liblzma/check/crc_macros.h | 30 + liblzma/common/alone_decoder.c | 232 +++++ liblzma/common/alone_decoder.h | 22 + liblzma/common/block_buffer_decoder.c | 80 ++ liblzma/common/block_decoder.c | 242 +++++ liblzma/common/block_decoder.h | 22 + liblzma/common/block_header_decoder.c | 130 +++ liblzma/common/block_util.c | 90 ++ liblzma/common/common.c | 388 ++++++++ liblzma/common/common.h | 296 ++++++ liblzma/common/easy_decoder_memusage.c | 24 + liblzma/common/easy_preset.c | 27 + liblzma/common/easy_preset.h | 32 + liblzma/common/filter_buffer_decoder.c | 87 ++ liblzma/common/filter_common.c | 349 +++++++ liblzma/common/filter_common.h | 48 + liblzma/common/filter_decoder.c | 188 ++++ liblzma/common/filter_decoder.h | 23 + liblzma/common/filter_flags_decoder.c | 46 + liblzma/common/index.c | 1241 ++++++++++++++++++++++++ liblzma/common/index.h | 73 ++ liblzma/common/index_decoder.c | 344 +++++++ liblzma/common/index_hash.c | 332 +++++++ liblzma/common/mythread.h | 42 + liblzma/common/stream_decoder.c | 458 +++++++++ liblzma/common/stream_decoder.h | 21 + liblzma/common/stream_flags_common.c | 47 + liblzma/common/stream_flags_common.h | 33 + liblzma/common/stream_flags_decoder.c | 82 ++ liblzma/common/sysdefs.h | 184 ++++ liblzma/common/tuklib_common.h | 71 ++ liblzma/common/tuklib_config.h | 7 + liblzma/common/tuklib_integer.h | 523 ++++++++++ liblzma/common/vli_decoder.c | 86 ++ liblzma/common/vli_size.c | 30 + liblzma/delta/delta_common.c | 70 ++ liblzma/delta/delta_common.h | 20 + liblzma/delta/delta_decoder.c | 81 ++ liblzma/delta/delta_decoder.h | 25 + liblzma/delta/delta_private.h | 37 + liblzma/lz/lz_decoder.c | 300 ++++++ liblzma/lz/lz_decoder.h | 234 +++++ liblzma/lzma/lzma2_decoder.c | 304 ++++++ liblzma/lzma/lzma2_decoder.h | 28 + liblzma/lzma/lzma_common.h | 230 +++++ liblzma/lzma/lzma_decoder.c | 1084 +++++++++++++++++++++ liblzma/lzma/lzma_decoder.h | 52 + liblzma/lzma/lzma_encoder_presets.c | 61 ++ liblzma/rangecoder/range_common.h | 85 ++ liblzma/rangecoder/range_decoder.h | 179 ++++ liblzma/rangecoder/range_encoder.h | 237 +++++ liblzma/simple/arm.c | 69 ++ liblzma/simple/armthumb.c | 74 ++ liblzma/simple/ia64.c | 130 +++ liblzma/simple/powerpc.c | 73 ++ liblzma/simple/simple_coder.c | 280 ++++++ liblzma/simple/simple_coder.h | 60 ++ liblzma/simple/simple_decoder.c | 40 + liblzma/simple/simple_decoder.h | 22 + liblzma/simple/simple_private.h | 76 ++ liblzma/simple/sparc.c | 81 ++ liblzma/simple/x86.c | 159 +++ 83 files changed, 14987 insertions(+), 11 deletions(-) create mode 100644 liblzma/README.txt create mode 100644 liblzma/api/lzma.h create mode 100644 liblzma/api/lzma/base.h create mode 100644 liblzma/api/lzma/bcj.h create mode 100644 liblzma/api/lzma/block.h create mode 100644 liblzma/api/lzma/check.h create mode 100644 liblzma/api/lzma/container.h create mode 100644 liblzma/api/lzma/delta.h create mode 100644 liblzma/api/lzma/filter.h create mode 100644 liblzma/api/lzma/hardware.h create mode 100644 liblzma/api/lzma/index.h create mode 100644 liblzma/api/lzma/index_hash.h create mode 100644 liblzma/api/lzma/lzma.h create mode 100644 liblzma/api/lzma/stream_flags.h create mode 100644 liblzma/api/lzma/version.h create mode 100644 liblzma/api/lzma/vli.h create mode 100644 liblzma/check/check.c create mode 100644 liblzma/check/check.h create mode 100644 liblzma/check/crc_macros.h create mode 100644 liblzma/common/alone_decoder.c create mode 100644 liblzma/common/alone_decoder.h create mode 100644 liblzma/common/block_buffer_decoder.c create mode 100644 liblzma/common/block_decoder.c create mode 100644 liblzma/common/block_decoder.h create mode 100644 liblzma/common/block_header_decoder.c create mode 100644 liblzma/common/block_util.c create mode 100644 liblzma/common/common.c create mode 100644 liblzma/common/common.h create mode 100644 liblzma/common/easy_decoder_memusage.c create mode 100644 liblzma/common/easy_preset.c create mode 100644 liblzma/common/easy_preset.h create mode 100644 liblzma/common/filter_buffer_decoder.c create mode 100644 liblzma/common/filter_common.c create mode 100644 liblzma/common/filter_common.h create mode 100644 liblzma/common/filter_decoder.c create mode 100644 liblzma/common/filter_decoder.h create mode 100644 liblzma/common/filter_flags_decoder.c create mode 100644 liblzma/common/index.c create mode 100644 liblzma/common/index.h create mode 100644 liblzma/common/index_decoder.c create mode 100644 liblzma/common/index_hash.c create mode 100644 liblzma/common/mythread.h create mode 100644 liblzma/common/stream_decoder.c create mode 100644 liblzma/common/stream_decoder.h create mode 100644 liblzma/common/stream_flags_common.c create mode 100644 liblzma/common/stream_flags_common.h create mode 100644 liblzma/common/stream_flags_decoder.c create mode 100644 liblzma/common/sysdefs.h create mode 100644 liblzma/common/tuklib_common.h create mode 100644 liblzma/common/tuklib_config.h create mode 100644 liblzma/common/tuklib_integer.h create mode 100644 liblzma/common/vli_decoder.c create mode 100644 liblzma/common/vli_size.c create mode 100644 liblzma/delta/delta_common.c create mode 100644 liblzma/delta/delta_common.h create mode 100644 liblzma/delta/delta_decoder.c create mode 100644 liblzma/delta/delta_decoder.h create mode 100644 liblzma/delta/delta_private.h create mode 100644 liblzma/lz/lz_decoder.c create mode 100644 liblzma/lz/lz_decoder.h create mode 100644 liblzma/lzma/lzma2_decoder.c create mode 100644 liblzma/lzma/lzma2_decoder.h create mode 100644 liblzma/lzma/lzma_common.h create mode 100644 liblzma/lzma/lzma_decoder.c create mode 100644 liblzma/lzma/lzma_decoder.h create mode 100644 liblzma/lzma/lzma_encoder_presets.c create mode 100644 liblzma/rangecoder/range_common.h create mode 100644 liblzma/rangecoder/range_decoder.h create mode 100644 liblzma/rangecoder/range_encoder.h create mode 100644 liblzma/simple/arm.c create mode 100644 liblzma/simple/armthumb.c create mode 100644 liblzma/simple/ia64.c create mode 100644 liblzma/simple/powerpc.c create mode 100644 liblzma/simple/simple_coder.c create mode 100644 liblzma/simple/simple_coder.h create mode 100644 liblzma/simple/simple_decoder.c create mode 100644 liblzma/simple/simple_decoder.h create mode 100644 liblzma/simple/simple_private.h create mode 100644 liblzma/simple/sparc.c create mode 100644 liblzma/simple/x86.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 301924b..0db9922 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,6 +210,44 @@ SET(BZLIB_SRCS ${BZLIB_DIR}/bzlib.c ) +SET(LIBLZMA_DIR liblzma) +SET(LIBLZMA_SRCS + ${LIBLZMA_DIR}/check/check.c + ${LIBLZMA_DIR}/common/block_buffer_decoder.c + ${LIBLZMA_DIR}/common/block_decoder.c + ${LIBLZMA_DIR}/common/block_header_decoder.c + ${LIBLZMA_DIR}/common/block_util.c + ${LIBLZMA_DIR}/common/common.c + ${LIBLZMA_DIR}/common/easy_decoder_memusage.c + ${LIBLZMA_DIR}/common/easy_preset.c + ${LIBLZMA_DIR}/common/filter_buffer_decoder.c + ${LIBLZMA_DIR}/common/filter_common.c + ${LIBLZMA_DIR}/common/filter_decoder.c + ${LIBLZMA_DIR}/common/filter_flags_decoder.c + ${LIBLZMA_DIR}/common/index.c + ${LIBLZMA_DIR}/common/index_decoder.c + ${LIBLZMA_DIR}/common/index_hash.c + ${LIBLZMA_DIR}/common/stream_decoder.c + ${LIBLZMA_DIR}/common/stream_flags_common.c + ${LIBLZMA_DIR}/common/stream_flags_decoder.c + ${LIBLZMA_DIR}/common/vli_decoder.c + ${LIBLZMA_DIR}/common/vli_size.c + ${LIBLZMA_DIR}/delta/delta_common.c + ${LIBLZMA_DIR}/delta/delta_decoder.c + ${LIBLZMA_DIR}/lz/lz_decoder.c + ${LIBLZMA_DIR}/lzma/lzma2_decoder.c + ${LIBLZMA_DIR}/lzma/lzma_decoder.c + ${LIBLZMA_DIR}/lzma/lzma_encoder_presets.c + ${LIBLZMA_DIR}/simple/arm.c + ${LIBLZMA_DIR}/simple/armthumb.c + ${LIBLZMA_DIR}/simple/ia64.c + ${LIBLZMA_DIR}/simple/powerpc.c + ${LIBLZMA_DIR}/simple/simple_coder.c + ${LIBLZMA_DIR}/simple/simple_decoder.c + ${LIBLZMA_DIR}/simple/sparc.c + ${LIBLZMA_DIR}/simple/x86.c +) + SET(LIBFETCH_DIR libfetch) SET(LIBFETCH_SRCS ${LIBFETCH_DIR}/fetch.c @@ -515,6 +553,8 @@ ENDIF(MOJOSETUP_ARCHIVE_ZIP) # BINARY SIZE += 2 OPTION(MOJOSETUP_ARCHIVE_TAR "Enable TAR support" TRUE) IF(MOJOSETUP_ARCHIVE_TAR) + # !!! FIXME: the gzip/bzip2/xz support doesn't require .tar archives now. + # !!! FIXME: Maybe not ask here, so the questions look the same everywhere. ADD_DEFINITIONS(-DSUPPORT_TAR=1) OPTION(MOJOSETUP_ARCHIVE_TAR_GZ "Enable TAR.GZ support" TRUE) IF(MOJOSETUP_ARCHIVE_TAR_GZ) @@ -525,6 +565,11 @@ IF(MOJOSETUP_ARCHIVE_TAR) IF(MOJOSETUP_ARCHIVE_TAR_BZ2) SET(MOJOSETUP_INPUT_BZIP2 TRUE) ENDIF(MOJOSETUP_ARCHIVE_TAR_BZ2) + + OPTION(MOJOSETUP_ARCHIVE_TAR_XZ "Enable TAR.XZ support" TRUE) + IF(MOJOSETUP_ARCHIVE_TAR_XZ) + SET(MOJOSETUP_INPUT_XZ TRUE) + ENDIF(MOJOSETUP_ARCHIVE_TAR_XZ) ENDIF(MOJOSETUP_ARCHIVE_TAR) OPTION(MOJOSETUP_ARCHIVE_UZ2 "Enable UZ2 support" FALSE) @@ -562,6 +607,15 @@ IF(MOJOSETUP_INPUT_BZIP2) SET(MOJOSETUP_NEED_BZLIB TRUE) ENDIF(MOJOSETUP_INPUT_BZIP2) +# BINARY SIZE += ??? +IF(NOT MOJOSETUP_INPUT_XZ) # optional if something didn't force it. + OPTION(MOJOSETUP_INPUT_XZ "Enable XZ support" FALSE) +ENDIF(NOT MOJOSETUP_INPUT_XZ) + +IF(MOJOSETUP_INPUT_XZ) + ADD_DEFINITIONS(-DSUPPORT_XZ=1) + SET(MOJOSETUP_NEED_LIBLZMA TRUE) +ENDIF(MOJOSETUP_INPUT_XZ) # Image decoders for GUIs... @@ -697,6 +751,39 @@ IF(MOJOSETUP_NEED_BZLIB) ENDIF(MOJOSETUP_INTERNAL_BZLIB) ENDIF(MOJOSETUP_NEED_BZLIB) +IF(MOJOSETUP_NEED_LIBLZMA) + SET(HAVE_SYSTEM_LIBLZMA FALSE) + CHECK_INCLUDE_FILE(lzma.h HAVE_LZMA_H) + IF(HAVE_LZMA_H) + CHECK_LIBRARY_EXISTS("lzma" "lzma_stream_decoder" "" HAVE_LIBLZMA) + IF(HAVE_LIBLZMA) + SET(HAVE_SYSTEM_LIBLZMA TRUE) + ENDIF(HAVE_LIBLZMA) + ENDIF(HAVE_LZMA_H) + + IF(HAVE_SYSTEM_LIBLZMA) + OPTION(MOJOSETUP_INTERNAL_LIBLZMA "Link own liblzma instead of system library" FALSE) + ELSE(HAVE_SYSTEM_LIBLZMA) + SET(MOJOSETUP_INTERNAL_LIBLZMA TRUE) + ENDIF(HAVE_SYSTEM_LIBLZMA) + + # BINARY SIZE += ??? + IF(MOJOSETUP_INTERNAL_LIBLZMA) + ADD_DEFINITIONS(-DMOJOSETUP_INTERNAL_LIBLZMA=1) + INCLUDE_DIRECTORIES(${LIBLZMA_DIR}/api) + INCLUDE_DIRECTORIES(${LIBLZMA_DIR}/common) + INCLUDE_DIRECTORIES(${LIBLZMA_DIR}/check) + INCLUDE_DIRECTORIES(${LIBLZMA_DIR}/delta) + INCLUDE_DIRECTORIES(${LIBLZMA_DIR}/lz) + INCLUDE_DIRECTORIES(${LIBLZMA_DIR}/lzma) + INCLUDE_DIRECTORIES(${LIBLZMA_DIR}/rangecoder) + INCLUDE_DIRECTORIES(${LIBLZMA_DIR}/simple) + SET(OPTIONAL_SRCS ${OPTIONAL_SRCS} ${LIBLZMA_SRCS}) + ELSE(MOJOSETUP_INTERNAL_LIBLZMA) + SET(OPTIONAL_LIBS ${OPTIONAL_LIBS} lzma) + ENDIF(MOJOSETUP_INTERNAL_LIBLZMA) +ENDIF(MOJOSETUP_NEED_LIBLZMA) + IF(UNIX) CHECK_INCLUDE_FILE(sys/ucred.h HAVE_UCRED_H) IF(HAVE_UCRED_H) diff --git a/LICENSE.txt b/LICENSE.txt index 7fdefa6..46c9df2 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -24,9 +24,9 @@ (Please note that other pieces of code in MojoSetup fall under BSD-like and/or - public domain licenses: lua, libfetch, zlib, bzlib, stb_image, etc. All - software statically linked to MojoSetup was explicitly chosen to be friendly - with closed-source software, in case the installer needs a proprietary - change. I am not a lawyer and this is not legal advice. Please have a lawyer - consider the licenses if you have any concerns.) + public domain licenses: lua, libfetch, zlib, bzlib, liblzma, stb_image, etc. + All software statically linked to MojoSetup was explicitly chosen to be + friendly with closed-source software, in case the installer needs a + proprietary change. I am not a lawyer and this is not legal advice. Please + have a lawyer consider the licenses if you have any concerns.) diff --git a/fileio.c b/fileio.c index 47bf9e3..b308dc1 100644 --- a/fileio.c +++ b/fileio.c @@ -30,11 +30,13 @@ static const MojoArchiveType archives[] = { "zip", MojoArchive_createZIP, true }, { "tar", MojoArchive_createTAR, true }, { "tar.gz", MojoArchive_createTAR, true }, + { "tar.xz", MojoArchive_createTAR, true }, { "tar.bz2", MojoArchive_createTAR, true }, { "tgz", MojoArchive_createTAR, true }, { "tbz2", MojoArchive_createTAR, true }, { "tb2", MojoArchive_createTAR, true }, { "tbz", MojoArchive_createTAR, true }, + { "txz", MojoArchive_createTAR, true }, { "uz2", MojoArchive_createUZ2, false }, { "pck", MojoArchive_createPCK, true }, }; @@ -429,24 +431,236 @@ static MojoInput *make_bzip2_input(MojoInput *origio) #endif // SUPPORT_BZIP2 +#if SUPPORT_XZ + +#include "lzma.h" + +#define XZ_READBUFSIZE (128 * 1024) + +static MojoInput *make_xz_input(MojoInput *origio); + +typedef struct XZinfo +{ + MojoInput *origio; + uint64 uncompressed_position; + uint8 buffer[XZ_READBUFSIZE]; + lzma_stream stream; +} XZinfo; + +static void *mojoLzmaAlloc(void *opaque, size_t items, size_t size) +{ + return xmalloc(items * size); +} // mojoLzmaAlloc + +static void mojoLzmaFree(void *opaque, void *address) +{ + free(address); +} // mojoZlibFree + +static void initializeXZStream(lzma_stream *pstr) +{ + static lzma_allocator lzmaAlloc = { mojoLzmaAlloc, mojoLzmaFree }; + memset(pstr, '\0', sizeof (lzma_stream)); + pstr->allocator = &lzmaAlloc; +} // initializeXZStream + +static boolean MojoInput_xz_ready(MojoInput *io) +{ + return true; // !!! FIXME: ready if there are bytes uncompressed. +} // MojoInput_xz_ready + +static boolean MojoInput_xz_seek(MojoInput *io, uint64 offset) +{ + // This is all really expensive. + XZinfo *info = (XZinfo *) io->opaque; + + /* + * If seeking backwards, we need to redecode the file + * from the start and throw away the compressed bits until we hit + * the offset we need. If seeking forward, we still need to + * decode, but we don't rewind first. + */ + if (offset < info->uncompressed_position) + { + lzma_stream *strm = &info->stream; + if (!info->origio->seek(info->origio, 0)) + return false; + lzma_end(strm); + initializeXZStream(strm); + if (lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED) != LZMA_OK) + return false; + info->uncompressed_position = 0; + } // if + + while (info->uncompressed_position != offset) + { + uint8 buf[512]; + uint32 maxread; + int64 br; + + maxread = (uint32) (offset - info->uncompressed_position); + if (maxread > sizeof (buf)) + maxread = sizeof (buf); + + br = io->read(io, buf, maxread); + if (br != maxread) + return false; + } /* while */ + + return true; +} // MojoInput_xz_seek + +static int64 MojoInput_xz_tell(MojoInput *io) +{ + return (((XZinfo *) io->opaque)->uncompressed_position); +} // MojoInput_xz_tell + +static int64 MojoInput_xz_length(MojoInput *io) +{ + return -1; +} // MojoInput_xz_length + +static int64 MojoInput_xz_read(MojoInput *io, void *buf, uint32 bufsize) +{ + XZinfo *info = (XZinfo *) io->opaque; + MojoInput *origio = info->origio; + int64 retval = 0; + + if (bufsize == 0) + return 0; // quick rejection. + + info->stream.next_out = buf; + info->stream.avail_out = bufsize; + + while (retval < ((int64) bufsize)) + { + const uint32 before = info->stream.total_out; + lzma_ret rc; + + lzma_action action = LZMA_FINISH; + if (info->stream.avail_in == 0) + { + int64 br = origio->length(origio) - origio->tell(origio); + if (br > 0) + { + action = LZMA_RUN; + if (br > XZ_READBUFSIZE) + br = XZ_READBUFSIZE; + + br = origio->read(origio, info->buffer, (uint32) br); + if (br <= 0) + return -1; + + info->stream.next_in = info->buffer; + info->stream.avail_in = (uint32) br; + } // if + } // if + + rc = lzma_code(&info->stream, LZMA_RUN); + retval += (info->stream.total_out - before); + + if (rc != LZMA_OK) + return -1; + } // while + + assert(retval >= 0); + info->uncompressed_position += (uint32) retval; + + return retval; +} // MojoInput_xz_read + +static MojoInput* MojoInput_xz_duplicate(MojoInput *io) +{ + XZinfo *info = (XZinfo *) io->opaque; + MojoInput *retval = NULL; + MojoInput *newio = info->origio->duplicate(info->origio); + if (newio != NULL) + { + retval = make_xz_input(newio); + if (retval != NULL) + retval->seek(retval, io->tell(io)); // slow, slow, slow... + } // if + return retval; +} // MojoInput_xz_duplicate + +static void MojoInput_xz_close(MojoInput *io) +{ + XZinfo *info = (XZinfo *) io->opaque; + if (info->origio != NULL) + info->origio->close(info->origio); + lzma_end(&info->stream); + free(info); + free(io); +} // MojoInput_xz_close + +static MojoInput *make_xz_input(MojoInput *origio) +{ + MojoInput *io = NULL; + XZinfo *info = (XZinfo *) xmalloc(sizeof (XZinfo)); + lzma_stream *strm = &info->stream; + + // UINT64_MAX is the memory usage limit (so basically, no artificial + // limit here). Internally, this holds its entire dictionary in + // RAM (8 megabytes by default), plus a few other structures, so we + // shouldn't eat tons of memory in the normal case. Note that the + // dictionary can max out at _four gigabytes_, but obviously no one does + // that. + initializeXZStream(strm); + if (lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED) != LZMA_OK) + { + free(info); + return NULL; + } // if + + info->origio = origio; + + io = (MojoInput *) xmalloc(sizeof (MojoInput)); + io->ready = MojoInput_xz_ready; + io->read = MojoInput_xz_read; + io->seek = MojoInput_xz_seek; + io->tell = MojoInput_xz_tell; + io->length = MojoInput_xz_length; + io->duplicate = MojoInput_xz_duplicate; + io->close = MojoInput_xz_close; + io->opaque = info; + return io; +} // make_xz_input + +#endif // SUPPORT_XZ + + MojoInput *MojoInput_newCompressedStream(MojoInput *origio) { -#if SUPPORT_GZIP || SUPPORT_BZIP2 +#if SUPPORT_GZIP || SUPPORT_BZIP2 || SUPPORT_XZ // Look at the first piece of the file to decide if it is compressed // by a general compression algorithm, and if so, wrap the MojoInput // in a decompressor. - uint8 magic[4]; + uint8 magic[6]; const int64 br = origio->read(origio, magic, sizeof (magic)); if ((origio->seek(origio, 0)) && (br == sizeof (magic))) { #if SUPPORT_GZIP - if ((magic[0] == 0x1F) && (magic[1] == 0x8B) && (magic[2] == 0x08)) - return make_gzip_input(origio); + { + static const uint8 gzip_sig[] = { 0x1F, 0x8B, 0x08 }; + if (memcmp(magic, gzip_sig, sizeof (gzip_sig)) == 0) + return make_gzip_input(origio); + } #endif #if SUPPORT_BZIP2 - if ((magic[0] == 0x42) && (magic[1] == 0x5A)) - return make_bzip2_input(origio); + { + static const uint8 bzip2_sig[] = { 0x42, 0x5A }; + if (memcmp(magic, bzip2_sig, sizeof (bzip2_sig)) == 0) + return make_bzip2_input(origio); + } + #endif + + #if SUPPORT_XZ + { + static const uint8 xz_sig[] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 }; + if (memcmp(magic, xz_sig, sizeof (xz_sig)) == 0) + return make_xz_input(origio); + } #endif } // if #endif diff --git a/liblzma/README.txt b/liblzma/README.txt new file mode 100644 index 0000000..7dd4631 --- /dev/null +++ b/liblzma/README.txt @@ -0,0 +1,13 @@ + +This is the source code to liblzma from XZ Utils: + + http://tukaani.org/xz/ + +This is a snapshot of the xz-5.0.0 release. Changes I've made to it are + wrapped in #if __MOJOSETUP__ blocks. I've tried to agressively delete files + we don't use at all, too. + +The source code in this directory is public domain. + +--ryan. + diff --git a/liblzma/api/lzma.h b/liblzma/api/lzma.h new file mode 100644 index 0000000..fb874c3 --- /dev/null +++ b/liblzma/api/lzma.h @@ -0,0 +1,313 @@ +/** + * \file api/lzma.h + * \brief The public API of liblzma data compression library + * + * liblzma is a public domain general-purpose data compression library with + * a zlib-like API. The native file format is .xz, but also the old .lzma + * format and raw (no headers) streams are supported. Multiple compression + * algorithms (filters) are supported. Currently LZMA2 is the primary filter. + * + * liblzma is part of XZ Utils . XZ Utils includes + * a gzip-like command line tool named xz and some other tools. XZ Utils + * is developed and maintained by Lasse Collin. + * + * Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK + * . + * + * The SHA-256 implementation is based on the public domain code found from + * 7-Zip , which has a modified version of the public + * domain SHA-256 code found from Crypto++ . + * The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef LZMA_H +#define LZMA_H + +/***************************** + * Required standard headers * + *****************************/ + +/* + * liblzma API headers need some standard types and macros. To allow + * including lzma.h without requiring the application to include other + * headers first, lzma.h includes the required standard headers unless + * they already seem to be included already or if LZMA_MANUAL_HEADERS + * has been defined. + * + * Here's what types and macros are needed and from which headers: + * - stddef.h: size_t, NULL + * - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), uint64_C(n), + * UINT32_MAX, UINT64_MAX + * + * However, inttypes.h is a little more portable than stdint.h, although + * inttypes.h declares some unneeded things compared to plain stdint.h. + * + * The hacks below aren't perfect, specifically they assume that inttypes.h + * exists and that it typedefs at least uint8_t, uint32_t, and uint64_t, + * and that, in case of incomplete inttypes.h, unsigned int is 32-bit. + * If the application already takes care of setting up all the types and + * macros properly (for example by using gnulib's stdint.h or inttypes.h), + * we try to detect that the macros are already defined and don't include + * inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to + * force this file to never include any system headers. + * + * Some could argue that liblzma API should provide all the required types, + * for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was + * seen as an unnecessary mess, since most systems already provide all the + * necessary types and macros in the standard headers. + * + * Note that liblzma API still has lzma_bool, because using stdbool.h would + * break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't + * necessarily the same as sizeof(bool) in C++. + */ + +#ifndef LZMA_MANUAL_HEADERS + /* + * I suppose this works portably also in C++. Note that in C++, + * we need to get size_t into the global namespace. + */ +# include + + /* + * Skip inttypes.h if we already have all the required macros. If we + * have the macros, we assume that we have the matching typedefs too. + */ +# if !defined(UINT32_C) || !defined(UINT64_C) \ + || !defined(UINT32_MAX) || !defined(UINT64_MAX) + /* + * MSVC has no C99 support, and thus it cannot be used to + * compile liblzma. The liblzma API has to still be usable + * from MSVC, so we need to define the required standard + * integer types here. + */ +# if defined(_WIN32) && defined(_MSC_VER) + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else + /* Use the standard inttypes.h. */ +# ifdef __cplusplus + /* + * C99 sections 7.18.2 and 7.18.4 specify + * that C++ implementations define the limit + * and constant macros only if specifically + * requested. Note that if you want the + * format macros (PRIu64 etc.) too, you need + * to define __STDC_FORMAT_MACROS before + * including lzma.h, since re-including + * inttypes.h with __STDC_FORMAT_MACROS + * defined doesn't necessarily work. + */ +# ifndef __STDC_LIMIT_MACROS +# define __STDC_LIMIT_MACROS 1 +# endif +# ifndef __STDC_CONSTANT_MACROS +# define __STDC_CONSTANT_MACROS 1 +# endif +# endif + +# include +# endif + + /* + * Some old systems have only the typedefs in inttypes.h, and + * lack all the macros. For those systems, we need a few more + * hacks. We assume that unsigned int is 32-bit and unsigned + * long is either 32-bit or 64-bit. If these hacks aren't + * enough, the application has to setup the types manually + * before including lzma.h. + */ +# ifndef UINT32_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT32_C(n) n ## UI32 +# else +# define UINT32_C(n) n ## U +# endif +# endif + +# ifndef UINT64_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT64_C(n) n ## UI64 +# else + /* Get ULONG_MAX. */ +# include +# if ULONG_MAX == 4294967295UL +# define UINT64_C(n) n ## ULL +# else +# define UINT64_C(n) n ## UL +# endif +# endif +# endif + +# ifndef UINT32_MAX +# define UINT32_MAX (UINT32_C(4294967295)) +# endif + +# ifndef UINT64_MAX +# define UINT64_MAX (UINT64_C(18446744073709551615)) +# endif +# endif +#endif /* ifdef LZMA_MANUAL_HEADERS */ + + +/****************** + * LZMA_API macro * + ******************/ + +/* + * Some systems require that the functions and function pointers are + * declared specially in the headers. LZMA_API_IMPORT is for importing + * symbols and LZMA_API_CALL is to specify the calling convention. + * + * By default it is assumed that the application will link dynamically + * against liblzma. #define LZMA_API_STATIC in your application if you + * want to link against static liblzma. If you don't care about portability + * to operating systems like Windows, or at least don't care about linking + * against static liblzma on them, don't worry about LZMA_API_STATIC. That + * is, most developers will never need to use LZMA_API_STATIC. + * + * The GCC variants are a special case on Windows (Cygwin and MinGW). + * We rely on GCC doing the right thing with its auto-import feature, + * and thus don't use __declspec(dllimport). This way developers don't + * need to worry about LZMA_API_STATIC. Also the calling convention is + * omitted on Cygwin but not on MinGW. + */ +#ifndef LZMA_API_IMPORT +# if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__GNUC__) +# define LZMA_API_IMPORT __declspec(dllimport) +# else +# define LZMA_API_IMPORT +# endif +#endif + +#ifndef LZMA_API_CALL +# if defined(_WIN32) && !defined(__CYGWIN__) +# define LZMA_API_CALL __cdecl +# else +# define LZMA_API_CALL +# endif +#endif + +#ifndef LZMA_API +# define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL +#endif + + +/*********** + * nothrow * + ***********/ + +/* + * None of the functions in liblzma may throw an exception. Even + * the functions that use callback functions won't throw exceptions, + * because liblzma would break if a callback function threw an exception. + */ +#ifndef lzma_nothrow +# if defined(__cplusplus) +# define lzma_nothrow throw() +# elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) +# define lzma_nothrow __attribute__((__nothrow__)) +# else +# define lzma_nothrow +# endif +#endif + + +/******************** + * GNU C extensions * + ********************/ + +/* + * GNU C extensions are used conditionally in the public API. It doesn't + * break anything if these are sometimes enabled and sometimes not, only + * affects warnings and optimizations. + */ +#if __GNUC__ >= 3 +# ifndef lzma_attribute +# define lzma_attribute(attr) __attribute__(attr) +# endif + + /* warn_unused_result was added in GCC 3.4. */ +# ifndef lzma_attr_warn_unused_result +# if __GNUC__ == 3 && __GNUC_MINOR__ < 4 +# define lzma_attr_warn_unused_result +# endif +# endif + +#else +# ifndef lzma_attribute +# define lzma_attribute(attr) +# endif +#endif + + +#ifndef lzma_attr_pure +# define lzma_attr_pure lzma_attribute((__pure__)) +#endif + +#ifndef lzma_attr_const +# define lzma_attr_const lzma_attribute((__const__)) +#endif + +#ifndef lzma_attr_warn_unused_result +# define lzma_attr_warn_unused_result \ + lzma_attribute((__warn_unused_result__)) +#endif + + +/************** + * Subheaders * + **************/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Subheaders check that this is defined. It is to prevent including + * them directly from applications. + */ +#define LZMA_H_INTERNAL 1 + +/* Basic features */ +#include "lzma/version.h" +#include "lzma/base.h" +#include "lzma/vli.h" +#include "lzma/check.h" + +/* Filters */ +#include "lzma/filter.h" +#include "lzma/bcj.h" +#include "lzma/delta.h" +#include "lzma/lzma.h" + +/* Container formats */ +#include "lzma/container.h" + +/* Advanced features */ +#include "lzma/stream_flags.h" +#include "lzma/block.h" +#include "lzma/index.h" +#include "lzma/index_hash.h" + +/* Hardware information */ +#include "lzma/hardware.h" + +/* + * All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications + * re-including the subheaders. + */ +#undef LZMA_H_INTERNAL + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef LZMA_H */ diff --git a/liblzma/api/lzma/base.h b/liblzma/api/lzma/base.h new file mode 100644 index 0000000..43dde8d --- /dev/null +++ b/liblzma/api/lzma/base.h @@ -0,0 +1,601 @@ +/** + * \file lzma/base.h + * \brief Data types and functions used in many places in liblzma API + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Boolean + * + * This is here because C89 doesn't have stdbool.h. To set a value for + * variables having type lzma_bool, you can use + * - C99's `true' and `false' from stdbool.h; + * - C++'s internal `true' and `false'; or + * - integers one (true) and zero (false). + */ +typedef unsigned char lzma_bool; + + +/** + * \brief Type of reserved enumeration variable in structures + * + * To avoid breaking library ABI when new features are added, several + * structures contain extra variables that may be used in future. Since + * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may + * even vary depending on the range of enumeration constants, we specify + * a separate type to be used for reserved enumeration variables. All + * enumeration constants in liblzma API will be non-negative and less + * than 128, which should guarantee that the ABI won't break even when + * new constants are added to existing enumerations. + */ +typedef enum { + LZMA_RESERVED_ENUM = 0 +} lzma_reserved_enum; + + +/** + * \brief Return values used by several functions in liblzma + * + * Check the descriptions of specific functions to find out which return + * values they can return. With some functions the return values may have + * more specific meanings than described here; those differences are + * described per-function basis. + */ +typedef enum { + LZMA_OK = 0, + /**< + * \brief Operation completed successfully + */ + + LZMA_STREAM_END = 1, + /**< + * \brief End of stream was reached + * + * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or + * LZMA_FINISH was finished. In decoder, this indicates + * that all the data was successfully decoded. + * + * In all cases, when LZMA_STREAM_END is returned, the last + * output bytes should be picked from strm->next_out. + */ + + LZMA_NO_CHECK = 2, + /**< + * \brief Input stream has no integrity check + * + * This return value can be returned only if the + * LZMA_TELL_NO_CHECK flag was used when initializing + * the decoder. LZMA_NO_CHECK is just a warning, and + * the decoding can be continued normally. + * + * It is possible to call lzma_get_check() immediately after + * lzma_code has returned LZMA_NO_CHECK. The result will + * naturally be LZMA_CHECK_NONE, but the possibility to call + * lzma_get_check() may be convenient in some applications. + */ + + LZMA_UNSUPPORTED_CHECK = 3, + /**< + * \brief Cannot calculate the integrity check + * + * The usage of this return value is different in encoders + * and decoders. + * + * Encoders can return this value only from the initialization + * function. If initialization fails with this value, the + * encoding cannot be done, because there's no way to produce + * output with the correct integrity check. + * + * Decoders can return this value only from lzma_code() and + * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when + * initializing the decoder. The decoding can still be + * continued normally even if the check type is unsupported, + * but naturally the check will not be validated, and possible + * errors may go undetected. + * + * With decoder, it is possible to call lzma_get_check() + * immediately after lzma_code() has returned + * LZMA_UNSUPPORTED_CHECK. This way it is possible to find + * out what the unsupported Check ID was. + */ + + LZMA_GET_CHECK = 4, + /**< + * \brief Integrity check type is now available + * + * This value can be returned only by the lzma_code() function + * and only if the decoder was initialized with the + * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the + * application that it may now call lzma_get_check() to find + * out the Check ID. This can be used, for example, to + * implement a decoder that accepts only files that have + * strong enough integrity check. + */ + + LZMA_MEM_ERROR = 5, + /**< + * \brief Cannot allocate memory + * + * Memory allocation failed, or the size of the allocation + * would be greater than SIZE_MAX. + * + * Due to internal implementation reasons, the coding cannot + * be continued even if more memory were made available after + * LZMA_MEM_ERROR. + */ + + LZMA_MEMLIMIT_ERROR = 6, + /** + * \brief Memory usage limit was reached + * + * Decoder would need more memory than allowed by the + * specified memory usage limit. To continue decoding, + * the memory usage limit has to be increased with + * lzma_memlimit_set(). + */ + + LZMA_FORMAT_ERROR = 7, + /**< + * \brief File format not recognized + * + * The decoder did not recognize the input as supported file + * format. This error can occur, for example, when trying to + * decode .lzma format file with lzma_stream_decoder, + * because lzma_stream_decoder accepts only the .xz format. + */ + + LZMA_OPTIONS_ERROR = 8, + /**< + * \brief Invalid or unsupported options + * + * Invalid or unsupported options, for example + * - unsupported filter(s) or filter options; or + * - reserved bits set in headers (decoder only). + * + * Rebuilding liblzma with more features enabled, or + * upgrading to a newer version of liblzma may help. + */ + + LZMA_DATA_ERROR = 9, + /**< + * \brief Data is corrupt + * + * The usage of this return value is different in encoders + * and decoders. In both encoder and decoder, the coding + * cannot continue after this error. + * + * Encoders return this if size limits of the target file + * format would be exceeded. These limits are huge, thus + * getting this error from an encoder is mostly theoretical. + * For example, the maximum compressed and uncompressed + * size of a .xz Stream is roughly 8 EiB (2^63 bytes). + * + * Decoders return this error if the input data is corrupt. + * This can mean, for example, invalid CRC32 in headers + * or invalid check of uncompressed data. + */ + + LZMA_BUF_ERROR = 10, + /**< + * \brief No progress is possible + * + * This error code is returned when the coder cannot consume + * any new input and produce any new output. The most common + * reason for this error is that the input stream being + * decoded is truncated or corrupt. + * + * This error is not fatal. Coding can be continued normally + * by providing more input and/or more output space, if + * possible. + * + * Typically the first call to lzma_code() that can do no + * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only + * the second consecutive call doing no progress will return + * LZMA_BUF_ERROR. This is intentional. + * + * With zlib, Z_BUF_ERROR may be returned even if the + * application is doing nothing wrong, so apps will need + * to handle Z_BUF_ERROR specially. The above hack + * guarantees that liblzma never returns LZMA_BUF_ERROR + * to properly written applications unless the input file + * is truncated or corrupt. This should simplify the + * applications a little. + */ + + LZMA_PROG_ERROR = 11, + /**< + * \brief Programming error + * + * This indicates that the arguments given to the function are + * invalid or the internal state of the decoder is corrupt. + * - Function arguments are invalid or the structures + * pointed by the argument pointers are invalid + * e.g. if strm->next_out has been set to NULL and + * strm->avail_out > 0 when calling lzma_code(). + * - lzma_* functions have been called in wrong order + * e.g. lzma_code() was called right after lzma_end(). + * - If errors occur randomly, the reason might be flaky + * hardware. + * + * If you think that your code is correct, this error code + * can be a sign of a bug in liblzma. See the documentation + * how to report bugs. + */ +} lzma_ret; + + +/** + * \brief The `action' argument for lzma_code() + * + * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH, + * the same `action' must is used until lzma_code() returns LZMA_STREAM_END. + * Also, the amount of input (that is, strm->avail_in) must not be modified + * by the application until lzma_code() returns LZMA_STREAM_END. Changing the + * `action' or modifying the amount of input will make lzma_code() return + * LZMA_PROG_ERROR. + */ +typedef enum { + LZMA_RUN = 0, + /**< + * \brief Continue coding + * + * Encoder: Encode as much input as possible. Some internal + * buffering will probably be done (depends on the filter + * chain in use), which causes latency: the input used won't + * usually be decodeable from the output of the same + * lzma_code() call. + * + * Decoder: Decode as much input as possible and produce as + * much output as possible. + */ + + LZMA_SYNC_FLUSH = 1, + /**< + * \brief Make all the input available at output + * + * Normally the encoder introduces some latency. + * LZMA_SYNC_FLUSH forces all the buffered data to be + * available at output without resetting the internal + * state of the encoder. This way it is possible to use + * compressed stream for example for communication over + * network. + * + * Only some filters support LZMA_SYNC_FLUSH. Trying to use + * LZMA_SYNC_FLUSH with filters that don't support it will + * make lzma_code() return LZMA_OPTIONS_ERROR. For example, + * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does. + * + * Using LZMA_SYNC_FLUSH very often can dramatically reduce + * the compression ratio. With some filters (for example, + * LZMA2), fine-tuning the compression options may help + * mitigate this problem significantly (for example, + * match finder with LZMA2). + * + * Decoders don't support LZMA_SYNC_FLUSH. + */ + + LZMA_FULL_FLUSH = 2, + /**< + * \brief Finish encoding of the current Block + * + * All the input data going to the current Block must have + * been given to the encoder (the last bytes can still be + * pending in* next_in). Call lzma_code() with LZMA_FULL_FLUSH + * until it returns LZMA_STREAM_END. Then continue normally + * with LZMA_RUN or finish the Stream with LZMA_FINISH. + * + * This action is currently supported only by Stream encoder + * and easy encoder (which uses Stream encoder). If there is + * no unfinished Block, no empty Block is created. + */ + + LZMA_FINISH = 3 + /**< + * \brief Finish the coding operation + * + * All the input data must have been given to the encoder + * (the last bytes can still be pending in next_in). + * Call lzma_code() with LZMA_FINISH until it returns + * LZMA_STREAM_END. Once LZMA_FINISH has been used, + * the amount of input must no longer be changed by + * the application. + * + * When decoding, using LZMA_FINISH is optional unless the + * LZMA_CONCATENATED flag was used when the decoder was + * initialized. When LZMA_CONCATENATED was not used, the only + * effect of LZMA_FINISH is that the amount of input must not + * be changed just like in the encoder. + */ +} lzma_action; + + +/** + * \brief Custom functions for memory handling + * + * A pointer to lzma_allocator may be passed via lzma_stream structure + * to liblzma, and some advanced functions take a pointer to lzma_allocator + * as a separate function argument. The library will use the functions + * specified in lzma_allocator for memory handling instead of the default + * malloc() and free(). C++ users should note that the custom memory + * handling functions must not throw exceptions. + * + * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is + * OK to change these function pointers in the middle of the coding + * process, but obviously it must be done carefully to make sure that the + * replacement `free' can deallocate memory allocated by the earlier + * `alloc' function(s). + */ +typedef struct { + /** + * \brief Pointer to a custom memory allocation function + * + * If you don't want a custom allocator, but still want + * custom free(), set this to NULL and liblzma will use + * the standard malloc(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param nmemb Number of elements like in calloc(). liblzma + * will always set nmemb to 1, so it is safe to + * ignore nmemb in a custom allocator if you like. + * The nmemb argument exists only for + * compatibility with zlib and libbzip2. + * \param size Size of an element in bytes. + * liblzma never sets this to zero. + * + * \return Pointer to the beginning of a memory block of + * `size' bytes, or NULL if allocation fails + * for some reason. When allocation fails, functions + * of liblzma return LZMA_MEM_ERROR. + * + * The allocator should not waste time zeroing the allocated buffers. + * This is not only about speed, but also memory usage, since the + * operating system kernel doesn't necessarily allocate the requested + * memory in physical memory until it is actually used. With small + * input files, liblzma may actually need only a fraction of the + * memory that it requested for allocation. + * + * \note LZMA_MEM_ERROR is also used when the size of the + * allocation would be greater than SIZE_MAX. Thus, + * don't assume that the custom allocator must have + * returned NULL if some function from liblzma + * returns LZMA_MEM_ERROR. + */ + void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size); + + /** + * \brief Pointer to a custom memory freeing function + * + * If you don't want a custom freeing function, but still + * want a custom allocator, set this to NULL and liblzma + * will use the standard free(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param ptr Pointer returned by lzma_allocator.alloc(), + * or when it is set to NULL, a pointer returned + * by the standard malloc(). + */ + void (LZMA_API_CALL *free)(void *opaque, void *ptr); + + /** + * \brief Pointer passed to .alloc() and .free() + * + * opaque is passed as the first argument to lzma_allocator.alloc() + * and lzma_allocator.free(). This intended to ease implementing + * custom memory allocation functions for use with liblzma. + * + * If you don't need this, you should set this to NULL. + */ + void *opaque; + +} lzma_allocator; + + +/** + * \brief Internal data structure + * + * The contents of this structure is not visible outside the library. + */ +typedef struct lzma_internal_s lzma_internal; + + +/** + * \brief Passing data to and from liblzma + * + * The lzma_stream structure is used for + * - passing pointers to input and output buffers to liblzma; + * - defining custom memory hander functions; and + * - holding a pointer to coder-specific internal data structures. + * + * Typical usage: + * + * - After allocating lzma_stream (on stack or with malloc()), it must be + * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details). + * + * - Initialize a coder to the lzma_stream, for example by using + * lzma_easy_encoder() or lzma_auto_decoder(). Some notes: + * - In contrast to zlib, strm->next_in and strm->next_out are + * ignored by all initialization functions, thus it is safe + * to not initialize them yet. + * - The initialization functions always set strm->total_in and + * strm->total_out to zero. + * - If the initialization function fails, no memory is left allocated + * that would require freeing with lzma_end() even if some memory was + * associated with the lzma_stream structure when the initialization + * function was called. + * + * - Use lzma_code() to do the actual work. + * + * - Once the coding has been finished, the existing lzma_stream can be + * reused. It is OK to reuse lzma_stream with different initialization + * function without calling lzma_end() first. Old allocations are + * automatically freed. + * + * - Finally, use lzma_end() to free the allocated memory. lzma_end() never + * frees the lzma_stream structure itself. + * + * Application may modify the values of total_in and total_out as it wants. + * They are updated by liblzma to match the amount of data read and + * written, but aren't used for anything else. + */ +typedef struct { + const uint8_t *next_in; /**< Pointer to the next input byte. */ + size_t avail_in; /**< Number of available input bytes in next_in. */ + uint64_t total_in; /**< Total number of bytes read by liblzma. */ + + uint8_t *next_out; /**< Pointer to the next output position. */ + size_t avail_out; /**< Amount of free space in next_out. */ + uint64_t total_out; /**< Total number of bytes written by liblzma. */ + + /** + * \brief Custom memory allocation functions + * + * In most cases this is NULL which makes liblzma use + * the standard malloc() and free(). + */ + lzma_allocator *allocator; + + /** Internal state is not visible to applications. */ + lzma_internal *internal; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. Excluding the initialization of this structure, + * you should not touch these, because the names of these variables + * may change. + */ + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + void *reserved_ptr4; + uint64_t reserved_int1; + uint64_t reserved_int2; + size_t reserved_int3; + size_t reserved_int4; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + +} lzma_stream; + + +/** + * \brief Initialization for lzma_stream + * + * When you declare an instance of lzma_stream, you can immediately + * initialize it so that initialization functions know that no memory + * has been allocated yet: + * + * lzma_stream strm = LZMA_STREAM_INIT; + * + * If you need to initialize a dynamically allocated lzma_stream, you can use + * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this + * violates the C standard since NULL may have different internal + * representation than zero, but it should be portable enough in practice. + * Anyway, for maximum portability, you can use something like this: + * + * lzma_stream tmp = LZMA_STREAM_INIT; + * *strm = tmp; + */ +#define LZMA_STREAM_INIT \ + { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ + NULL, NULL, NULL, NULL, 0, 0, 0, 0, \ + LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } + + +/** + * \brief Encode or decode data + * + * Once the lzma_stream has been successfully initialized (e.g. with + * lzma_stream_encoder()), the actual encoding or decoding is done + * using this function. The application has to update strm->next_in, + * strm->avail_in, strm->next_out, and strm->avail_out to pass input + * to and get output from liblzma. + * + * See the description of the coder-specific initialization function to find + * out what `action' values are supported by the coder. + */ +extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Free memory allocated for the coder data structures + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * + * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other + * members of the lzma_stream structure are touched. + * + * \note zlib indicates an error if application end()s unfinished + * stream structure. liblzma doesn't do this, and assumes that + * application knows what it is doing. + */ +extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow; + + +/** + * \brief Get the memory usage of decoder filter chain + * + * This function is currently supported only when *strm has been initialized + * with a function that takes a memlimit argument. With other functions, you + * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage() + * to estimate the memory requirements. + * + * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big + * the memory usage limit should have been to decode the input. Note that + * this may give misleading information if decoding .xz Streams that have + * multiple Blocks, because each Block can have different memory requirements. + * + * \return How much memory is currently allocated for the filter + * decoders. If no filter chain is currently allocated, + * some non-zero value is still returned, which is less than + * or equal to what any filter chain would indicate as its + * memory requirement. + * + * If this function isn't supported by *strm or some other error + * occurs, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the current memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return On success, the current memory usage limit is returned + * (always non-zero). On error, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return - LZMA_OK: New memory usage limit successfully set. + * - LZMA_MEMLIMIT_ERROR: The new limit is too small. + * The limit was not changed. + * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't + * support memory usage limit or memlimit was zero. + */ +extern LZMA_API(lzma_ret) lzma_memlimit_set( + lzma_stream *strm, uint64_t memlimit) lzma_nothrow; diff --git a/liblzma/api/lzma/bcj.h b/liblzma/api/lzma/bcj.h new file mode 100644 index 0000000..8e37538 --- /dev/null +++ b/liblzma/api/lzma/bcj.h @@ -0,0 +1,90 @@ +/** + * \file lzma/bcj.h + * \brief Branch/Call/Jump conversion filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* Filter IDs for lzma_filter.id */ + +#define LZMA_FILTER_X86 LZMA_VLI_C(0x04) + /**< + * Filter for x86 binaries + */ + +#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05) + /**< + * Filter for Big endian PowerPC binaries + */ + +#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06) + /**< + * Filter for IA-64 (Itanium) binaries. + */ + +#define LZMA_FILTER_ARM LZMA_VLI_C(0x07) + /**< + * Filter for ARM binaries. + */ + +#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08) + /**< + * Filter for ARM-Thumb binaries. + */ + +#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09) + /**< + * Filter for SPARC binaries. + */ + + +/** + * \brief Options for BCJ filters + * + * The BCJ filters never change the size of the data. Specifying options + * for them is optional: if pointer to options is NULL, default value is + * used. You probably never need to specify options to BCJ filters, so just + * set the options pointer to NULL and be happy. + * + * If options with non-default values have been specified when encoding, + * the same options must also be specified when decoding. + * + * \note At the moment, none of the BCJ filters support + * LZMA_SYNC_FLUSH. If LZMA_SYNC_FLUSH is specified, + * LZMA_OPTIONS_ERROR will be returned. If there is need, + * partial support for LZMA_SYNC_FLUSH can be added in future. + * Partial means that flushing would be possible only at + * offsets that are multiple of 2, 4, or 16 depending on + * the filter, except x86 which cannot be made to support + * LZMA_SYNC_FLUSH predictably. + */ +typedef struct { + /** + * \brief Start offset for conversions + * + * This setting is useful only when the same filter is used + * _separately_ for multiple sections of the same executable file, + * and the sections contain cross-section branch/call/jump + * instructions. In that case it is beneficial to set the start + * offset of the non-first sections so that the relative addresses + * of the cross-section branch/call/jump instructions will use the + * same absolute addresses as in the first section. + * + * When the pointer to options is NULL, the default value (zero) + * is used. + */ + uint32_t start_offset; + +} lzma_options_bcj; diff --git a/liblzma/api/lzma/block.h b/liblzma/api/lzma/block.h new file mode 100644 index 0000000..3019bf9 --- /dev/null +++ b/liblzma/api/lzma/block.h @@ -0,0 +1,529 @@ +/** + * \file lzma/block.h + * \brief .xz Block handling + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Options for the Block and Block Header encoders and decoders + * + * Different Block handling functions use different parts of this structure. + * Some read some members, other functions write, and some do both. Only the + * members listed for reading need to be initialized when the specified + * functions are called. The members marked for writing will be assigned + * new values at some point either by calling the given function or by + * later calls to lzma_code(). + */ +typedef struct { + /** + * \brief Block format version + * + * To prevent API and ABI breakages if new features are needed in + * the Block field, a version number is used to indicate which + * fields in this structure are in use. For now, version must always + * be zero. With non-zero version, most Block related functions will + * return LZMA_OPTIONS_ERROR. + * + * Read by: + * - All functions that take pointer to lzma_block as argument, + * including lzma_block_header_decode(). + * + * Written by: + * - lzma_block_header_decode() + */ + uint32_t version; + + /** + * \brief Size of the Block Header field + * + * This is always a multiple of four. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_size() + * - lzma_block_buffer_encode() + */ + uint32_t header_size; +# define LZMA_BLOCK_HEADER_SIZE_MIN 8 +# define LZMA_BLOCK_HEADER_SIZE_MAX 1024 + + /** + * \brief Type of integrity Check + * + * The Check ID is not stored into the Block Header, thus its value + * must be provided also when decoding. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_check check; + + /** + * \brief Size of the Compressed Data in bytes + * + * Encoding: If this is not LZMA_VLI_UNKNOWN, Block Header encoder + * will store this value to the Block Header. Block encoder doesn't + * care about this value, but will set it once the encoding has been + * finished. + * + * Decoding: If this is not LZMA_VLI_UNKNOWN, Block decoder will + * verify that the size of the Compressed Data field matches + * compressed_size. + * + * Usually you don't know this value when encoding in streamed mode, + * and thus cannot write this field into the Block Header. + * + * In non-streamed mode you can reserve space for this field before + * encoding the actual Block. After encoding the data, finish the + * Block by encoding the Block Header. Steps in detail: + * + * - Set compressed_size to some big enough value. If you don't know + * better, use LZMA_VLI_MAX, but remember that bigger values take + * more space in Block Header. + * + * - Call lzma_block_header_size() to see how much space you need to + * reserve for the Block Header. + * + * - Encode the Block using lzma_block_encoder() and lzma_code(). + * It sets compressed_size to the correct value. + * + * - Use lzma_block_header_encode() to encode the Block Header. + * Because space was reserved in the first step, you don't need + * to call lzma_block_header_size() anymore, because due to + * reserving, header_size has to be big enough. If it is "too big", + * lzma_block_header_encode() will add enough Header Padding to + * make Block Header to match the size specified by header_size. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed Size in bytes + * + * This is handled very similarly to compressed_size above. + * + * uncompressed_size is needed by fewer functions than + * compressed_size. This is because uncompressed_size isn't + * needed to validate that Block stays within proper limits. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli uncompressed_size; + + /** + * \brief Array of filters + * + * There can be 1-4 filters. The end of the array is marked with + * .id = LZMA_VLI_UNKNOWN. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode(): Note that this does NOT free() + * the old filter options structures. All unused filters[] will + * have .id == LZMA_VLI_UNKNOWN and .options == NULL. If + * decoding fails, all filters[] are guaranteed to be + * LZMA_VLI_UNKNOWN and NULL. + * + * \note Because of the array is terminated with + * .id = LZMA_VLI_UNKNOWN, the actual array must + * have LZMA_FILTERS_MAX + 1 members or the Block + * Header decoder will overflow the buffer. + */ + lzma_filter *filters; + + /** + * \brief Raw value stored in the Check field + * + * After successful coding, the first lzma_check_size(check) bytes + * of this array contain the raw value stored in the Check field. + * + * Note that CRC32 and CRC64 are stored in little endian byte order. + * Take it into account if you display the Check values to the user. + * + * Written by: + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + uint8_t raw_check[LZMA_CHECK_SIZE_MAX]; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + uint32_t reserved_int1; + uint32_t reserved_int2; + lzma_vli reserved_int3; + lzma_vli reserved_int4; + lzma_vli reserved_int5; + lzma_vli reserved_int6; + lzma_vli reserved_int7; + lzma_vli reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + lzma_bool reserved_bool1; + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + +} lzma_block; + + +/** + * \brief Decode the Block Header Size field + * + * To decode Block Header using lzma_block_header_decode(), the size of the + * Block Header has to be known and stored into lzma_block.header_size. + * The size can be calculated from the first byte of a Block using this macro. + * Note that if the first byte is 0x00, it indicates beginning of Index; use + * this macro only when the byte is not 0x00. + * + * There is no encoding macro, because Block Header encoder is enough for that. + */ +#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) + + +/** + * \brief Calculate Block Header Size + * + * Calculate the minimum size needed for the Block Header field using the + * settings specified in the lzma_block structure. Note that it is OK to + * increase the calculated header_size value as long as it is a multiple of + * four and doesn't exceed LZMA_BLOCK_HEADER_SIZE_MAX. Increasing header_size + * just means that lzma_block_header_encode() will add Header Padding. + * + * \return - LZMA_OK: Size calculated successfully and stored to + * block->header_size. + * - LZMA_OPTIONS_ERROR: Unsupported version, filters or + * filter options. + * - LZMA_PROG_ERROR: Invalid values like compressed_size == 0. + * + * \note This doesn't check that all the options are valid i.e. this + * may return LZMA_OK even if lzma_block_header_encode() or + * lzma_block_encoder() would fail. If you want to validate the + * filter chain, consider using lzma_memlimit_encoder() which as + * a side-effect validates the filter chain. + */ +extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Block Header + * + * The caller must have calculated the size of the Block Header already with + * lzma_block_header_size(). If a value larger than the one calculated by + * lzma_block_header_size() is used, the Block Header will be padded to the + * specified size. + * + * \param out Beginning of the output buffer. This must be + * at least block->header_size bytes. + * \param block Block options to be encoded. + * + * \return - LZMA_OK: Encoding was successful. block->header_size + * bytes were written to output buffer. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_encode( + const lzma_block *block, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Block Header + * + * block->version should be set to the highest value supported by the + * application; currently the only possible version is zero. This function + * will set version to the lowest value that still supports all the features + * required by the Block Header. + * + * The size of the Block Header must have already been decoded with + * lzma_block_header_size_decode() macro and stored to block->header_size. + * + * block->filters must have been allocated, but they don't need to be + * initialized (possible existing filter options are not freed). + * + * \param block Destination for Block options. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() (and also free() + * if an error occurs). + * \param in Beginning of the input buffer. This must be + * at least block->header_size bytes. + * + * \return - LZMA_OK: Decoding was successful. block->header_size + * bytes were read from the input buffer. + * - LZMA_OPTIONS_ERROR: The Block Header specifies some + * unsupported options such as unsupported filters. This can + * happen also if block->version was set to a too low value + * compared to what would be required to properly represent + * the information stored in the Block Header. + * - LZMA_DATA_ERROR: Block Header is corrupt, for example, + * the CRC32 doesn't match. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, + lzma_allocator *allocator, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Validate and set Compressed Size according to Unpadded Size + * + * Block Header stores Compressed Size, but Index has Unpadded Size. If the + * application has already parsed the Index and is now decoding Blocks, + * it can calculate Compressed Size from Unpadded Size. This function does + * exactly that with error checking: + * + * - Compressed Size calculated from Unpadded Size must be positive integer, + * that is, Unpadded Size must be big enough that after Block Header and + * Check fields there's still at least one byte for Compressed Size. + * + * - If Compressed Size was present in Block Header, the new value + * calculated from Unpadded Size is compared against the value + * from Block Header. + * + * \note This function must be called _after_ decoding the Block Header + * field so that it can properly validate Compressed Size if it + * was present in Block Header. + * + * \return - LZMA_OK: block->compressed_size was set successfully. + * - LZMA_DATA_ERROR: unpadded_size is too small compared to + * block->header_size and lzma_check_size(block->check). + * - LZMA_PROG_ERROR: Some values are invalid. For example, + * block->header_size must be a multiple of four and + * between 8 and 1024 inclusive. + */ +extern LZMA_API(lzma_ret) lzma_block_compressed_size( + lzma_block *block, lzma_vli unpadded_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate Unpadded Size + * + * The Index field stores Unpadded Size and Uncompressed Size. The latter + * can be taken directly from the lzma_block structure after coding a Block, + * but Unpadded Size needs to be calculated from Block Header Size, + * Compressed Size, and size of the Check field. This is where this function + * is needed. + * + * \return Unpadded Size on success, or zero on error. + */ +extern LZMA_API(lzma_vli) lzma_block_unpadded_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. + */ +extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Block encoder + * + * Valid actions for lzma_code() are LZMA_RUN, LZMA_SYNC_FLUSH (only if the + * filter chain supports it), and LZMA_FINISH. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_UNSUPPORTED_CHECK: block->check specifies a Check ID + * that is not supported by this buid of liblzma. Initializing + * the encoder failed. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_encoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Block decoder + * + * Valid actions for lzma_code() are LZMA_RUN and LZMA_FINISH. Using + * LZMA_FINISH is not required. It is supported only for convenience. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but + * the given Check ID is not supported, thus Check will be + * ignored. + * - LZMA_PROG_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_decoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate maximum output size for single-call Block encoding + * + * This is equivalent to lzma_stream_buffer_bound() but for .xz Blocks. + * See the documentation of lzma_stream_buffer_bound(). + */ +extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Block encoder + * + * In contrast to the multi-call encoder initialized with + * lzma_block_encoder(), this function encodes also the Block Header. This + * is required to make it possible to write appropriate Block Header also + * in case the data isn't compressible, and different filter chain has to be + * used to encode the data in uncompressed form using uncompressed chunks + * of the LZMA2 filter. + * + * When the data isn't compressible, header_size, compressed_size, and + * uncompressed_size are set just like when the data was compressible, but + * it is possible that header_size is too small to hold the filter chain + * specified in block->filters, because that isn't necessarily the filter + * chain that was actually used to encode the data. lzma_block_unpadded_size() + * still works normally, because it doesn't read the filters array. + * + * \param block Block options: block->version, block->check, + * and block->filters must have been initialized. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_encode( + lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Block decoder + * + * This is single-call equivalent of lzma_block_decoder(), and requires that + * the caller has already decoded Block Header and checked its memory usage. + * + * \param block Block options just like with lzma_block_decoder(). + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_MEM_ERROR + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_decode( + lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow; diff --git a/liblzma/api/lzma/check.h b/liblzma/api/lzma/check.h new file mode 100644 index 0000000..6a243db --- /dev/null +++ b/liblzma/api/lzma/check.h @@ -0,0 +1,150 @@ +/** + * \file lzma/check.h + * \brief Integrity checks + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Type of the integrity check (Check ID) + * + * The .xz format supports multiple types of checks that are calculated + * from the uncompressed data. They vary in both speed and ability to + * detect errors. + */ +typedef enum { + LZMA_CHECK_NONE = 0, + /**< + * No Check is calculated. + * + * Size of the Check field: 0 bytes + */ + + LZMA_CHECK_CRC32 = 1, + /**< + * CRC32 using the polynomial from the IEEE 802.3 standard + * + * Size of the Check field: 4 bytes + */ + + LZMA_CHECK_CRC64 = 4, + /**< + * CRC64 using the polynomial from the ECMA-182 standard + * + * Size of the Check field: 8 bytes + */ + + LZMA_CHECK_SHA256 = 10 + /**< + * SHA-256 + * + * Size of the Check field: 32 bytes + */ +} lzma_check; + + +/** + * \brief Maximum valid Check ID + * + * The .xz file format specification specifies 16 Check IDs (0-15). Some + * of them are only reserved, that is, no actual Check algorithm has been + * assigned. When decoding, liblzma still accepts unknown Check IDs for + * future compatibility. If a valid but unsupported Check ID is detected, + * liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK, + * LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK in container.h. + */ +#define LZMA_CHECK_ID_MAX 15 + + +/** + * \brief Test if the given Check ID is supported + * + * Return true if the given Check ID is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * is not in the range [0, 15]; in that case the return value is always false. + * + * You can assume that LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always + * supported (even if liblzma is built with limited features). + */ +extern LZMA_API(lzma_bool) lzma_check_is_supported(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Get the size of the Check field with the given Check ID + * + * Although not all Check IDs have a check algorithm associated, the size of + * every Check is already frozen. This function returns the size (in bytes) of + * the Check field with the specified Check ID. The values are: + * { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 } + * + * If the argument is not in the range [0, 15], UINT32_MAX is returned. + */ +extern LZMA_API(uint32_t) lzma_check_size(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Maximum size of a Check field + */ +#define LZMA_CHECK_SIZE_MAX 64 + + +/** + * \brief Calculate CRC32 + * + * Calculate CRC32 using the polynomial from the IEEE 802.3 standard. + * + * \param buf Pointer to the input buffer + * \param size Size of the input buffer + * \param crc Previously returned CRC value. This is used to + * calculate the CRC of a big buffer in smaller chunks. + * Set to zero when starting a new calculation. + * + * \return Updated CRC value, which can be passed to this function + * again to continue CRC calculation. + */ +extern LZMA_API(uint32_t) lzma_crc32( + const uint8_t *buf, size_t size, uint32_t crc) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate CRC64 + * + * Calculate CRC64 using the polynomial from the ECMA-182 standard. + * + * This function is used similarly to lzma_crc32(). See its documentation. + */ +extern LZMA_API(uint64_t) lzma_crc64( + const uint8_t *buf, size_t size, uint64_t crc) + lzma_nothrow lzma_attr_pure; + + +/* + * SHA-256 functions are currently not exported to public API. + * Contact Lasse Collin if you think it should be. + */ + + +/** + * \brief Get the type of the integrity check + * + * This function can be called only immediately after lzma_code() has + * returned LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, or LZMA_GET_CHECK. + * Calling this function in any other situation has undefined behavior. + */ +extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) + lzma_nothrow; diff --git a/liblzma/api/lzma/container.h b/liblzma/api/lzma/container.h new file mode 100644 index 0000000..83e70b4 --- /dev/null +++ b/liblzma/api/lzma/container.h @@ -0,0 +1,412 @@ +/** + * \file lzma/container.h + * \brief File formats + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/************ + * Encoding * + ************/ + +/** + * \brief Default compression preset + * + * It's not straightforward to recommend a default preset, because in some + * cases keeping the resource usage relatively low is more important that + * getting the maximum compression ratio. + */ +#define LZMA_PRESET_DEFAULT UINT32_C(6) + + +/** + * \brief Mask for preset level + * + * This is useful only if you need to extract the level from the preset + * variable. That should be rare. + */ +#define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F) + + +/* + * Preset flags + * + * Currently only one flag is defined. + */ + +/** + * \brief Extreme compression preset + * + * This flag modifies the preset to make the encoding significantly slower + * while improving the compression ratio only marginally. This is useful + * when you don't mind wasting time to get as small result as possible. + * + * This flag doesn't affect the memory usage requirements of the decoder (at + * least not significantly). The memory usage of the encoder may be increased + * a little but only at the lowest preset levels (0-3). + */ +#define LZMA_PRESET_EXTREME (UINT32_C(1) << 31) + + +/** + * \brief Calculate approximate memory usage of easy encoder + * + * This function is a wrapper for lzma_raw_encoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + */ +extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate approximate decoder memory usage of a preset + * + * This function is a wrapper for lzma_raw_decoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + */ +extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Stream encoder using a preset number + * + * This function is intended for those who just want to use the basic features + * if liblzma (that is, most developers out there). + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param preset Compression preset to use. A preset consist of level + * number and zero or more flags. Usually flags aren't + * used, so preset is simply a number [0, 9] which match + * the options -0 ... -9 of the xz command line tool. + * Additional flags can be be set using bitwise-or with + * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME. + * \param check Integrity check type to use. See check.h for available + * checks. The xz command line tool defaults to + * LZMA_CHECK_CRC64, which is a good choice if you are + * unsure. LZMA_CHECK_CRC32 is good too as long as the + * uncompressed file is not many gigabytes. + * + * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to + * encode your data. + * - LZMA_MEM_ERROR: Memory allocation failed. + * - LZMA_OPTIONS_ERROR: The given compression preset is not + * supported by this build of liblzma. + * - LZMA_UNSUPPORTED_CHECK: The given check type is not + * supported by this liblzma build. + * - LZMA_PROG_ERROR: One or more of the parameters have values + * that will never be valid. For example, strm == NULL. + * + * If initialization fails (return value is not LZMA_OK), all the memory + * allocated for *strm by liblzma is always freed. Thus, there is no need + * to call lzma_end() after failed initialization. + * + * If initialization succeeds, use lzma_code() to do the actual encoding. + * Valid values for `action' (the second argument of lzma_code()) are + * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, + * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH. + */ +extern LZMA_API(lzma_ret) lzma_easy_encoder( + lzma_stream *strm, uint32_t preset, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream encoding using a preset number + * + * The maximum required output buffer size can be calculated with + * lzma_stream_buffer_bound(). + * + * \param preset Compression preset to use. See the description + * in lzma_easy_encoder(). + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_easy_buffer_encode( + uint32_t preset, lzma_check check, + lzma_allocator *allocator, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Initialize .xz Stream encoder using a custom filter chain + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h for + * more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, + const lzma_filter *filters, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma encoder (legacy file format) + * + * The .lzma format is sometimes called the LZMA_Alone format, which is the + * reason for the name of this function. The .lzma format supports only the + * LZMA1 filter. There is no support for integrity checks like CRC32. + * + * Use this function if and only if you need to create files readable by + * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format + * is strongly recommended. + * + * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * No kind of flushing is supported, because the file format doesn't make + * it possible. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_encoder( + lzma_stream *strm, const lzma_options_lzma *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate output buffer size for single-call Stream encoder + * + * When trying to compress uncompressible data, the encoded size will be + * slightly bigger than the input data. This function calculates how much + * output buffer space is required to be sure that lzma_stream_buffer_encode() + * doesn't return LZMA_BUF_ERROR. + * + * The calculated value is not exact, but it is guaranteed to be big enough. + * The actual maximum output space required may be slightly smaller (up to + * about 100 bytes). This should not be a problem in practice. + * + * If the calculated maximum size doesn't fit into size_t or would make the + * Stream grow past LZMA_VLI_MAX (which should never happen in practice), + * zero is returned to indicate the error. + * + * \note The limit calculated by this function applies only to + * single-call encoding. Multi-call encoding may (and probably + * will) have larger maximum expansion when encoding + * uncompressible data. Currently there is no function to + * calculate the maximum expansion of multi-call encoding. + */ +extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Stream encoder + * + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h + * for more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_encode( + lzma_filter *filters, lzma_check check, + lzma_allocator *allocator, const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/************ + * Decoding * + ************/ + +/** + * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream + * being decoded has no integrity check. Note that when used with + * lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK + * if LZMA_TELL_NO_CHECK is used. + */ +#define LZMA_TELL_NO_CHECK UINT32_C(0x01) + + +/** + * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input + * stream has an integrity check, but the type of the integrity check is not + * supported by this liblzma version or build. Such files can still be + * decoded, but the integrity check cannot be verified. + */ +#define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02) + + +/** + * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type + * of the integrity check is known. The type can then be got with + * lzma_get_check(). + */ +#define LZMA_TELL_ANY_CHECK UINT32_C(0x04) + + +/** + * This flag enables decoding of concatenated files with file formats that + * allow concatenating compressed files as is. From the formats currently + * supported by liblzma, only the .xz format allows concatenated files. + * Concatenated files are not allowed with the legacy .lzma format. + * + * This flag also affects the usage of the `action' argument for lzma_code(). + * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END + * unless LZMA_FINISH is used as `action'. Thus, the application has to set + * LZMA_FINISH in the same way as it does when encoding. + * + * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH + * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required. + */ +#define LZMA_CONCATENATED UINT32_C(0x08) + + +/** + * \brief Initialize .xz Stream decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode .xz Streams and .lzma files with autodetection + * + * This decoder autodetects between the .xz and .lzma file formats, and + * calls lzma_stream_decoder() or lzma_alone_decoder() once the type + * of the input file has been detected. + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. + * \param flags Bitwise-or of flags, or zero for no flags. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_auto_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma decoder (legacy file format) + * + * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * There is no need to use LZMA_FINISH, but allowing it may simplify + * certain types of applications. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_decoder( + lzma_stream *strm, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream decoder + * + * \param memlimit Pointer to how much memory the decoder is allowed + * to allocate. The value pointed by this pointer is + * modified if and only if LZMA_MEMLIMIT_ERROR is + * returned. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK + * is not allowed and will return LZMA_PROG_ERROR. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_NO_CHECK: This can be returned only if using + * the LZMA_TELL_NO_CHECK flag. + * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using + * the LZMA_TELL_UNSUPPORTED_CHECK flag. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( + uint64_t *memlimit, uint32_t flags, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/liblzma/api/lzma/delta.h b/liblzma/api/lzma/delta.h new file mode 100644 index 0000000..592fc4f --- /dev/null +++ b/liblzma/api/lzma/delta.h @@ -0,0 +1,77 @@ +/** + * \file lzma/delta.h + * \brief Delta filter + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Delta filter. This is used as lzma_filter.id. + */ +#define LZMA_FILTER_DELTA LZMA_VLI_C(0x03) + + +/** + * \brief Type of the delta calculation + * + * Currently only byte-wise delta is supported. Other possible types could + * be, for example, delta of 16/32/64-bit little/big endian integers, but + * these are not currently planned since byte-wise delta is almost as good. + */ +typedef enum { + LZMA_DELTA_TYPE_BYTE +} lzma_delta_type; + + +/** + * \brief Options for the Delta filter + * + * These options are needed by both encoder and decoder. + */ +typedef struct { + /** For now, this must always be LZMA_DELTA_TYPE_BYTE. */ + lzma_delta_type type; + + /** + * \brief Delta distance + * + * With the only currently supported type, LZMA_DELTA_TYPE_BYTE, + * the distance is as bytes. + * + * Examples: + * - 16-bit stereo audio: distance = 4 bytes + * - 24-bit RGB image data: distance = 3 bytes + */ + uint32_t dist; +# define LZMA_DELTA_DIST_MIN 1 +# define LZMA_DELTA_DIST_MAX 256 + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * when type is LZMA_DELTA_TYPE_BYTE, so it is safe to leave these + * uninitialized. + */ + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + void *reserved_ptr1; + void *reserved_ptr2; + +} lzma_options_delta; diff --git a/liblzma/api/lzma/filter.h b/liblzma/api/lzma/filter.h new file mode 100644 index 0000000..efd036f --- /dev/null +++ b/liblzma/api/lzma/filter.h @@ -0,0 +1,420 @@ +/** + * \file lzma/filter.h + * \brief Common filter related types and functions + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum number of filters in a chain + * + * A filter chain can have 1-4 filters, of which three are allowed to change + * the size of the data. Usually only one or two filters are needed. + */ +#define LZMA_FILTERS_MAX 4 + + +/** + * \brief Filter options + * + * This structure is used to pass Filter ID and a pointer filter's + * options to liblzma. A few functions work with a single lzma_filter + * structure, while most functions expect a filter chain. + * + * A filter chain is indicated with an array of lzma_filter structures. + * The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter + * array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to + * be able to hold any arbitrary filter chain. This is important when + * using lzma_block_header_decode() from block.h, because too small + * array would make liblzma write past the end of the filters array. + */ +typedef struct { + /** + * \brief Filter ID + * + * Use constants whose name begin with `LZMA_FILTER_' to specify + * different filters. In an array of lzma_filter structures, use + * LZMA_VLI_UNKNOWN to indicate end of filters. + * + * \note This is not an enum, because on some systems enums + * cannot be 64-bit. + */ + lzma_vli id; + + /** + * \brief Pointer to filter-specific options structure + * + * If the filter doesn't need options, set this to NULL. If id is + * set to LZMA_VLI_UNKNOWN, options is ignored, and thus + * doesn't need be initialized. + */ + void *options; + +} lzma_filter; + + +/** + * \brief Test if the given Filter ID is supported for encoding + * + * Return true if the give Filter ID is supported for encoding by this + * liblzma build. Otherwise false is returned. + * + * There is no way to list which filters are available in this particular + * liblzma version and build. It would be useless, because the application + * couldn't know what kind of options the filter would need. + */ +extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Test if the given Filter ID is supported for decoding + * + * Return true if the give Filter ID is supported for decoding by this + * liblzma build. Otherwise false is returned. + */ +extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Copy the filters array + * + * Copy the Filter IDs and filter-specific options from src to dest. + * Up to LZMA_FILTERS_MAX filters are copied, plus the terminating + * .id == LZMA_VLI_UNKNOWN. Thus, dest should have at least + * LZMA_FILTERS_MAX + 1 elements space unless the caller knows that + * src is smaller than that. + * + * Unless the filter-specific options is NULL, the Filter ID has to be + * supported by liblzma, because liblzma needs to know the size of every + * filter-specific options structure. The filter-specific options are not + * validated. If options is NULL, any unsupported Filter IDs are copied + * without returning an error. + * + * Old filter-specific options in dest are not freed, so dest doesn't + * need to be initialized by the caller in any way. + * + * If an error occurs, memory possibly already allocated by this function + * is always freed. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR: Unsupported Filter ID and its options + * is not NULL. + * - LZMA_PROG_ERROR: src or dest is NULL. + */ +extern LZMA_API(lzma_ret) lzma_filters_copy(const lzma_filter *src, + lzma_filter *dest, lzma_allocator *allocator) lzma_nothrow; + + +/** + * \brief Calculate approximate memory requirements for raw encoder + * + * This function can be used to calculate the memory requirements for + * Block and Stream encoders too because Block and Stream encoders don't + * need significantly more memory than raw encoder. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Number of bytes of memory required for the given + * filter chain when encoding. + */ +extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate approximate memory requirements for raw decoder + * + * This function can be used to calculate the memory requirements for + * Block and Stream decoders too because Block and Stream decoders don't + * need significantly more memory than raw decoder. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Number of bytes of memory required for the given + * filter chain when decoding. + */ +extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize raw encoder + * + * This function may be useful when implementing custom file formats. + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * + * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the + * filter chain supports it), or LZMA_FINISH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_encoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize raw decoder + * + * The initialization of raw decoder goes similarly to raw encoder. + * + * The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using + * LZMA_FINISH is not required, it is supported just for convenience. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_decoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Update the filter chain in the encoder + * + * This function is for advanced users only. This function has two slightly + * different purposes: + * + * - After LZMA_FULL_FLUSH when using Stream encoder: Set a new filter + * chain, which will be used starting from the next Block. + * + * - After LZMA_SYNC_FLUSH using Raw, Block, or Stream encoder: Change + * the filter-specific options in the middle of encoding. The actual + * filters in the chain (Filter IDs) cannot be changed. In the future, + * it might become possible to change the filter options without + * using LZMA_SYNC_FLUSH. + * + * While rarely useful, this function may be called also when no data has + * been compressed yet. In that case, this function will behave as if + * LZMA_FULL_FLUSH (Stream encoder) or LZMA_SYNC_FLUSH (Raw or Block + * encoder) had been used right before calling this function. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filters_update( + lzma_stream *strm, const lzma_filter *filters) lzma_nothrow; + + +/** + * \brief Single-call raw encoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + * + * \note There is no function to calculate how big output buffer + * would surely be big enough. (lzma_stream_buffer_bound() + * works only for lzma_stream_buffer_encode(); raw encoder + * won't necessarily meet that bound.) + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_encode( + const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, + size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call raw decoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_decode( + const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Get the size of the Filter Properties field + * + * This function may be useful when implementing custom file formats + * using the raw encoder and decoder. + * + * \param size Pointer to uint32_t to hold the size of the properties + * \param filter Filter ID and options (the size of the properties may + * vary depending on the options) + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note This function validates the Filter ID, but does not + * necessarily validate the options. Thus, it is possible + * that this returns LZMA_OK while the following call to + * lzma_properties_encode() returns LZMA_OPTIONS_ERROR. + */ +extern LZMA_API(lzma_ret) lzma_properties_size( + uint32_t *size, const lzma_filter *filter) lzma_nothrow; + + +/** + * \brief Encode the Filter Properties field + * + * \param filter Filter ID and options + * \param props Buffer to hold the encoded options. The size of + * buffer must have been already determined with + * lzma_properties_size(). + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note Even this function won't validate more options than actually + * necessary. Thus, it is possible that encoding the properties + * succeeds but using the same options to initialize the encoder + * will fail. + * + * \note If lzma_properties_size() indicated that the size + * of the Filter Properties field is zero, calling + * lzma_properties_encode() is not required, but it + * won't do any harm either. + */ +extern LZMA_API(lzma_ret) lzma_properties_encode( + const lzma_filter *filter, uint8_t *props) lzma_nothrow; + + +/** + * \brief Decode the Filter Properties field + * + * \param filter filter->id must have been set to the correct + * Filter ID. filter->options doesn't need to be + * initialized (it's not freed by this function). The + * decoded options will be stored to filter->options. + * filter->options is set to NULL if there are no + * properties or if an error occurs. + * \param allocator Custom memory allocator used to allocate the + * options. Set to NULL to use the default malloc(), + * and in case of an error, also free(). + * \param props Input buffer containing the properties. + * \param props_size Size of the properties. This must be the exact + * size; giving too much or too little input will + * return LZMA_OPTIONS_ERROR. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_properties_decode( + lzma_filter *filter, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) lzma_nothrow; + + +/** + * \brief Calculate encoded size of a Filter Flags field + * + * Knowing the size of Filter Flags is useful to know when allocating + * memory to hold the encoded Filter Flags. + * + * \param size Pointer to integer to hold the calculated size + * \param filter Filter ID and associated options whose encoded + * size is to be calculated + * + * \return - LZMA_OK: *size set successfully. Note that this doesn't + * guarantee that filter->options is valid, thus + * lzma_filter_flags_encode() may still fail. + * - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options. + * - LZMA_PROG_ERROR: Invalid options + * + * \note If you need to calculate size of List of Filter Flags, + * you need to loop over every lzma_filter entry. + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_size( + uint32_t *size, const lzma_filter *filter) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Filter Flags into given buffer + * + * In contrast to some functions, this doesn't allocate the needed buffer. + * This is due to how this function is used internally by liblzma. + * + * \param filter Filter ID and options to be encoded + * \param out Beginning of the output buffer + * \param out_pos out[*out_pos] is the next write position. This + * is updated by the encoder. + * \param out_size out[out_size] is the first byte to not write. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid options or not enough output + * buffer space (you should have checked it with + * lzma_filter_flags_size()). + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filter, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Filter Flags from given buffer + * + * The decoded result is stored into *filter. The old value of + * filter->options is not free()d. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_decode( + lzma_filter *filter, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/liblzma/api/lzma/hardware.h b/liblzma/api/lzma/hardware.h new file mode 100644 index 0000000..e7dd03c --- /dev/null +++ b/liblzma/api/lzma/hardware.h @@ -0,0 +1,50 @@ +/** + * \file lzma/hardware.h + * \brief Hardware information + * + * Since liblzma can consume a lot of system resources, it also provides + * ways to limit the resource usage. Applications linking against liblzma + * need to do the actual decisions how much resources to let liblzma to use. + * To ease making these decisions, liblzma provides functions to find out + * the relevant capabilities of the underlaying hardware. Currently there + * is only a function to find out the amount of RAM, but in the future there + * will be also a function to detect how many concurrent threads the system + * can run. + * + * \note On some operating systems, these function may temporarily + * load a shared library or open file descriptor(s) to find out + * the requested hardware information. Unless the application + * assumes that specific file descriptors are not touched by + * other threads, this should have no effect on thread safety. + * Possible operations involving file descriptors will restart + * the syscalls if they return EINTR. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Get the total amount of physical memory (RAM) in bytes + * + * This function may be useful when determining a reasonable memory + * usage limit for decompressing or how much memory it is OK to use + * for compressing. + * + * \return On success, the total amount of physical memory in bytes + * is returned. If the amount of RAM cannot be determined, + * zero is returned. This can happen if an error occurs + * or if there is no code in liblzma to detect the amount + * of RAM on the specific operating system. + */ +extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow; diff --git a/liblzma/api/lzma/index.h b/liblzma/api/lzma/index.h new file mode 100644 index 0000000..16bacc2 --- /dev/null +++ b/liblzma/api/lzma/index.h @@ -0,0 +1,682 @@ +/** + * \file lzma/index.h + * \brief Handling of .xz Index and related information + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Opaque data type to hold the Index(es) and other information + * + * lzma_index often holds just one .xz Index and possibly the Stream Flags + * of the same Stream and size of the Stream Padding field. However, + * multiple lzma_indexes can be concatenated with lzma_index_cat() and then + * there may be information about multiple Streams in the same lzma_index. + * + * Notes about thread safety: Only one thread may modify lzma_index at + * a time. All functions that take non-const pointer to lzma_index + * modify it. As long as no thread is modifying the lzma_index, getting + * information from the same lzma_index can be done from multiple threads + * at the same time with functions that take a const pointer to + * lzma_index or use lzma_index_iter. The same iterator must be used + * only by one thread at a time, of course, but there can be as many + * iterators for the same lzma_index as needed. + */ +typedef struct lzma_index_s lzma_index; + + +/** + * \brief Iterator to get information about Blocks and Streams + */ +typedef struct { + struct { + /** + * \brief Pointer to Stream Flags + * + * This is NULL if Stream Flags have not been set for + * this Stream with lzma_index_stream_flags(). + */ + const lzma_stream_flags *flags; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + + /** + * \brief Stream number in the lzma_index + * + * The first Stream is 1. + */ + lzma_vli number; + + /** + * \brief Number of Blocks in the Stream + * + * If this is zero, the block structure below has + * undefined values. + */ + lzma_vli block_count; + + /** + * \brief Compressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli compressed_offset; + + /** + * \brief Uncompressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli uncompressed_offset; + + /** + * \brief Compressed size of this Stream + * + * This includes all headers except the possible + * Stream Padding after this Stream. + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed size of this Stream + */ + lzma_vli uncompressed_size; + + /** + * \brief Size of Stream Padding after this Stream + * + * If it hasn't been set with lzma_index_stream_padding(), + * this defaults to zero. Stream Padding is always + * a multiple of four bytes. + */ + lzma_vli padding; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + } stream; + + struct { + /** + * \brief Block number in the file + * + * The first Block is 1. + */ + lzma_vli number_in_file; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the + * lzma_index (i.e. usually the beginning of the .xz file). + * Normally this is where you should seek in the .xz file + * to start decompressing this Block. + */ + lzma_vli compressed_file_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + * + * When doing random-access reading, it is possible that + * the target offset is not exactly at Block boundary. One + * will need to compare the target offset against + * uncompressed_file_offset or uncompressed_stream_offset, + * and possibly decode and throw away some amount of data + * before reaching the target offset. + */ + lzma_vli uncompressed_file_offset; + + /** + * \brief Block number in this Stream + * + * The first Block is 1. + */ + lzma_vli number_in_stream; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli compressed_stream_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli uncompressed_stream_offset; + + /** + * \brief Uncompressed size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. It will allow the Block decoder to + * validate the uncompressed size. + */ + lzma_vli uncompressed_size; + + /** + * \brief Unpadded size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. It will allow the Block decoder to + * validate the unpadded size. + */ + lzma_vli unpadded_size; + + /** + * \brief Total compressed size + * + * This includes all headers and padding in this Block. + * This is useful if you need to know how many bytes + * the Block decoder will actually read. + */ + lzma_vli total_size; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + const void *reserved_ptr4; + } block; + + /* + * Internal data which is used to store the state of the iterator. + * The exact format may vary between liblzma versions, so don't + * touch these in any way. + */ + union { + const void *p; + size_t s; + lzma_vli v; + } internal[6]; +} lzma_index_iter; + + +/** + * \brief Operation mode for lzma_index_iter_next() + */ +typedef enum { + LZMA_INDEX_ITER_ANY = 0, + /**< + * \brief Get the next Block or Stream + * + * Go to the next Block if the current Stream has at least + * one Block left. Otherwise go to the next Stream even if + * it has no Blocks. If the Stream has no Blocks + * (lzma_index_iter.stream.block_count == 0), + * lzma_index_iter.block will have undefined values. + */ + + LZMA_INDEX_ITER_STREAM = 1, + /**< + * \brief Get the next Stream + * + * Go to the next Stream even if the current Stream has + * unread Blocks left. If the next Stream has at least one + * Block, the iterator will point to the first Block. + * If there are no Blocks, lzma_index_iter.block will have + * undefined values. + */ + + LZMA_INDEX_ITER_BLOCK = 2, + /**< + * \brief Get the next Block + * + * Go to the next Block if the current Stream has at least + * one Block left. If the current Stream has no Blocks left, + * the next Stream with at least one Block is located and + * the iterator will be made to point to the first Block of + * that Stream. + */ + + LZMA_INDEX_ITER_NONEMPTY_BLOCK = 3 + /**< + * \brief Get the next non-empty Block + * + * This is like LZMA_INDEX_ITER_BLOCK except that it will + * skip Blocks whose Uncompressed Size is zero. + */ + +} lzma_index_iter_mode; + + +/** + * \brief Calculate memory usage of lzma_index + * + * On disk, the size of the Index field depends on both the number of Records + * stored and how big values the Records store (due to variable-length integer + * encoding). When the Index is kept in lzma_index structure, the memory usage + * depends only on the number of Records/Blocks stored in the Index(es), and + * in case of concatenated lzma_indexes, the number of Streams. The size in + * RAM is almost always significantly bigger than in the encoded form on disk. + * + * This function calculates an approximate amount of memory needed hold + * the given number of Streams and Blocks in lzma_index structure. This + * value may vary between CPU architectures and also between liblzma versions + * if the internal implementation is modified. + */ +extern LZMA_API(uint64_t) lzma_index_memusage( + lzma_vli streams, lzma_vli blocks) lzma_nothrow; + + +/** + * \brief Calculate the memory usage of an existing lzma_index + * + * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i), + * lzma_index_block_count(i)). + */ +extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i) + lzma_nothrow; + + +/** + * \brief Allocate and initialize a new lzma_index structure + * + * \return On success, a pointer to an empty initialized lzma_index is + * returned. If allocation fails, NULL is returned. + */ +extern LZMA_API(lzma_index *) lzma_index_init(lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Deallocate lzma_index + * + * If i is NULL, this does nothing. + */ +extern LZMA_API(void) lzma_index_end(lzma_index *i, lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Block to lzma_index + * + * \param i Pointer to a lzma_index structure + * \param allocator Pointer to lzma_allocator, or NULL to + * use malloc() + * \param unpadded_size Unpadded Size of a Block. This can be + * calculated with lzma_block_unpadded_size() + * after encoding or decoding the Block. + * \param uncompressed_size Uncompressed Size of a Block. This can be + * taken directly from lzma_block structure + * after encoding or decoding the Block. + * + * Appending a new Block does not invalidate iterators. For example, + * if an iterator was pointing to the end of the lzma_index, after + * lzma_index_append() it is possible to read the next Block with + * an existing iterator. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_append( + lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Set the Stream Flags + * + * Set the Stream Flags of the last (and typically the only) Stream + * in lzma_index. This can be useful when reading information from the + * lzma_index, because to decode Blocks, knowing the integrity check type + * is needed. + * + * The given Stream Flags are copied into internal preallocated structure + * in the lzma_index, thus the caller doesn't need to keep the *stream_flags + * available after calling this function. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_flags( + lzma_index *i, const lzma_stream_flags *stream_flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the types of integrity Checks + * + * If lzma_index_stream_flags() is used to set the Stream Flags for + * every Stream, lzma_index_checks() can be used to get a bitmask to + * indicate which Check types have been used. It can be useful e.g. if + * showing the Check types to the user. + * + * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10. + */ +extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the amount of Stream Padding + * + * Set the amount of Stream Padding of the last (and typically the only) + * Stream in the lzma_index. This is needed when planning to do random-access + * reading within multiple concatenated Streams. + * + * By default, the amount of Stream Padding is assumed to be zero bytes. + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: The file size would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_padding( + lzma_index *i, lzma_vli stream_padding) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the number of Streams + */ +extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the number of Blocks + * + * This returns the total number of Blocks in lzma_index. To get number + * of Blocks in individual Streams, use lzma_index_iter. + */ +extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Stream + * + * If multiple lzma_indexes have been combined, this works as if the Blocks + * were in a single Stream. This is useful if you are going to combine + * Blocks from multiple Streams into a single new Stream. + */ +extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Blocks + * + * This doesn't include the Stream Header, Stream Footer, Stream Padding, + * or Index fields. + */ +extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the file + * + * When no lzma_indexes have been combined with lzma_index_cat() and there is + * no Stream Padding, this function is identical to lzma_index_stream_size(). + * If multiple lzma_indexes have been combined, this includes also the headers + * of each separate Stream and the possible Stream Padding fields. + */ +extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the uncompressed size of the file + */ +extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize an iterator + * + * \param iter Pointer to a lzma_index_iter structure + * \param i lzma_index to which the iterator will be associated + * + * This function associates the iterator with the given lzma_index, and calls + * lzma_index_iter_rewind() on the iterator. + * + * This function doesn't allocate any memory, thus there is no + * lzma_index_iter_end(). The iterator is valid as long as the + * associated lzma_index is valid, that is, until lzma_index_end() or + * using it as source in lzma_index_cat(). Specifically, lzma_index doesn't + * become invalid if new Blocks are added to it with lzma_index_append() or + * if it is used as the destination in lzma_index_cat(). + * + * It is safe to make copies of an initialized lzma_index_iter, for example, + * to easily restart reading at some particular position. + */ +extern LZMA_API(void) lzma_index_iter_init( + lzma_index_iter *iter, const lzma_index *i) lzma_nothrow; + + +/** + * \brief Rewind the iterator + * + * Rewind the iterator so that next call to lzma_index_iter_next() will + * return the first Block or Stream. + */ +extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) + lzma_nothrow; + + +/** + * \brief Get the next Block or Stream + * + * \param iter Iterator initialized with lzma_index_iter_init() + * \param mode Specify what kind of information the caller wants + * to get. See lzma_index_iter_mode for details. + * + * \return If next Block or Stream matching the mode was found, *iter + * is updated and this function returns false. If no Block or + * Stream matching the mode is found, *iter is not modified + * and this function returns true. If mode is set to an unknown + * value, *iter is not modified and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_next( + lzma_index_iter *iter, lzma_index_iter_mode mode) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Locate a Block + * + * If it is possible to seek in the .xz file, it is possible to parse + * the Index field(s) and use lzma_index_iter_locate() to do random-access + * reading with granularity of Block size. + * + * \param iter Iterator that was earlier initialized with + * lzma_index_iter_init(). + * \param target Uncompressed target offset which the caller would + * like to locate from the Stream + * + * If the target is smaller than the uncompressed size of the Stream (can be + * checked with lzma_index_uncompressed_size()): + * - Information about the Stream and Block containing the requested + * uncompressed offset is stored into *iter. + * - Internal state of the iterator is adjusted so that + * lzma_index_iter_next() can be used to read subsequent Blocks or Streams. + * - This function returns false. + * + * If target is greater than the uncompressed size of the Stream, *iter + * is not modified, and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_locate( + lzma_index_iter *iter, lzma_vli target) lzma_nothrow; + + +/** + * \brief Concatenate lzma_indexes + * + * Concatenating lzma_indexes is useful when doing random-access reading in + * multi-Stream .xz file, or when combining multiple Streams into single + * Stream. + * + * \param dest lzma_index after which src is appended + * \param src lzma_index to be appended after dest. If this + * function succeeds, the memory allocated for src + * is freed or moved to be part of dest, and all + * iterators pointing to src will become invalid. + * \param allocator Custom memory allocator; can be NULL to use + * malloc() and free(). + * + * \return - LZMA_OK: lzma_indexes were concatenated successfully. + * src is now a dangling pointer. + * - LZMA_DATA_ERROR: *dest would grow too big. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_cat( + lzma_index *dest, lzma_index *src, lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Duplicate lzma_index + * + * \return A copy of the lzma_index, or NULL if memory allocation failed. + */ +extern LZMA_API(lzma_index *) lzma_index_dup( + const lzma_index *i, lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index encoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i Pointer to lzma_index which should be encoded. + * + * The valid `action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * It is enough to use only one of them (you can choose freely; use LZMA_RUN + * to support liblzma versions older than 5.0.0). + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_encoder( + lzma_stream *strm, const lzma_index *i) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i The decoded Index will be made available via + * this pointer. Initially this function will + * set *i to NULL (the old value is ignored). If + * decoding succeeds (lzma_code() returns + * LZMA_STREAM_END), *i will be set to point + * to a new lzma_index, which the application + * has to later free with lzma_index_end(). + * \param memlimit How much memory the resulting lzma_index is + * allowed to require. + * + * The valid `action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * It is enough to use only one of them (you can choose freely; use LZMA_RUN + * to support liblzma versions older than 5.0.0). + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_decoder( + lzma_stream *strm, lzma_index **i, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Index encoder + * + * \param i lzma_index to be encoded + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Output buffer is too small. Use + * lzma_index_size() to find out how much output + * space is needed. + * - LZMA_PROG_ERROR + * + * \note This function doesn't take allocator argument since all + * the internal data is allocated on stack. + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call .xz Index decoder + * + * \param i If decoding succeeds, *i will point to a new + * lzma_index, which the application has to + * later free with lzma_index_end(). If an error + * occurs, *i will be NULL. The old value of *i + * is always ignored and thus doesn't need to be + * initialized by the caller. + * \param memlimit Pointer to how much memory the resulting + * lzma_index is allowed to require. The value + * pointed by this pointer is modified if and only + * if LZMA_MEMLIMIT_ERROR is returned. + * \param allocator Pointer to lzma_allocator, or NULL to use malloc() + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, + uint64_t *memlimit, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; diff --git a/liblzma/api/lzma/index_hash.h b/liblzma/api/lzma/index_hash.h new file mode 100644 index 0000000..fa2e048 --- /dev/null +++ b/liblzma/api/lzma/index_hash.h @@ -0,0 +1,107 @@ +/** + * \file lzma/index_hash.h + * \brief Validate Index by using a hash function + * + * Hashing makes it possible to use constant amount of memory to validate + * Index of arbitrary size. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + +/** + * \brief Opaque data type to hold the Index hash + */ +typedef struct lzma_index_hash_s lzma_index_hash; + + +/** + * \brief Allocate and initialize a new lzma_index_hash structure + * + * If index_hash is NULL, a new lzma_index_hash structure is allocated, + * initialized, and a pointer to it returned. If allocation fails, NULL + * is returned. + * + * If index_hash is non-NULL, it is reinitialized and the same pointer + * returned. In this case, return value cannot be NULL or a different + * pointer than the index_hash that was given as an argument. + */ +extern LZMA_API(lzma_index_hash *) lzma_index_hash_init( + lzma_index_hash *index_hash, lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Deallocate lzma_index_hash structure + */ +extern LZMA_API(void) lzma_index_hash_end( + lzma_index_hash *index_hash, lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Record to an Index hash + * + * \param index Pointer to a lzma_index_hash structure + * \param unpadded_size Unpadded Size of a Block + * \param uncompressed_size Uncompressed Size of a Block + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR: Invalid arguments or this function is being + * used when lzma_index_hash_decode() has already been used. + */ +extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode and validate the Index field + * + * After telling the sizes of all Blocks with lzma_index_hash_append(), + * the actual Index field is decoded with this function. Specifically, + * once decoding of the Index field has been started, no more Records + * can be added using lzma_index_hash_append(). + * + * This function doesn't use lzma_stream structure to pass the input data. + * Instead, the input buffer is specified using three arguments. This is + * because it matches better the internal APIs of liblzma. + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param in Pointer to the beginning of the input buffer + * \param in_pos in[*in_pos] is the next byte to process + * \param in_size in[in_size] is the first byte not to process + * + * \return - LZMA_OK: So far good, but more input is needed. + * - LZMA_STREAM_END: Index decoded successfully and it matches + * the Records given with lzma_index_hash_append(). + * - LZMA_DATA_ERROR: Index is corrupt or doesn't match the + * information given with lzma_index_hash_append(). + * - LZMA_BUF_ERROR: Cannot progress because *in_pos >= in_size. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_hash_size( + const lzma_index_hash *index_hash) + lzma_nothrow lzma_attr_pure; diff --git a/liblzma/api/lzma/lzma.h b/liblzma/api/lzma/lzma.h new file mode 100644 index 0000000..8d5fdb6 --- /dev/null +++ b/liblzma/api/lzma/lzma.h @@ -0,0 +1,417 @@ +/** + * \file lzma/lzma.h + * \brief LZMA1 and LZMA2 filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief LZMA1 Filter ID + * + * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, + * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from + * accidentally using LZMA when they actually want LZMA2. + * + * LZMA1 shouldn't be used for new applications unless you _really_ know + * what you are doing. LZMA2 is almost always a better choice. + */ +#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) + +/** + * \brief LZMA2 Filter ID + * + * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds + * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion + * when trying to compress uncompressible data), possibility to change + * lc/lp/pb in the middle of encoding, and some other internal improvements. + */ +#define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) + + +/** + * \brief Match finders + * + * Match finder has major effect on both speed and compression ratio. + * Usually hash chains are faster than binary trees. + * + * If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better + * choice, because binary trees get much higher compression ratio penalty + * with LZMA_SYNC_FLUSH. + * + * The memory usage formulas are only rough estimates, which are closest to + * reality when dict_size is a power of two. The formulas are more complex + * in reality, and can also change a little between liblzma versions. Use + * lzma_raw_encoder_memusage() to get more accurate estimate of memory usage. + */ +typedef enum { + LZMA_MF_HC3 = 0x03, + /**< + * \brief Hash Chain with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 7.5 + * - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB + */ + + LZMA_MF_HC4 = 0x04, + /**< + * \brief Hash Chain with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: + * - dict_size <= 32 MiB: dict_size * 7.5 + * - dict_size > 32 MiB: dict_size * 6.5 + */ + + LZMA_MF_BT2 = 0x12, + /**< + * \brief Binary Tree with 2-byte hashing + * + * Minimum nice_len: 2 + * + * Memory usage: dict_size * 9.5 + */ + + LZMA_MF_BT3 = 0x13, + /**< + * \brief Binary Tree with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 11.5 + * - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB + */ + + LZMA_MF_BT4 = 0x14 + /**< + * \brief Binary Tree with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: + * - dict_size <= 32 MiB: dict_size * 11.5 + * - dict_size > 32 MiB: dict_size * 10.5 + */ +} lzma_match_finder; + + +/** + * \brief Test if given match finder is supported + * + * Return true if the given match finder is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * isn't listed in lzma_match_finder enumeration; the return value will be + * false. + * + * There is no way to list which match finders are available in this + * particular liblzma version and build. It would be useless, because + * a new match finder, which the application developer wasn't aware, + * could require giving additional options to the encoder that the older + * match finders don't need. + */ +extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Compression modes + * + * This selects the function used to analyze the data produced by the match + * finder. + */ +typedef enum { + LZMA_MODE_FAST = 1, + /**< + * \brief Fast compression + * + * Fast mode is usually at its best when combined with + * a hash chain match finder. + */ + + LZMA_MODE_NORMAL = 2 + /**< + * \brief Normal compression + * + * This is usually notably slower than fast mode. Use this + * together with binary tree match finders to expose the + * full potential of the LZMA1 or LZMA2 encoder. + */ +} lzma_mode; + + +/** + * \brief Test if given compression mode is supported + * + * Return true if the given compression mode is supported by this liblzma + * build. Otherwise false is returned. It is safe to call this with a value + * that isn't listed in lzma_mode enumeration; the return value will be false. + * + * There is no way to list which modes are available in this particular + * liblzma version and build. It would be useless, because a new compression + * mode, which the application developer wasn't aware, could require giving + * additional options to the encoder that the older modes don't need. + */ +extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Options specific to the LZMA1 and LZMA2 filters + * + * Since LZMA1 and LZMA2 share most of the code, it's simplest to share + * the options structure too. For encoding, all but the reserved variables + * need to be initialized unless specifically mentioned otherwise. + * lzma_lzma_preset() can be used to get a good starting point. + * + * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and + * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb. + */ +typedef struct { + /** + * \brief Dictionary size in bytes + * + * Dictionary size indicates how many bytes of the recently processed + * uncompressed data is kept in memory. One method to reduce size of + * the uncompressed data is to store distance-length pairs, which + * indicate what data to repeat from the dictionary buffer. Thus, + * the bigger the dictionary, the better the compression ratio + * usually is. + * + * Maximum size of the dictionary depends on multiple things: + * - Memory usage limit + * - Available address space (not a problem on 64-bit systems) + * - Selected match finder (encoder only) + * + * Currently the maximum dictionary size for encoding is 1.5 GiB + * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit + * systems for certain match finder implementation reasons. In the + * future, there may be match finders that support bigger + * dictionaries. + * + * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e. + * UINT32_MAX), so increasing the maximum dictionary size of the + * encoder won't cause problems for old decoders. + * + * Because extremely small dictionaries sizes would have unneeded + * overhead in the decoder, the minimum dictionary size is 4096 bytes. + * + * \note When decoding, too big dictionary does no other harm + * than wasting memory. + */ + uint32_t dict_size; +# define LZMA_DICT_SIZE_MIN UINT32_C(4096) +# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) + + /** + * \brief Pointer to an initial dictionary + * + * It is possible to initialize the LZ77 history window using + * a preset dictionary. It is useful when compressing many + * similar, relatively small chunks of data independently from + * each other. The preset dictionary should contain typical + * strings that occur in the files being compressed. The most + * probable strings should be near the end of the preset dictionary. + * + * This feature should be used only in special situations. For + * now, it works correctly only with raw encoding and decoding. + * Currently none of the container formats supported by + * liblzma allow preset dictionary when decoding, thus if + * you create a .xz or .lzma file with preset dictionary, it + * cannot be decoded with the regular decoder functions. In the + * future, the .xz format will likely get support for preset + * dictionary though. + */ + const uint8_t *preset_dict; + + /** + * \brief Size of the preset dictionary + * + * Specifies the size of the preset dictionary. If the size is + * bigger than dict_size, only the last dict_size bytes are + * processed. + * + * This variable is read only when preset_dict is not NULL. + * If preset_dict is not NULL but preset_dict_size is zero, + * no preset dictionary is used (identical to only setting + * preset_dict to NULL). + */ + uint32_t preset_dict_size; + + /** + * \brief Number of literal context bits + * + * How many of the highest bits of the previous uncompressed + * eight-bit byte (also known as `literal') are taken into + * account when predicting the bits of the next literal. + * + * E.g. in typical English text, an upper-case letter is + * often followed by a lower-case letter, and a lower-case + * letter is usually followed by another lower-case letter. + * In the US-ASCII character set, the highest three bits are 010 + * for upper-case letters and 011 for lower-case letters. + * When lc is at least 3, the literal coding can take advantage of + * this property in the uncompressed data. + * + * There is a limit that applies to literal context bits and literal + * position bits together: lc + lp <= 4. Without this limit the + * decoding could become very slow, which could have security related + * results in some cases like email servers doing virus scanning. + * This limit also simplifies the internal implementation in liblzma. + * + * There may be LZMA1 streams that have lc + lp > 4 (maximum possible + * lc would be 8). It is not possible to decode such streams with + * liblzma. + */ + uint32_t lc; +# define LZMA_LCLP_MIN 0 +# define LZMA_LCLP_MAX 4 +# define LZMA_LC_DEFAULT 3 + + /** + * \brief Number of literal position bits + * + * lp affects what kind of alignment in the uncompressed data is + * assumed when encoding literals. A literal is a single 8-bit byte. + * See pb below for more information about alignment. + */ + uint32_t lp; +# define LZMA_LP_DEFAULT 0 + + /** + * \brief Number of position bits + * + * pb affects what kind of alignment in the uncompressed data is + * assumed in general. The default means four-byte alignment + * (2^ pb =2^2=4), which is often a good choice when there's + * no better guess. + * + * When the aligment is known, setting pb accordingly may reduce + * the file size a little. E.g. with text files having one-byte + * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can + * improve compression slightly. For UTF-16 text, pb=1 is a good + * choice. If the alignment is an odd number like 3 bytes, pb=0 + * might be the best choice. + * + * Even though the assumed alignment can be adjusted with pb and + * lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment. + * It might be worth taking into account when designing file formats + * that are likely to be often compressed with LZMA1 or LZMA2. + */ + uint32_t pb; +# define LZMA_PB_MIN 0 +# define LZMA_PB_MAX 4 +# define LZMA_PB_DEFAULT 2 + + /** Compression mode */ + lzma_mode mode; + + /** + * \brief Nice length of a match + * + * This determines how many bytes the encoder compares from the match + * candidates when looking for the best match. Once a match of at + * least nice_len bytes long is found, the encoder stops looking for + * better candidates and encodes the match. (Naturally, if the found + * match is actually longer than nice_len, the actual length is + * encoded; it's not truncated to nice_len.) + * + * Bigger values usually increase the compression ratio and + * compression time. For most files, 32 to 128 is a good value, + * which gives very good compression ratio at good speed. + * + * The exact minimum value depends on the match finder. The maximum + * is 273, which is the maximum length of a match that LZMA1 and + * LZMA2 can encode. + */ + uint32_t nice_len; + + /** Match finder ID */ + lzma_match_finder mf; + + /** + * \brief Maximum search depth in the match finder + * + * For every input byte, match finder searches through the hash chain + * or binary tree in a loop, each iteration going one step deeper in + * the chain or tree. The searching stops if + * - a match of at least nice_len bytes long is found; + * - all match candidates from the hash chain or binary tree have + * been checked; or + * - maximum search depth is reached. + * + * Maximum search depth is needed to prevent the match finder from + * wasting too much time in case there are lots of short match + * candidates. On the other hand, stopping the search before all + * candidates have been checked can reduce compression ratio. + * + * Setting depth to zero tells liblzma to use an automatic default + * value, that depends on the selected match finder and nice_len. + * The default is in the range [4, 200] or so (it may vary between + * liblzma versions). + * + * Using a bigger depth value than the default can increase + * compression ratio in some cases. There is no strict maximum value, + * but high values (thousands or millions) should be used with care: + * the encoder could remain fast enough with typical input, but + * malicious input could cause the match finder to slow down + * dramatically, possibly creating a denial of service attack. + */ + uint32_t depth; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + uint32_t reserved_int5; + uint32_t reserved_int6; + uint32_t reserved_int7; + uint32_t reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + void *reserved_ptr1; + void *reserved_ptr2; + +} lzma_options_lzma; + + +/** + * \brief Set a compression preset to lzma_options_lzma structure + * + * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9 + * of the xz command line tool. In addition, it is possible to bitwise-or + * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported. + * The flags are defined in container.h, because the flags are used also + * with lzma_easy_encoder(). + * + * The preset values are subject to changes between liblzma versions. + * + * This function is available only if LZMA1 or LZMA2 encoder has been enabled + * when building liblzma. + */ +extern LZMA_API(lzma_bool) lzma_lzma_preset( + lzma_options_lzma *options, uint32_t preset) lzma_nothrow; diff --git a/liblzma/api/lzma/stream_flags.h b/liblzma/api/lzma/stream_flags.h new file mode 100644 index 0000000..bbdd408 --- /dev/null +++ b/liblzma/api/lzma/stream_flags.h @@ -0,0 +1,223 @@ +/** + * \file lzma/stream_flags.h + * \brief .xz Stream Header and Stream Footer encoder and decoder + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Size of Stream Header and Stream Footer + * + * Stream Header and Stream Footer have the same size and they are not + * going to change even if a newer version of the .xz file format is + * developed in future. + */ +#define LZMA_STREAM_HEADER_SIZE 12 + + +/** + * \brief Options for encoding/decoding Stream Header and Stream Footer + */ +typedef struct { + /** + * \brief Stream Flags format version + * + * To prevent API and ABI breakages if new features are needed in + * Stream Header or Stream Footer, a version number is used to + * indicate which fields in this structure are in use. For now, + * version must always be zero. With non-zero version, the + * lzma_stream_header_encode() and lzma_stream_footer_encode() + * will return LZMA_OPTIONS_ERROR. + * + * lzma_stream_header_decode() and lzma_stream_footer_decode() + * will always set this to the lowest value that supports all the + * features indicated by the Stream Flags field. The application + * must check that the version number set by the decoding functions + * is supported by the application. Otherwise it is possible that + * the application will decode the Stream incorrectly. + */ + uint32_t version; + + /** + * \brief Backward Size + * + * Backward Size must be a multiple of four bytes. In this Stream + * format version, Backward Size is the size of the Index field. + * + * Backward Size isn't actually part of the Stream Flags field, but + * it is convenient to include in this structure anyway. Backward + * Size is present only in the Stream Footer. There is no need to + * initialize backward_size when encoding Stream Header. + * + * lzma_stream_header_decode() always sets backward_size to + * LZMA_VLI_UNKNOWN so that it is convenient to use + * lzma_stream_flags_compare() when both Stream Header and Stream + * Footer have been decoded. + */ + lzma_vli backward_size; +# define LZMA_BACKWARD_SIZE_MIN 4 +# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34) + + /** + * \brief Check ID + * + * This indicates the type of the integrity check calculated from + * uncompressed data. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the + * names of these variables may change. + * + * (We will never be able to use all of these since Stream Flags + * is just two bytes plus Backward Size of four bytes. But it's + * nice to have the proper types when they are needed.) + */ + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + lzma_bool reserved_bool1; + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + uint32_t reserved_int1; + uint32_t reserved_int2; + +} lzma_stream_flags; + + +/** + * \brief Encode Stream Header + * + * \param options Stream Header options to be encoded. + * options->backward_size is ignored and doesn't + * need to be initialized. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Stream Footer + * + * \param options Stream Footer options to be encoded. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Header + * + * \param options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * options->backward_size is always set to LZMA_VLI_UNKNOWN. This is to + * help comparing Stream Flags from Stream Header and Stream Footer with + * lzma_stream_flags_compare(). + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Header. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in the header. + * + * \note When decoding .xz files that contain multiple Streams, it may + * make sense to print "file format not recognized" only if + * decoding of the Stream Header of the _first_ Stream gives + * LZMA_FORMAT_ERROR. If non-first Stream Header gives + * LZMA_FORMAT_ERROR, the message used for LZMA_DATA_ERROR is + * probably more appropriate. + * + * For example, Stream decoder in liblzma uses LZMA_DATA_ERROR if + * LZMA_FORMAT_ERROR is returned by lzma_stream_header_decode() + * when decoding non-first Stream. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Footer + * + * \param options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Footer. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the Stream Footer + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in Stream Footer. + * + * \note If Stream Header was already decoded successfully, but + * decoding Stream Footer returns LZMA_FORMAT_ERROR, the + * application should probably report some other error message + * than "file format not recognized", since the file more likely + * is corrupt (possibly truncated). Stream decoder in liblzma + * uses LZMA_DATA_ERROR in this situation. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Compare two lzma_stream_flags structures + * + * backward_size values are compared only if both are not + * LZMA_VLI_UNKNOWN. + * + * \return - LZMA_OK: Both are equal. If either had backward_size set + * to LZMA_VLI_UNKNOWN, backward_size values were not + * compared or validated. + * - LZMA_DATA_ERROR: The structures differ. + * - LZMA_OPTIONS_ERROR: version in either structure is greater + * than the maximum supported version (currently zero). + * - LZMA_PROG_ERROR: Invalid value, e.g. invalid check or + * backward_size. + */ +extern LZMA_API(lzma_ret) lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) + lzma_nothrow lzma_attr_pure; diff --git a/liblzma/api/lzma/version.h b/liblzma/api/lzma/version.h new file mode 100644 index 0000000..25e8a82 --- /dev/null +++ b/liblzma/api/lzma/version.h @@ -0,0 +1,121 @@ +/** + * \file lzma/version.h + * \brief Version number + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* + * Version number split into components + */ +#define LZMA_VERSION_MAJOR 5 +#define LZMA_VERSION_MINOR 0 +#define LZMA_VERSION_PATCH 0 +#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE + +#ifndef LZMA_VERSION_COMMIT +# define LZMA_VERSION_COMMIT "" +#endif + + +/* + * Map symbolic stability levels to integers. + */ +#define LZMA_VERSION_STABILITY_ALPHA 0 +#define LZMA_VERSION_STABILITY_BETA 1 +#define LZMA_VERSION_STABILITY_STABLE 2 + + +/** + * \brief Compile-time version number + * + * The version number is of format xyyyzzzs where + * - x = major + * - yyy = minor + * - zzz = revision + * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable + * + * The same xyyyzzz triplet is never reused with different stability levels. + * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta + * or 5.1.0 stable. + * + * \note The version number of liblzma has nothing to with + * the version number of Igor Pavlov's LZMA SDK. + */ +#define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + + LZMA_VERSION_MINOR * UINT32_C(10000) \ + + LZMA_VERSION_PATCH * UINT32_C(10) \ + + LZMA_VERSION_STABILITY) + + +/* + * Macros to construct the compile-time version string + */ +#if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA +# define LZMA_VERSION_STABILITY_STRING "alpha" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA +# define LZMA_VERSION_STABILITY_STRING "beta" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE +# define LZMA_VERSION_STABILITY_STRING "" +#else +# error Incorrect LZMA_VERSION_STABILITY +#endif + +#define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ + #major "." #minor "." #patch stability commit + +#define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ + LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) + + +/** + * \brief Compile-time version as a string + * + * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable + * versions don't have any "stable" suffix). In future, a snapshot built + * from source code repository may include an additional suffix, for example + * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form + * in LZMA_VERSION macro. + */ +#define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ + LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ + LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ + LZMA_VERSION_COMMIT) + + +/* #ifndef is needed for use with windres (MinGW or Cygwin). */ +#ifndef LZMA_H_INTERNAL_RC + +/** + * \brief Run-time version number as an integer + * + * Return the value of LZMA_VERSION macro at the compile time of liblzma. + * This allows the application to compare if it was built against the same, + * older, or newer version of liblzma that is currently running. + */ +extern LZMA_API(uint32_t) lzma_version_number(void) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Run-time version as a string + * + * This function may be useful if you want to display which version of + * liblzma your application is currently using. + */ +extern LZMA_API(const char *) lzma_version_string(void) + lzma_nothrow lzma_attr_const; + +#endif diff --git a/liblzma/api/lzma/vli.h b/liblzma/api/lzma/vli.h new file mode 100644 index 0000000..9ad13f2 --- /dev/null +++ b/liblzma/api/lzma/vli.h @@ -0,0 +1,166 @@ +/** + * \file lzma/vli.h + * \brief Variable-length integer handling + * + * In the .xz format, most integers are encoded in a variable-length + * representation, which is sometimes called little endian base-128 encoding. + * This saves space when smaller values are more likely than bigger values. + * + * The encoding scheme encodes seven bits to every byte, using minimum + * number of bytes required to represent the given value. Encodings that use + * non-minimum number of bytes are invalid, thus every integer has exactly + * one encoded representation. The maximum number of bits in a VLI is 63, + * thus the vli argument must be less than or equal to UINT64_MAX / 2. You + * should use LZMA_VLI_MAX for clarity. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum supported value of a variable-length integer + */ +#define LZMA_VLI_MAX (UINT64_MAX / 2) + +/** + * \brief VLI value to denote that the value is unknown + */ +#define LZMA_VLI_UNKNOWN UINT64_MAX + +/** + * \brief Maximum supported encoded length of variable length integers + */ +#define LZMA_VLI_BYTES_MAX 9 + +/** + * \brief VLI constant suffix + */ +#define LZMA_VLI_C(n) UINT64_C(n) + + +/** + * \brief Variable-length integer type + * + * Valid VLI values are in the range [0, LZMA_VLI_MAX]. Unknown value is + * indicated with LZMA_VLI_UNKNOWN, which is the maximum value of the + * underlaying integer type. + * + * lzma_vli will be uint64_t for the foreseeable future. If a bigger size + * is needed in the future, it is guaranteed that 2 * LZMA_VLI_MAX will + * not overflow lzma_vli. This simplifies integer overflow detection. + */ +typedef uint64_t lzma_vli; + + +/** + * \brief Validate a variable-length integer + * + * This is useful to test that application has given acceptable values + * for example in the uncompressed_size and compressed_size variables. + * + * \return True if the integer is representable as VLI or if it + * indicates unknown value. + */ +#define lzma_vli_is_valid(vli) \ + ((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN) + + +/** + * \brief Encode a variable-length integer + * + * This function has two modes: single-call and multi-call. Single-call mode + * encodes the whole integer at once; it is an error if the output buffer is + * too small. Multi-call mode saves the position in *vli_pos, and thus it is + * possible to continue encoding if the buffer becomes full before the whole + * integer has been encoded. + * + * \param vli Integer to be encoded + * \param vli_pos How many VLI-encoded bytes have already been written + * out. When starting to encode a new integer in + * multi-call mode, *vli_pos must be set to zero. + * To use single-call encoding, set vli_pos to NULL. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully encoded. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to too little output space; single-call mode doesn't use + * LZMA_BUF_ERROR, since the application should have checked + * the encoded size with lzma_vli_size(). + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely written out yet. + * - LZMA_STREAM_END: Integer successfully encoded. + * - LZMA_BUF_ERROR: No output space was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli, size_t *vli_pos, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Decode a variable-length integer + * + * Like lzma_vli_encode(), this function has single-call and multi-call modes. + * + * \param vli Pointer to decoded integer. The decoder will + * initialize it to zero when *vli_pos == 0, so + * application isn't required to initialize *vli. + * \param vli_pos How many bytes have already been decoded. When + * starting to decode a new integer in multi-call + * mode, *vli_pos must be initialized to zero. To + * use single-call decoding, set vli_pos to NULL. + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting + * the end of the input buffer before the whole integer was + * decoded; providing no input at all will use LZMA_DATA_ERROR. + * - LZMA_PROG_ERROR: Arguments are not sane. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely decoded yet. + * - LZMA_STREAM_END: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_BUF_ERROR: No input was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *vli, size_t *vli_pos, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; + + +/** + * \brief Get the number of bytes required to encode a VLI + * + * \return Number of bytes on success (1-9). If vli isn't valid, + * zero is returned. + */ +extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli) + lzma_nothrow lzma_attr_pure; diff --git a/liblzma/check/check.c b/liblzma/check/check.c new file mode 100644 index 0000000..60efb47 --- /dev/null +++ b/liblzma/check/check.c @@ -0,0 +1,185 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check.c +/// \brief Single API to access different integrity checks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "check.h" + +#if __MOJOSETUP__ // use our crc32 to keep size down. +void MojoCrc32_append(uint32_t *_crc, const uint8_t *buf, uint32_t len); +extern LZMA_API(uint32_t) +lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ + crc ^= 0xFFFFFFFF; + MojoCrc32_append(&crc, buf, (uint32_t) size); + return crc ^ 0xFFFFFFFF; +} +#endif + + +extern LZMA_API(lzma_bool) +lzma_check_is_supported(lzma_check type) +{ + if ((unsigned int)(type) > LZMA_CHECK_ID_MAX) + return false; + + static const lzma_bool available_checks[LZMA_CHECK_ID_MAX + 1] = { + true, // LZMA_CHECK_NONE + +#ifdef HAVE_CHECK_CRC32 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + +#ifdef HAVE_CHECK_CRC64 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + +#ifdef HAVE_CHECK_SHA256 + true, +#else + false, +#endif + + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + }; + + return available_checks[(unsigned int)(type)]; +} + + +extern LZMA_API(uint32_t) +lzma_check_size(lzma_check type) +{ + if ((unsigned int)(type) > LZMA_CHECK_ID_MAX) + return UINT32_MAX; + + // See file-format.txt section 2.1.1.2. + static const uint8_t check_sizes[LZMA_CHECK_ID_MAX + 1] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 + }; + + return check_sizes[(unsigned int)(type)]; +} + + +extern void +lzma_check_init(lzma_check_state *check, lzma_check type) +{ + switch (type) { + case LZMA_CHECK_NONE: + break; + +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->state.crc32 = 0; + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->state.crc64 = 0; + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_init(check); + break; +#endif + + default: + break; + } + + return; +} + + +extern void +lzma_check_update(lzma_check_state *check, lzma_check type, + const uint8_t *buf, size_t size) +{ + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->state.crc32 = lzma_crc32(buf, size, check->state.crc32); + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->state.crc64 = lzma_crc64(buf, size, check->state.crc64); + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_update(buf, size, check); + break; +#endif + + default: + break; + } + + return; +} + + +extern void +lzma_check_finish(lzma_check_state *check, lzma_check type) +{ + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + check->buffer.u32[0] = conv32le(check->state.crc32); + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + check->buffer.u64[0] = conv64le(check->state.crc64); + break; +#endif + +#ifdef HAVE_CHECK_SHA256 + case LZMA_CHECK_SHA256: + lzma_sha256_finish(check); + break; +#endif + + default: + break; + } + + return; +} diff --git a/liblzma/check/check.h b/liblzma/check/check.h new file mode 100644 index 0000000..5069492 --- /dev/null +++ b/liblzma/check/check.h @@ -0,0 +1,98 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file check.h +/// \brief Internal API to different integrity check functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_CHECK_H +#define LZMA_CHECK_H + +#include "common.h" + +#if __MOJOSETUP__ +#define HAVE_CHECK_CRC32 1 +#endif + +// Index hashing needs the best possible hash function (preferably +// a cryptographic hash) for maximum reliability. +#if defined(HAVE_CHECK_SHA256) +# define LZMA_CHECK_BEST LZMA_CHECK_SHA256 +#elif defined(HAVE_CHECK_CRC64) +# define LZMA_CHECK_BEST LZMA_CHECK_CRC64 +#else +# define LZMA_CHECK_BEST LZMA_CHECK_CRC32 +#endif + + +/// \brief Structure to hold internal state of the check being calculated +/// +/// \note This is not in the public API because this structure may +/// change in future if new integrity check algorithms are added. +typedef struct { + /// Buffer to hold the final result and a temporary buffer for SHA256. + union { + uint8_t u8[64]; + uint32_t u32[16]; + uint64_t u64[8]; + } buffer; + + /// Check-specific data + union { + uint32_t crc32; + uint64_t crc64; + + struct { + /// Internal state + uint32_t state[8]; + + /// Size of the message excluding padding + uint64_t size; + } sha256; + } state; + +} lzma_check_state; + + +/// lzma_crc32_table[0] is needed by LZ encoder so we need to keep +/// the array two-dimensional. +#ifdef HAVE_SMALL +extern uint32_t lzma_crc32_table[1][256]; +extern void lzma_crc32_init(void); +#else +extern const uint32_t lzma_crc32_table[8][256]; +extern const uint64_t lzma_crc64_table[4][256]; +#endif + + +/// \brief Initialize *check depending on type +/// +/// \return LZMA_OK on success. LZMA_UNSUPPORTED_CHECK if the type is not +/// supported by the current version or build of liblzma. +/// LZMA_PROG_ERROR if type > LZMA_CHECK_ID_MAX. +extern void lzma_check_init(lzma_check_state *check, lzma_check type); + +/// Update the check state +extern void lzma_check_update(lzma_check_state *check, lzma_check type, + const uint8_t *buf, size_t size); + +/// Finish the check calculation and store the result to check->buffer.u8. +extern void lzma_check_finish(lzma_check_state *check, lzma_check type); + + +/// Prepare SHA-256 state for new input. +extern void lzma_sha256_init(lzma_check_state *check); + +/// Update the SHA-256 hash state +extern void lzma_sha256_update( + const uint8_t *buf, size_t size, lzma_check_state *check); + +/// Finish the SHA-256 calculation and store the result to check->buffer.u8. +extern void lzma_sha256_finish(lzma_check_state *check); + +#endif diff --git a/liblzma/check/crc_macros.h b/liblzma/check/crc_macros.h new file mode 100644 index 0000000..a7c21b7 --- /dev/null +++ b/liblzma/check/crc_macros.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file crc_macros.h +/// \brief Some endian-dependent macros for CRC32 and CRC64 +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef WORDS_BIGENDIAN +# define A(x) ((x) >> 24) +# define B(x) (((x) >> 16) & 0xFF) +# define C(x) (((x) >> 8) & 0xFF) +# define D(x) ((x) & 0xFF) + +# define S8(x) ((x) << 8) +# define S32(x) ((x) << 32) + +#else +# define A(x) ((x) & 0xFF) +# define B(x) (((x) >> 8) & 0xFF) +# define C(x) (((x) >> 16) & 0xFF) +# define D(x) ((x) >> 24) + +# define S8(x) ((x) >> 8) +# define S32(x) ((x) >> 32) +#endif diff --git a/liblzma/common/alone_decoder.c b/liblzma/common/alone_decoder.c new file mode 100644 index 0000000..039b428 --- /dev/null +++ b/liblzma/common/alone_decoder.c @@ -0,0 +1,232 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.c +/// \brief Decoder for LZMA_Alone files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "alone_decoder.h" +#include "lzma_decoder.h" +#include "lz_decoder.h" + + +struct lzma_coder_s { + lzma_next_coder next; + + enum { + SEQ_PROPERTIES, + SEQ_DICTIONARY_SIZE, + SEQ_UNCOMPRESSED_SIZE, + SEQ_CODER_INIT, + SEQ_CODE, + } sequence; + + /// Position in the header fields + size_t pos; + + /// Uncompressed size decoded from the header + lzma_vli uncompressed_size; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed (only an estimate) + uint64_t memusage; + + /// Options decoded from the header needed to initialize + /// the LZMA decoder + lzma_options_lzma options; +}; + + +static lzma_ret +alone_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + while (*out_pos < out_size + && (coder->sequence == SEQ_CODE || *in_pos < in_size)) + switch (coder->sequence) { + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos])) + return LZMA_FORMAT_ERROR; + + coder->sequence = SEQ_DICTIONARY_SIZE; + ++*in_pos; + break; + + case SEQ_DICTIONARY_SIZE: + coder->options.dict_size + |= (size_t)(in[*in_pos]) << (coder->pos * 8); + + if (++coder->pos == 4) { + if (coder->options.dict_size != UINT32_MAX) { + // A hack to ditch tons of false positives: + // We allow only dictionary sizes that are + // 2^n or 2^n + 2^(n-1). LZMA_Alone created + // only files with 2^n, but accepts any + // dictionary size. If someone complains, this + // will be reconsidered. + uint32_t d = coder->options.dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + if (d != coder->options.dict_size) + return LZMA_FORMAT_ERROR; + } + + coder->pos = 0; + coder->sequence = SEQ_UNCOMPRESSED_SIZE; + } + + ++*in_pos; + break; + + case SEQ_UNCOMPRESSED_SIZE: + coder->uncompressed_size + |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); + ++*in_pos; + if (++coder->pos < 8) + break; + + // Another hack to ditch false positives: Assume that + // if the uncompressed size is known, it must be less + // than 256 GiB. Again, if someone complains, this + // will be reconsidered. + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN + && coder->uncompressed_size + >= (LZMA_VLI_C(1) << 38)) + return LZMA_FORMAT_ERROR; + + // Calculate the memory usage so that it is ready + // for SEQ_CODER_INIT. + coder->memusage = lzma_lzma_decoder_memusage(&coder->options) + + LZMA_MEMUSAGE_BASE; + + coder->pos = 0; + coder->sequence = SEQ_CODER_INIT; + + // Fall through + + case SEQ_CODER_INIT: { + if (coder->memusage > coder->memlimit) + return LZMA_MEMLIMIT_ERROR; + + lzma_filter_info filters[2] = { + { + .init = &lzma_lzma_decoder_init, + .options = &coder->options, + }, { + .init = NULL, + } + }; + + const lzma_ret ret = lzma_next_filter_init(&coder->next, + allocator, filters); + if (ret != LZMA_OK) + return ret; + + // Use a hack to set the uncompressed size. + lzma_lz_decoder_uncompressed(coder->next.coder, + coder->uncompressed_size); + + coder->sequence = SEQ_CODE; + break; + } + + case SEQ_CODE: { + return coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + } + + default: + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +alone_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + *memusage = coder->memusage; + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < coder->memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +extern lzma_ret +lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + uint64_t memlimit) +{ + lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); + + if (memlimit == 0) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &alone_decode; + next->end = &alone_decoder_end; + next->memconfig = &alone_decoder_memconfig; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + next->coder->sequence = SEQ_PROPERTIES; + next->coder->pos = 0; + next->coder->options.dict_size = 0; + next->coder->options.preset_dict = NULL; + next->coder->options.preset_dict_size = 0; + next->coder->uncompressed_size = 0; + next->coder->memlimit = memlimit; + next->coder->memusage = LZMA_MEMUSAGE_BASE; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit) +{ + lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/liblzma/common/alone_decoder.h b/liblzma/common/alone_decoder.h new file mode 100644 index 0000000..70d0d2a --- /dev/null +++ b/liblzma/common/alone_decoder.h @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file alone_decoder.h +/// \brief Decoder for LZMA_Alone files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_ALONE_DECODER_H +#define LZMA_ALONE_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, uint64_t memlimit); + +#endif diff --git a/liblzma/common/block_buffer_decoder.c b/liblzma/common/block_buffer_decoder.c new file mode 100644 index 0000000..ff27a11 --- /dev/null +++ b/liblzma/common/block_buffer_decoder.c @@ -0,0 +1,80 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_buffer_decoder.c +/// \brief Single-call .xz Block decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_block_buffer_decode(lzma_block *block, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + if (in_pos == NULL || (in == NULL && *in_pos != in_size) + || *in_pos > in_size || out_pos == NULL + || (out == NULL && *out_pos != out_size) + || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the Block decoder. + lzma_next_coder block_decoder = LZMA_NEXT_CODER_INIT; + lzma_ret ret = lzma_block_decoder_init( + &block_decoder, allocator, block); + + if (ret == LZMA_OK) { + // Save the positions so that we can restore them in case + // an error occurs. + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Do the actual decoding. + ret = block_decoder.code(block_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + LZMA_FINISH); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Either the input was truncated or the + // output buffer was too small. + assert(*in_pos == in_size + || *out_pos == out_size); + + // If all the input was consumed, then the + // input is truncated, even if the output + // buffer is also full. This is because + // processing the last byte of the Block + // never produces output. + // + // NOTE: This assumption may break when new + // filters are added, if the end marker of + // the filter doesn't consume at least one + // complete byte. + if (*in_pos == in_size) + ret = LZMA_DATA_ERROR; + else + ret = LZMA_BUF_ERROR; + } + + // Restore the positions. + *in_pos = in_start; + *out_pos = out_start; + } + } + + // Free the decoder memory. This needs to be done even if + // initialization fails, because the internal API doesn't + // require the initialization function to free its memory on error. + lzma_next_end(&block_decoder, allocator); + + return ret; +} diff --git a/liblzma/common/block_decoder.c b/liblzma/common/block_decoder.c new file mode 100644 index 0000000..a3ce6f4 --- /dev/null +++ b/liblzma/common/block_decoder.c @@ -0,0 +1,242 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.c +/// \brief Decodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "block_decoder.h" +#include "filter_decoder.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_CODE, + SEQ_PADDING, + SEQ_CHECK, + } sequence; + + /// The filters in the chain; initialized with lzma_raw_decoder_init(). + lzma_next_coder next; + + /// Decoding options; we also write Compressed Size and Uncompressed + /// Size back to this structure when the decoding has been finished. + lzma_block *block; + + /// Compressed Size calculated while decoding + lzma_vli compressed_size; + + /// Uncompressed Size calculated while decoding + lzma_vli uncompressed_size; + + /// Maximum allowed Compressed Size; this takes into account the + /// size of the Block Header and Check fields when Compressed Size + /// is unknown. + lzma_vli compressed_limit; + + /// Position when reading the Check field + size_t check_pos; + + /// Check of the uncompressed data + lzma_check_state check; +}; + + +static inline bool +update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) +{ + if (limit > LZMA_VLI_MAX) + limit = LZMA_VLI_MAX; + + if (limit < *size || limit - *size < add) + return true; + + *size += add; + + return false; +} + + +static inline bool +is_size_valid(lzma_vli size, lzma_vli reference) +{ + return reference == LZMA_VLI_UNKNOWN || reference == size; +} + + +static lzma_ret +block_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + switch (coder->sequence) { + case SEQ_CODE: { + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, + allocator, in, in_pos, in_size, + out, out_pos, out_size, action); + + const size_t in_used = *in_pos - in_start; + const size_t out_used = *out_pos - out_start; + + // NOTE: We compare to compressed_limit here, which prevents + // the total size of the Block growing past LZMA_VLI_MAX. + if (update_size(&coder->compressed_size, in_used, + coder->compressed_limit) + || update_size(&coder->uncompressed_size, + out_used, + coder->block->uncompressed_size)) + return LZMA_DATA_ERROR; + + lzma_check_update(&coder->check, coder->block->check, + out + out_start, out_used); + + if (ret != LZMA_STREAM_END) + return ret; + + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->block->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->block->uncompressed_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->block. The caller + // may use this information to construct Index. + coder->block->compressed_size = coder->compressed_size; + coder->block->uncompressed_size = coder->uncompressed_size; + + coder->sequence = SEQ_PADDING; + } + + // Fall through + + case SEQ_PADDING: + // Compressed Data is padded to a multiple of four bytes. + while (coder->compressed_size & 3) { + if (*in_pos >= in_size) + return LZMA_OK; + + // We use compressed_size here just get the Padding + // right. The actual Compressed Size was stored to + // coder->block already, and won't be modified by + // us anymore. + ++coder->compressed_size; + + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + } + + if (coder->block->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; + + lzma_check_finish(&coder->check, coder->block->check); + coder->sequence = SEQ_CHECK; + + // Fall through + + case SEQ_CHECK: { + const size_t check_size = lzma_check_size(coder->block->check); + lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check, + &coder->check_pos, check_size); + if (coder->check_pos < check_size) + return LZMA_OK; + + // Validate the Check only if we support it. + // coder->check.buffer may be uninitialized + // when the Check ID is not supported. + if (lzma_check_is_supported(coder->block->check) + && memcmp(coder->block->raw_check, + coder->check.buffer.u8, + check_size) != 0) + return LZMA_DATA_ERROR; + + return LZMA_STREAM_END; + } + } + + return LZMA_PROG_ERROR; +} + + +static void +block_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_block *block) +{ + lzma_next_coder_init(&lzma_block_decoder_init, next, allocator); + + // Validate the options. lzma_block_unpadded_size() does that for us + // except for Uncompressed Size and filters. Filters are validated + // by the raw decoder. + if (lzma_block_unpadded_size(block) == 0 + || !lzma_vli_is_valid(block->uncompressed_size)) + return LZMA_PROG_ERROR; + + // Allocate and initialize *next->coder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &block_decode; + next->end = &block_decoder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Basic initializations + next->coder->sequence = SEQ_CODE; + next->coder->block = block; + next->coder->compressed_size = 0; + next->coder->uncompressed_size = 0; + + // If Compressed Size is not known, we calculate the maximum allowed + // value so that encoded size of the Block (including Block Padding) + // is still a valid VLI and a multiple of four. + next->coder->compressed_limit + = block->compressed_size == LZMA_VLI_UNKNOWN + ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) + - block->header_size + - lzma_check_size(block->check) + : block->compressed_size; + + // Initialize the check. It's caller's problem if the Check ID is not + // supported, and the Block decoder cannot verify the Check field. + // Caller can test lzma_check_is_supported(block->check). + next->coder->check_pos = 0; + lzma_check_init(&next->coder->check, block->check); + + // Initialize the filter chain. + return lzma_raw_decoder_init(&next->coder->next, allocator, + block->filters); +} + + +extern LZMA_API(lzma_ret) +lzma_block_decoder(lzma_stream *strm, lzma_block *block) +{ + lzma_next_strm_init(lzma_block_decoder_init, strm, block); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/liblzma/common/block_decoder.h b/liblzma/common/block_decoder.h new file mode 100644 index 0000000..7da9df6 --- /dev/null +++ b/liblzma/common/block_decoder.h @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_decoder.h +/// \brief Decodes .xz Blocks +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BLOCK_DECODER_H +#define LZMA_BLOCK_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_block *block); + +#endif diff --git a/liblzma/common/block_header_decoder.c b/liblzma/common/block_header_decoder.c new file mode 100644 index 0000000..d98eea2 --- /dev/null +++ b/liblzma/common/block_header_decoder.c @@ -0,0 +1,130 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header_decoder.c +/// \brief Decodes Block Header from .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "check.h" + + +static void +free_properties(lzma_block *block, lzma_allocator *allocator) +{ + // Free allocated filter options. The last array member is not + // touched after the initialization in the beginning of + // lzma_block_header_decode(), so we don't need to touch that here. +#if __MOJOSETUP__ + size_t i; + for (i = 0; i < LZMA_FILTERS_MAX; ++i) { +#else + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) { +#endif + lzma_free(block->filters[i].options, allocator); + block->filters[i].id = LZMA_VLI_UNKNOWN; + block->filters[i].options = NULL; + } + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_block_header_decode(lzma_block *block, + lzma_allocator *allocator, const uint8_t *in) +{ + // NOTE: We consider the header to be corrupt not only when the + // CRC32 doesn't match, but also when variable-length integers + // are invalid or over 63 bits, or if the header is too small + // to contain the claimed information. + + // Initialize the filter options array. This way the caller can + // safely free() the options even if an error occurs in this function. +#if __MOJOSETUP__ + size_t i; + for (i = 0; i <= LZMA_FILTERS_MAX; ++i) { +#else + for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) { +#endif + block->filters[i].id = LZMA_VLI_UNKNOWN; + block->filters[i].options = NULL; + } + + // Always zero for now. + block->version = 0; + + // Validate Block Header Size and Check type. The caller must have + // already set these, so it is a programming error if this test fails. + if (lzma_block_header_size_decode(in[0]) != block->header_size + || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + // Exclude the CRC32 field. + const size_t in_size = block->header_size - 4; + + // Verify CRC32 + if (lzma_crc32(in, in_size, 0) != unaligned_read32le(in + in_size)) + return LZMA_DATA_ERROR; + + // Check for unsupported flags. + if (in[1] & 0x3C) + return LZMA_OPTIONS_ERROR; + + // Start after the Block Header Size and Block Flags fields. + size_t in_pos = 2; + + // Compressed Size + if (in[1] & 0x40) { + return_if_error(lzma_vli_decode(&block->compressed_size, + NULL, in, &in_pos, in_size)); + + // Validate Compressed Size. This checks that it isn't zero + // and that the total size of the Block is a valid VLI. + if (lzma_block_unpadded_size(block) == 0) + return LZMA_DATA_ERROR; + } else { + block->compressed_size = LZMA_VLI_UNKNOWN; + } + + // Uncompressed Size + if (in[1] & 0x80) + return_if_error(lzma_vli_decode(&block->uncompressed_size, + NULL, in, &in_pos, in_size)); + else + block->uncompressed_size = LZMA_VLI_UNKNOWN; + + // Filter Flags + const size_t filter_count = (in[1] & 3) + 1; +#if __MOJOSETUP__ + for (i = 0; i < filter_count; ++i) { +#else + for (size_t i = 0; i < filter_count; ++i) { +#endif + const lzma_ret ret = lzma_filter_flags_decode( + &block->filters[i], allocator, + in, &in_pos, in_size); + if (ret != LZMA_OK) { + free_properties(block, allocator); + return ret; + } + } + + // Padding + while (in_pos < in_size) { + if (in[in_pos++] != 0x00) { + free_properties(block, allocator); + + // Possibly some new field present so use + // LZMA_OPTIONS_ERROR instead of LZMA_DATA_ERROR. + return LZMA_OPTIONS_ERROR; + } + } + + return LZMA_OK; +} diff --git a/liblzma/common/block_util.c b/liblzma/common/block_util.c new file mode 100644 index 0000000..62c9345 --- /dev/null +++ b/liblzma/common/block_util.c @@ -0,0 +1,90 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header.c +/// \brief Utility functions to handle lzma_block +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" + + +extern LZMA_API(lzma_ret) +lzma_block_compressed_size(lzma_block *block, lzma_vli unpadded_size) +{ + // Validate everything but Uncompressed Size and filters. + if (lzma_block_unpadded_size(block) == 0) + return LZMA_PROG_ERROR; + + const uint32_t container_size = block->header_size + + lzma_check_size(block->check); + + // Validate that Compressed Size will be greater than zero. + if (unpadded_size <= container_size) + return LZMA_DATA_ERROR; + + // Calculate what Compressed Size is supposed to be. + // If Compressed Size was present in Block Header, + // compare that the new value matches it. + const lzma_vli compressed_size = unpadded_size - container_size; + if (block->compressed_size != LZMA_VLI_UNKNOWN + && block->compressed_size != compressed_size) + return LZMA_DATA_ERROR; + + block->compressed_size = compressed_size; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_vli) +lzma_block_unpadded_size(const lzma_block *block) +{ + // Validate the values that we are interested in i.e. all but + // Uncompressed Size and the filters. + // + // NOTE: This function is used for validation too, so it is + // essential that these checks are always done even if + // Compressed Size is unknown. + if (block == NULL || block->version != 0 + || block->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || block->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (block->header_size & 3) + || !lzma_vli_is_valid(block->compressed_size) + || block->compressed_size == 0 + || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX) + return 0; + + // If Compressed Size is unknown, return that we cannot know + // size of the Block either. + if (block->compressed_size == LZMA_VLI_UNKNOWN) + return LZMA_VLI_UNKNOWN; + + // Calculate Unpadded Size and validate it. + const lzma_vli unpadded_size = block->compressed_size + + block->header_size + + lzma_check_size(block->check); + + assert(unpadded_size >= UNPADDED_SIZE_MIN); + if (unpadded_size > UNPADDED_SIZE_MAX) + return 0; + + return unpadded_size; +} + + +extern LZMA_API(lzma_vli) +lzma_block_total_size(const lzma_block *block) +{ + lzma_vli unpadded_size = lzma_block_unpadded_size(block); + + if (unpadded_size != LZMA_VLI_UNKNOWN) + unpadded_size = vli_ceil4(unpadded_size); + + return unpadded_size; +} diff --git a/liblzma/common/common.c b/liblzma/common/common.c new file mode 100644 index 0000000..0408e15 --- /dev/null +++ b/liblzma/common/common.c @@ -0,0 +1,388 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file common.h +/// \brief Common functions needed in many places in liblzma +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +///////////// +// Version // +///////////// + +extern LZMA_API(uint32_t) +lzma_version_number(void) +{ + return LZMA_VERSION; +} + + +extern LZMA_API(const char *) +lzma_version_string(void) +{ + return LZMA_VERSION_STRING; +} + + +/////////////////////// +// Memory allocation // +/////////////////////// + +extern void * lzma_attribute((malloc)) +lzma_alloc(size_t size, lzma_allocator *allocator) +{ + // Some malloc() variants return NULL if called with size == 0. + if (size == 0) + size = 1; + + void *ptr; + + if (allocator != NULL && allocator->alloc != NULL) + ptr = allocator->alloc(allocator->opaque, 1, size); + else + ptr = malloc(size); + + return ptr; +} + + +extern void +lzma_free(void *ptr, lzma_allocator *allocator) +{ + if (allocator != NULL && allocator->free != NULL) + allocator->free(allocator->opaque, ptr); + else + free(ptr); + + return; +} + + +////////// +// Misc // +////////// + +extern size_t +lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size) +{ + const size_t in_avail = in_size - *in_pos; + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = my_min(in_avail, out_avail); + + memcpy(out + *out_pos, in + *in_pos, copy_size); + + *in_pos += copy_size; + *out_pos += copy_size; + + return copy_size; +} + + +extern lzma_ret +lzma_next_filter_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + lzma_next_coder_init(filters[0].init, next, allocator); + next->id = filters[0].id; + return filters[0].init == NULL + ? LZMA_OK : filters[0].init(next, allocator, filters); +} + + +extern lzma_ret +lzma_next_filter_update(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *reversed_filters) +{ + // Check that the application isn't trying to change the Filter ID. + // End of filters is indicated with LZMA_VLI_UNKNOWN in both + // reversed_filters[0].id and next->id. + if (reversed_filters[0].id != next->id) + return LZMA_PROG_ERROR; + + if (reversed_filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_OK; + + assert(next->update != NULL); + return next->update(next->coder, allocator, NULL, reversed_filters); +} + + +extern void +lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator) +{ + if (next->init != (uintptr_t)(NULL)) { + // To avoid tiny end functions that simply call + // lzma_free(coder, allocator), we allow leaving next->end + // NULL and call lzma_free() here. + if (next->end != NULL) + next->end(next->coder, allocator); + else + lzma_free(next->coder, allocator); + + // Reset the variables so the we don't accidentally think + // that it is an already initialized coder. + *next = LZMA_NEXT_CODER_INIT; + } + + return; +} + + +////////////////////////////////////// +// External to internal API wrapper // +////////////////////////////////////// + +extern lzma_ret +lzma_strm_init(lzma_stream *strm) +{ + if (strm == NULL) + return LZMA_PROG_ERROR; + + if (strm->internal == NULL) { + strm->internal = lzma_alloc(sizeof(lzma_internal), + strm->allocator); + if (strm->internal == NULL) + return LZMA_MEM_ERROR; + + strm->internal->next = LZMA_NEXT_CODER_INIT; + } + + strm->internal->supported_actions[LZMA_RUN] = false; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = false; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = false; + strm->internal->supported_actions[LZMA_FINISH] = false; + strm->internal->sequence = ISEQ_RUN; + strm->internal->allow_buf_error = false; + + strm->total_in = 0; + strm->total_out = 0; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_code(lzma_stream *strm, lzma_action action) +{ + // Sanity checks + if ((strm->next_in == NULL && strm->avail_in != 0) + || (strm->next_out == NULL && strm->avail_out != 0) + || strm->internal == NULL + || strm->internal->next.code == NULL + || (unsigned int)(action) > LZMA_FINISH + || !strm->internal->supported_actions[action]) + return LZMA_PROG_ERROR; + + // Check if unsupported members have been set to non-zero or non-NULL, + // which would indicate that some new feature is wanted. + if (strm->reserved_ptr1 != NULL + || strm->reserved_ptr2 != NULL + || strm->reserved_ptr3 != NULL + || strm->reserved_ptr4 != NULL + || strm->reserved_int1 != 0 + || strm->reserved_int2 != 0 + || strm->reserved_int3 != 0 + || strm->reserved_int4 != 0 + || strm->reserved_enum1 != LZMA_RESERVED_ENUM + || strm->reserved_enum2 != LZMA_RESERVED_ENUM) + return LZMA_OPTIONS_ERROR; + + switch (strm->internal->sequence) { + case ISEQ_RUN: + switch (action) { + case LZMA_RUN: + break; + + case LZMA_SYNC_FLUSH: + strm->internal->sequence = ISEQ_SYNC_FLUSH; + break; + + case LZMA_FULL_FLUSH: + strm->internal->sequence = ISEQ_FULL_FLUSH; + break; + + case LZMA_FINISH: + strm->internal->sequence = ISEQ_FINISH; + break; + } + + break; + + case ISEQ_SYNC_FLUSH: + // The same action must be used until we return + // LZMA_STREAM_END, and the amount of input must not change. + if (action != LZMA_SYNC_FLUSH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_FULL_FLUSH: + if (action != LZMA_FULL_FLUSH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_FINISH: + if (action != LZMA_FINISH + || strm->internal->avail_in != strm->avail_in) + return LZMA_PROG_ERROR; + + break; + + case ISEQ_END: + return LZMA_STREAM_END; + + case ISEQ_ERROR: + default: + return LZMA_PROG_ERROR; + } + + size_t in_pos = 0; + size_t out_pos = 0; + lzma_ret ret = strm->internal->next.code( + strm->internal->next.coder, strm->allocator, + strm->next_in, &in_pos, strm->avail_in, + strm->next_out, &out_pos, strm->avail_out, action); + + strm->next_in += in_pos; + strm->avail_in -= in_pos; + strm->total_in += in_pos; + + strm->next_out += out_pos; + strm->avail_out -= out_pos; + strm->total_out += out_pos; + + strm->internal->avail_in = strm->avail_in; + + switch (ret) { + case LZMA_OK: + // Don't return LZMA_BUF_ERROR when it happens the first time. + // This is to avoid returning LZMA_BUF_ERROR when avail_out + // was zero but still there was no more data left to written + // to next_out. + if (out_pos == 0 && in_pos == 0) { + if (strm->internal->allow_buf_error) + ret = LZMA_BUF_ERROR; + else + strm->internal->allow_buf_error = true; + } else { + strm->internal->allow_buf_error = false; + } + break; + + case LZMA_STREAM_END: + if (strm->internal->sequence == ISEQ_SYNC_FLUSH + || strm->internal->sequence == ISEQ_FULL_FLUSH) + strm->internal->sequence = ISEQ_RUN; + else + strm->internal->sequence = ISEQ_END; + + // Fall through + + case LZMA_NO_CHECK: + case LZMA_UNSUPPORTED_CHECK: + case LZMA_GET_CHECK: + case LZMA_MEMLIMIT_ERROR: + // Something else than LZMA_OK, but not a fatal error, + // that is, coding may be continued (except if ISEQ_END). + strm->internal->allow_buf_error = false; + break; + + default: + // All the other errors are fatal; coding cannot be continued. + assert(ret != LZMA_BUF_ERROR); + strm->internal->sequence = ISEQ_ERROR; + break; + } + + return ret; +} + + +extern LZMA_API(void) +lzma_end(lzma_stream *strm) +{ + if (strm != NULL && strm->internal != NULL) { + lzma_next_end(&strm->internal->next, strm->allocator); + lzma_free(strm->internal, strm->allocator); + strm->internal = NULL; + } + + return; +} + + +extern LZMA_API(lzma_check) +lzma_get_check(const lzma_stream *strm) +{ + // Return LZMA_CHECK_NONE if we cannot know the check type. + // It's a bug in the application if this happens. + if (strm->internal->next.get_check == NULL) + return LZMA_CHECK_NONE; + + return strm->internal->next.get_check(strm->internal->next.coder); +} + + +extern LZMA_API(uint64_t) +lzma_memusage(const lzma_stream *strm) +{ + uint64_t memusage; + uint64_t old_memlimit; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL + || strm->internal->next.memconfig( + strm->internal->next.coder, + &memusage, &old_memlimit, 0) != LZMA_OK) + return 0; + + return memusage; +} + + +extern LZMA_API(uint64_t) +lzma_memlimit_get(const lzma_stream *strm) +{ + uint64_t old_memlimit; + uint64_t memusage; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL + || strm->internal->next.memconfig( + strm->internal->next.coder, + &memusage, &old_memlimit, 0) != LZMA_OK) + return 0; + + return old_memlimit; +} + + +extern LZMA_API(lzma_ret) +lzma_memlimit_set(lzma_stream *strm, uint64_t new_memlimit) +{ + // Dummy variables to simplify memconfig functions + uint64_t old_memlimit; + uint64_t memusage; + + if (strm == NULL || strm->internal == NULL + || strm->internal->next.memconfig == NULL) + return LZMA_PROG_ERROR; + + if (new_memlimit != 0 && new_memlimit < LZMA_MEMUSAGE_BASE) + return LZMA_MEMLIMIT_ERROR; + + return strm->internal->next.memconfig(strm->internal->next.coder, + &memusage, &old_memlimit, new_memlimit); +} diff --git a/liblzma/common/common.h b/liblzma/common/common.h new file mode 100644 index 0000000..957c534 --- /dev/null +++ b/liblzma/common/common.h @@ -0,0 +1,296 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file common.h +/// \brief Definitions common to the whole liblzma library +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_COMMON_H +#define LZMA_COMMON_H + +#if __MOJOSETUP__ +#define HAVE_DECODER_LZMA1 1 +#define HAVE_DECODER_LZMA2 1 +#define HAVE_DECODER_X86 1 +#define HAVE_DECODER_POWERPC 1 +#define HAVE_DECODER_IA64 1 +#define HAVE_DECODER_ARM 1 +#define HAVE_DECODER_ARMTHUMB 1 +#define HAVE_DECODER_SPARC 1 +#define HAVE_DECODER_DELTA 1 +#endif + +#include "sysdefs.h" +#include "mythread.h" +#include "tuklib_integer.h" + +#if defined(_WIN32) || defined(__CYGWIN__) +# ifdef DLL_EXPORT +# define LZMA_API_EXPORT __declspec(dllexport) +# else +# define LZMA_API_EXPORT +# endif +// Don't use ifdef or defined() below. +#elif HAVE_VISIBILITY +# define LZMA_API_EXPORT __attribute__((__visibility__("default"))) +#else +# define LZMA_API_EXPORT +#endif + +#define LZMA_API(type) LZMA_API_EXPORT type LZMA_API_CALL + +#include "lzma.h" + +// These allow helping the compiler in some often-executed branches, whose +// result is almost always the same. +#ifdef __GNUC__ +# define likely(expr) __builtin_expect(expr, true) +# define unlikely(expr) __builtin_expect(expr, false) +#else +# define likely(expr) (expr) +# define unlikely(expr) (expr) +#endif + + +/// Size of temporary buffers needed in some filters +#define LZMA_BUFFER_SIZE 4096 + + +/// Starting value for memory usage estimates. Instead of calculating size +/// of _every_ structure and taking into account malloc() overhead etc., we +/// add a base size to all memory usage estimates. It's not very accurate +/// but should be easily good enough. +#define LZMA_MEMUSAGE_BASE (UINT64_C(1) << 15) + +/// Start of internal Filter ID space. These IDs must never be used +/// in Streams. +#define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62) + + +/// Supported flags that can be passed to lzma_stream_decoder() +/// or lzma_auto_decoder(). +#define LZMA_SUPPORTED_FLAGS \ + ( LZMA_TELL_NO_CHECK \ + | LZMA_TELL_UNSUPPORTED_CHECK \ + | LZMA_TELL_ANY_CHECK \ + | LZMA_CONCATENATED ) + + +/// Type of encoder/decoder specific data; the actual structure is defined +/// differently in different coders. +typedef struct lzma_coder_s lzma_coder; + +typedef struct lzma_next_coder_s lzma_next_coder; + +typedef struct lzma_filter_info_s lzma_filter_info; + + +/// Type of a function used to initialize a filter encoder or decoder +typedef lzma_ret (*lzma_init_function)( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +/// Type of a function to do some kind of coding work (filters, Stream, +/// Block encoders/decoders etc.). Some special coders use don't use both +/// input and output buffers, but for simplicity they still use this same +/// function prototype. +typedef lzma_ret (*lzma_code_function)( + lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +/// Type of a function to free the memory allocated for the coder +typedef void (*lzma_end_function)( + lzma_coder *coder, lzma_allocator *allocator); + + +/// Raw coder validates and converts an array of lzma_filter structures to +/// an array of lzma_filter_info structures. This array is used with +/// lzma_next_filter_init to initialize the filter chain. +struct lzma_filter_info_s { + /// Filter ID. This is used only by the encoder + /// with lzma_filters_update(). + lzma_vli id; + + /// Pointer to function used to initialize the filter. + /// This is NULL to indicate end of array. + lzma_init_function init; + + /// Pointer to filter's options structure + void *options; +}; + + +/// Hold data and function pointers of the next filter in the chain. +struct lzma_next_coder_s { + /// Pointer to coder-specific data + lzma_coder *coder; + + /// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't + /// point to a filter coder. + lzma_vli id; + + /// "Pointer" to init function. This is never called here. + /// We need only to detect if we are initializing a coder + /// that was allocated earlier. See lzma_next_coder_init and + /// lzma_next_strm_init macros in this file. + uintptr_t init; + + /// Pointer to function to do the actual coding + lzma_code_function code; + + /// Pointer to function to free lzma_next_coder.coder. This can + /// be NULL; in that case, lzma_free is called to free + /// lzma_next_coder.coder. + lzma_end_function end; + + /// Pointer to function to return the type of the integrity check. + /// Most coders won't support this. + lzma_check (*get_check)(const lzma_coder *coder); + + /// Pointer to function to get and/or change the memory usage limit. + /// If new_memlimit == 0, the limit is not changed. + lzma_ret (*memconfig)(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit); + + /// Update the filter-specific options or the whole filter chain + /// in the encoder. + lzma_ret (*update)(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters, + const lzma_filter *reversed_filters); +}; + + +/// Macro to initialize lzma_next_coder structure +#define LZMA_NEXT_CODER_INIT \ + (lzma_next_coder){ \ + .coder = NULL, \ + .init = (uintptr_t)(NULL), \ + .id = LZMA_VLI_UNKNOWN, \ + .code = NULL, \ + .end = NULL, \ + .get_check = NULL, \ + .memconfig = NULL, \ + .update = NULL, \ + } + + +/// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to +/// this is stored in lzma_stream. +struct lzma_internal_s { + /// The actual coder that should do something useful + lzma_next_coder next; + + /// Track the state of the coder. This is used to validate arguments + /// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH + /// is used on every call to lzma_code until next.code has returned + /// LZMA_STREAM_END. + enum { + ISEQ_RUN, + ISEQ_SYNC_FLUSH, + ISEQ_FULL_FLUSH, + ISEQ_FINISH, + ISEQ_END, + ISEQ_ERROR, + } sequence; + + /// A copy of lzma_stream avail_in. This is used to verify that the + /// amount of input doesn't change once e.g. LZMA_FINISH has been + /// used. + size_t avail_in; + + /// Indicates which lzma_action values are allowed by next.code. + bool supported_actions[4]; + + /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was + /// made (no input consumed and no output produced by next.code). + bool allow_buf_error; +}; + + +/// Allocates memory +extern void *lzma_alloc(size_t size, lzma_allocator *allocator) + lzma_attribute((malloc)); + +/// Frees memory +extern void lzma_free(void *ptr, lzma_allocator *allocator); + + +/// Allocates strm->internal if it is NULL, and initializes *strm and +/// strm->internal. This function is only called via lzma_next_strm_init macro. +extern lzma_ret lzma_strm_init(lzma_stream *strm); + +/// Initializes the next filter in the chain, if any. This takes care of +/// freeing the memory of previously initialized filter if it is different +/// than the filter being initialized now. This way the actual filter +/// initialization functions don't need to use lzma_next_coder_init macro. +extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +/// Update the next filter in the chain, if any. This checks that +/// the application is not trying to change the Filter IDs. +extern lzma_ret lzma_next_filter_update( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *reversed_filters); + +/// Frees the memory allocated for next->coder either using next->end or, +/// if next->end is NULL, using lzma_free. +extern void lzma_next_end(lzma_next_coder *next, lzma_allocator *allocator); + + +/// Copy as much data as possible from in[] to out[] and update *in_pos +/// and *out_pos accordingly. Returns the number of bytes copied. +extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + +/// \brief Return if expression doesn't evaluate to LZMA_OK +/// +/// There are several situations where we want to return immediately +/// with the value of expr if it isn't LZMA_OK. This macro shortens +/// the code a little. +#define return_if_error(expr) \ +do { \ + const lzma_ret ret_ = (expr); \ + if (ret_ != LZMA_OK) \ + return ret_; \ +} while (0) + + +/// If next isn't already initialized, free the previous coder. Then mark +/// that next is _possibly_ initialized for the coder using this macro. +/// "Possibly" means that if e.g. allocation of next->coder fails, the +/// structure isn't actually initialized for this coder, but leaving +/// next->init to func is still OK. +#define lzma_next_coder_init(func, next, allocator) \ +do { \ + if ((uintptr_t)(func) != (next)->init) \ + lzma_next_end(next, allocator); \ + (next)->init = (uintptr_t)(func); \ +} while (0) + + +/// Initializes lzma_strm and calls func() to initialize strm->internal->next. +/// (The function being called will use lzma_next_coder_init()). If +/// initialization fails, memory that wasn't freed by func() is freed +/// along strm->internal. +#define lzma_next_strm_init(func, strm, ...) \ +do { \ + return_if_error(lzma_strm_init(strm)); \ + const lzma_ret ret_ = func(&(strm)->internal->next, \ + (strm)->allocator, __VA_ARGS__); \ + if (ret_ != LZMA_OK) { \ + lzma_end(strm); \ + return ret_; \ + } \ +} while (0) + +#endif diff --git a/liblzma/common/easy_decoder_memusage.c b/liblzma/common/easy_decoder_memusage.c new file mode 100644 index 0000000..20bcd5b --- /dev/null +++ b/liblzma/common/easy_decoder_memusage.c @@ -0,0 +1,24 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_decoder_memusage.c +/// \brief Decoder memory usage calculation to match easy encoder presets +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern LZMA_API(uint64_t) +lzma_easy_decoder_memusage(uint32_t preset) +{ + lzma_options_easy opt_easy; + if (lzma_easy_preset(&opt_easy, preset)) + return UINT32_MAX; + + return lzma_raw_decoder_memusage(opt_easy.filters); +} diff --git a/liblzma/common/easy_preset.c b/liblzma/common/easy_preset.c new file mode 100644 index 0000000..2f98598 --- /dev/null +++ b/liblzma/common/easy_preset.c @@ -0,0 +1,27 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_preset.c +/// \brief Preset handling for easy encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "easy_preset.h" + + +extern bool +lzma_easy_preset(lzma_options_easy *opt_easy, uint32_t preset) +{ + if (lzma_lzma_preset(&opt_easy->opt_lzma, preset)) + return true; + + opt_easy->filters[0].id = LZMA_FILTER_LZMA2; + opt_easy->filters[0].options = &opt_easy->opt_lzma; + opt_easy->filters[1].id = LZMA_VLI_UNKNOWN; + + return false; +} diff --git a/liblzma/common/easy_preset.h b/liblzma/common/easy_preset.h new file mode 100644 index 0000000..382ade8 --- /dev/null +++ b/liblzma/common/easy_preset.h @@ -0,0 +1,32 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy_preset.h +/// \brief Preset handling for easy encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +typedef struct { + /// We need to keep the filters array available in case + /// LZMA_FULL_FLUSH is used. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + /// Options for LZMA2 + lzma_options_lzma opt_lzma; + + // Options for more filters can be added later, so this struct + // is not ready to be put into the public API. + +} lzma_options_easy; + + +/// Set *easy to the settings given by the preset. Returns true on error, +/// false on success. +extern bool lzma_easy_preset(lzma_options_easy *easy, uint32_t preset); diff --git a/liblzma/common/filter_buffer_decoder.c b/liblzma/common/filter_buffer_decoder.c new file mode 100644 index 0000000..2d35ef8 --- /dev/null +++ b/liblzma/common/filter_buffer_decoder.c @@ -0,0 +1,87 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_buffer_decoder.c +/// \brief Single-call raw decoding +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_raw_buffer_decode(const lzma_filter *filters, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + // Validate what isn't validated later in filter_common.c. + if (in == NULL || in_pos == NULL || *in_pos > in_size || out == NULL + || out_pos == NULL || *out_pos > out_size) + return LZMA_PROG_ERROR; + + // Initialize the decoer. + lzma_next_coder next = LZMA_NEXT_CODER_INIT; + return_if_error(lzma_raw_decoder_init(&next, allocator, filters)); + + // Store the positions so that we can restore them if something + // goes wrong. + const size_t in_start = *in_pos; + const size_t out_start = *out_pos; + + // Do the actual decoding and free decoder's memory. + lzma_ret ret = next.code(next.coder, allocator, in, in_pos, in_size, + out, out_pos, out_size, LZMA_FINISH); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + if (ret == LZMA_OK) { + // Either the input was truncated or the + // output buffer was too small. + assert(*in_pos == in_size || *out_pos == out_size); + + if (*in_pos != in_size) { + // Since input wasn't consumed completely, + // the output buffer became full and is + // too small. + ret = LZMA_BUF_ERROR; + + } else if (*out_pos != out_size) { + // Since output didn't became full, the input + // has to be truncated. + ret = LZMA_DATA_ERROR; + + } else { + // All the input was consumed and output + // buffer is full. Now we don't immediately + // know the reason for the error. Try + // decoding one more byte. If it succeeds, + // then the output buffer was too small. If + // we cannot get a new output byte, the input + // is truncated. + uint8_t tmp[1]; + size_t tmp_pos = 0; + (void)next.code(next.coder, allocator, + in, in_pos, in_size, + tmp, &tmp_pos, 1, LZMA_FINISH); + + if (tmp_pos == 1) + ret = LZMA_BUF_ERROR; + else + ret = LZMA_DATA_ERROR; + } + } + + // Restore the positions. + *in_pos = in_start; + *out_pos = out_start; + } + + lzma_next_end(&next, allocator); + + return ret; +} diff --git a/liblzma/common/filter_common.c b/liblzma/common/filter_common.c new file mode 100644 index 0000000..7921027 --- /dev/null +++ b/liblzma/common/filter_common.c @@ -0,0 +1,349 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_common.c +/// \brief Filter-specific stuff common for both encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_common.h" + + +static const struct { + /// Filter ID + lzma_vli id; + + /// Size of the filter-specific options structure + size_t options_size; + + /// True if it is OK to use this filter as non-last filter in + /// the chain. + bool non_last_ok; + + /// True if it is OK to use this filter as the last filter in + /// the chain. + bool last_ok; + + /// True if the filter may change the size of the data (that is, the + /// amount of encoded output can be different than the amount of + /// uncompressed input). + bool changes_size; + +} features[] = { +#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) + { + .id = LZMA_FILTER_LZMA1, + .options_size = sizeof(lzma_options_lzma), + .non_last_ok = false, + .last_ok = true, + .changes_size = true, + }, +#endif +#ifdef HAVE_DECODER_LZMA2 + { + .id = LZMA_FILTER_LZMA2, + .options_size = sizeof(lzma_options_lzma), + .non_last_ok = false, + .last_ok = true, + .changes_size = true, + }, +#endif +#ifdef HAVE_DECODER_X86 + { + .id = LZMA_FILTER_X86, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) + { + .id = LZMA_FILTER_POWERPC, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#ifdef HAVE_DECODER_IA64 + { + .id = LZMA_FILTER_IA64, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) + { + .id = LZMA_FILTER_ARM, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB) + { + .id = LZMA_FILTER_ARMTHUMB, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC) + { + .id = LZMA_FILTER_SPARC, + .options_size = sizeof(lzma_options_bcj), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + { + .id = LZMA_FILTER_DELTA, + .options_size = sizeof(lzma_options_delta), + .non_last_ok = true, + .last_ok = false, + .changes_size = false, + }, +#endif + { + .id = LZMA_VLI_UNKNOWN + } +}; + + +extern LZMA_API(lzma_ret) +lzma_filters_copy(const lzma_filter *src, lzma_filter *dest, + lzma_allocator *allocator) +{ + if (src == NULL || dest == NULL) + return LZMA_PROG_ERROR; + + lzma_ret ret; + size_t i; + for (i = 0; src[i].id != LZMA_VLI_UNKNOWN; ++i) { + // There must be a maximum of four filters plus + // the array terminator. + if (i == LZMA_FILTERS_MAX) { + ret = LZMA_OPTIONS_ERROR; + goto error; + } + + dest[i].id = src[i].id; + + if (src[i].options == NULL) { + dest[i].options = NULL; + } else { + // See if the filter is supported only when the + // options is not NULL. This might be convenient + // sometimes if the app is actually copying only + // a partial filter chain with a place holder ID. + // + // When options is not NULL, the Filter ID must be + // supported by us, because otherwise we don't know + // how big the options are. + size_t j; + for (j = 0; src[i].id != features[j].id; ++j) { + if (features[j].id == LZMA_VLI_UNKNOWN) { + ret = LZMA_OPTIONS_ERROR; + goto error; + } + } + + // Allocate and copy the options. + dest[i].options = lzma_alloc(features[j].options_size, + allocator); + if (dest[i].options == NULL) { + ret = LZMA_MEM_ERROR; + goto error; + } + + memcpy(dest[i].options, src[i].options, + features[j].options_size); + } + } + + // Terminate the filter array. + assert(i <= LZMA_FILTERS_MAX + 1); + dest[i].id = LZMA_VLI_UNKNOWN; + dest[i].options = NULL; + + return LZMA_OK; + +error: + // Free the options which we have already allocated. + while (i-- > 0) { + lzma_free(dest[i].options, allocator); + dest[i].options = NULL; + } + + return ret; +} + + +static lzma_ret +validate_chain(const lzma_filter *filters, size_t *count) +{ + // There must be at least one filter. + if (filters == NULL || filters[0].id == LZMA_VLI_UNKNOWN) + return LZMA_PROG_ERROR; + + // Number of non-last filters that may change the size of the data + // significantly (that is, more than 1-2 % or so). + size_t changes_size_count = 0; + + // True if it is OK to add a new filter after the current filter. + bool non_last_ok = true; + + // True if the last filter in the given chain is actually usable as + // the last filter. Only filters that support embedding End of Payload + // Marker can be used as the last filter in the chain. + bool last_ok = false; + + size_t i = 0; + do { + size_t j; + for (j = 0; filters[i].id != features[j].id; ++j) + if (features[j].id == LZMA_VLI_UNKNOWN) + return LZMA_OPTIONS_ERROR; + + // If the previous filter in the chain cannot be a non-last + // filter, the chain is invalid. + if (!non_last_ok) + return LZMA_OPTIONS_ERROR; + + non_last_ok = features[j].non_last_ok; + last_ok = features[j].last_ok; + changes_size_count += features[j].changes_size; + + } while (filters[++i].id != LZMA_VLI_UNKNOWN); + + // There must be 1-4 filters. The last filter must be usable as + // the last filter in the chain. A maximum of three filters are + // allowed to change the size of the data. + if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3) + return LZMA_OPTIONS_ERROR; + + *count = i; + return LZMA_OK; +} + + +extern lzma_ret +lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *options, + lzma_filter_find coder_find, bool is_encoder) +{ + // Do some basic validation and get the number of filters. + size_t count; + return_if_error(validate_chain(options, &count)); + + // Set the filter functions and copy the options pointer. + lzma_filter_info filters[LZMA_FILTERS_MAX + 1]; +#if __MOJOSETUP__ + size_t i; +#endif + + if (is_encoder) { +#if __MOJOSETUP__ + for (i = 0; i < count; ++i) { +#else + for (size_t i = 0; i < count; ++i) { +#endif + // The order of the filters is reversed in the + // encoder. It allows more efficient handling + // of the uncompressed data. + const size_t j = count - i - 1; + + const lzma_filter_coder *const fc + = coder_find(options[i].id); + if (fc == NULL || fc->init == NULL) + return LZMA_OPTIONS_ERROR; + + filters[j].id = options[i].id; + filters[j].init = fc->init; + filters[j].options = options[i].options; + } + } else { +#if __MOJOSETUP__ + for (i = 0; i < count; ++i) { +#else + for (size_t i = 0; i < count; ++i) { +#endif + const lzma_filter_coder *const fc + = coder_find(options[i].id); + if (fc == NULL || fc->init == NULL) + return LZMA_OPTIONS_ERROR; + + filters[i].id = options[i].id; + filters[i].init = fc->init; + filters[i].options = options[i].options; + } + } + + // Terminate the array. + filters[count].id = LZMA_VLI_UNKNOWN; + filters[count].init = NULL; + + // Initialize the filters. + const lzma_ret ret = lzma_next_filter_init(next, allocator, filters); + if (ret != LZMA_OK) + lzma_next_end(next, allocator); + + return ret; +} + + +extern uint64_t +lzma_raw_coder_memusage(lzma_filter_find coder_find, + const lzma_filter *filters) +{ + // The chain has to have at least one filter. + { + size_t tmp; + if (validate_chain(filters, &tmp) != LZMA_OK) + return UINT64_MAX; + } + + uint64_t total = 0; + size_t i = 0; + + do { + const lzma_filter_coder *const fc + = coder_find(filters[i].id); + if (fc == NULL) + return UINT64_MAX; // Unsupported Filter ID + + if (fc->memusage == NULL) { + // This filter doesn't have a function to calculate + // the memory usage and validate the options. Such + // filters need only little memory, so we use 1 KiB + // as a good estimate. They also accept all possible + // options, so there's no need to worry about lack + // of validation. + total += 1024; + } else { + // Call the filter-specific memory usage calculation + // function. + const uint64_t usage + = fc->memusage(filters[i].options); + if (usage == UINT64_MAX) + return UINT64_MAX; // Invalid options + + total += usage; + } + } while (filters[++i].id != LZMA_VLI_UNKNOWN); + + // Add some fixed amount of extra. It's to compensate memory usage + // of Stream, Block etc. coders, malloc() overhead, stack etc. + return total + LZMA_MEMUSAGE_BASE; +} diff --git a/liblzma/common/filter_common.h b/liblzma/common/filter_common.h new file mode 100644 index 0000000..cd61fc0 --- /dev/null +++ b/liblzma/common/filter_common.h @@ -0,0 +1,48 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_common.c +/// \brief Filter-specific stuff common for both encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FILTER_COMMON_H +#define LZMA_FILTER_COMMON_H + +#include "common.h" + + +/// Both lzma_filter_encoder and lzma_filter_decoder begin with these members. +typedef struct { + /// Filter ID + lzma_vli id; + + /// Initializes the filter encoder and calls lzma_next_filter_init() + /// for filters + 1. + lzma_init_function init; + + /// Calculates memory usage of the encoder. If the options are + /// invalid, UINT64_MAX is returned. + uint64_t (*memusage)(const void *options); + +} lzma_filter_coder; + + +typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id); + + +extern lzma_ret lzma_raw_coder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *filters, + lzma_filter_find coder_find, bool is_encoder); + + +extern uint64_t lzma_raw_coder_memusage(lzma_filter_find coder_find, + const lzma_filter *filters); + + +#endif diff --git a/liblzma/common/filter_decoder.c b/liblzma/common/filter_decoder.c new file mode 100644 index 0000000..c520add --- /dev/null +++ b/liblzma/common/filter_decoder.c @@ -0,0 +1,188 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_decoder.c +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" +#include "filter_common.h" +#include "lzma_decoder.h" +#include "lzma2_decoder.h" +#include "simple_decoder.h" +#include "delta_decoder.h" + + +typedef struct { + /// Filter ID + lzma_vli id; + + /// Initializes the filter encoder and calls lzma_next_filter_init() + /// for filters + 1. + lzma_init_function init; + + /// Calculates memory usage of the encoder. If the options are + /// invalid, UINT64_MAX is returned. + uint64_t (*memusage)(const void *options); + + /// Decodes Filter Properties. + /// + /// \return - LZMA_OK: Properties decoded successfully. + /// - LZMA_OPTIONS_ERROR: Unsupported properties + /// - LZMA_MEM_ERROR: Memory allocation failed. + lzma_ret (*props_decode)(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +} lzma_filter_decoder; + + +static const lzma_filter_decoder decoders[] = { +#ifdef HAVE_DECODER_LZMA1 + { + .id = LZMA_FILTER_LZMA1, + .init = &lzma_lzma_decoder_init, + .memusage = &lzma_lzma_decoder_memusage, + .props_decode = &lzma_lzma_props_decode, + }, +#endif +#ifdef HAVE_DECODER_LZMA2 + { + .id = LZMA_FILTER_LZMA2, + .init = &lzma_lzma2_decoder_init, + .memusage = &lzma_lzma2_decoder_memusage, + .props_decode = &lzma_lzma2_props_decode, + }, +#endif +#ifdef HAVE_DECODER_X86 + { + .id = LZMA_FILTER_X86, + .init = &lzma_simple_x86_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_POWERPC + { + .id = LZMA_FILTER_POWERPC, + .init = &lzma_simple_powerpc_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_IA64 + { + .id = LZMA_FILTER_IA64, + .init = &lzma_simple_ia64_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_ARM + { + .id = LZMA_FILTER_ARM, + .init = &lzma_simple_arm_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_ARMTHUMB + { + .id = LZMA_FILTER_ARMTHUMB, + .init = &lzma_simple_armthumb_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_SPARC + { + .id = LZMA_FILTER_SPARC, + .init = &lzma_simple_sparc_decoder_init, + .memusage = NULL, + .props_decode = &lzma_simple_props_decode, + }, +#endif +#ifdef HAVE_DECODER_DELTA + { + .id = LZMA_FILTER_DELTA, + .init = &lzma_delta_decoder_init, + .memusage = &lzma_delta_coder_memusage, + .props_decode = &lzma_delta_props_decode, + }, +#endif +}; + + +static const lzma_filter_decoder * +decoder_find(lzma_vli id) +{ +#if __MOJOSETUP__ + size_t i; + for (i = 0; i < ARRAY_SIZE(decoders); ++i) +#else + for (size_t i = 0; i < ARRAY_SIZE(decoders); ++i) +#endif + if (decoders[i].id == id) + return decoders + i; + + return NULL; +} + + +extern LZMA_API(lzma_bool) +lzma_filter_decoder_is_supported(lzma_vli id) +{ + return decoder_find(id) != NULL; +} + + +extern lzma_ret +lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *options) +{ + return lzma_raw_coder_init(next, allocator, + options, (lzma_filter_find)(&decoder_find), false); +} + + +extern LZMA_API(lzma_ret) +lzma_raw_decoder(lzma_stream *strm, const lzma_filter *options) +{ + lzma_next_strm_init(lzma_raw_decoder_init, strm, options); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API(uint64_t) +lzma_raw_decoder_memusage(const lzma_filter *filters) +{ + return lzma_raw_coder_memusage( + (lzma_filter_find)(&decoder_find), filters); +} + + +extern LZMA_API(lzma_ret) +lzma_properties_decode(lzma_filter *filter, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + // Make it always NULL so that the caller can always safely free() it. + filter->options = NULL; + + const lzma_filter_decoder *const fd = decoder_find(filter->id); + if (fd == NULL) + return LZMA_OPTIONS_ERROR; + + if (fd->props_decode == NULL) + return props_size == 0 ? LZMA_OK : LZMA_OPTIONS_ERROR; + + return fd->props_decode( + &filter->options, allocator, props, props_size); +} diff --git a/liblzma/common/filter_decoder.h b/liblzma/common/filter_decoder.h new file mode 100644 index 0000000..d5c68bd --- /dev/null +++ b/liblzma/common/filter_decoder.h @@ -0,0 +1,23 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_decoder.c +/// \brief Filter ID mapping to filter-specific functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FILTER_DECODER_H +#define LZMA_FILTER_DECODER_H + +#include "common.h" + + +extern lzma_ret lzma_raw_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter *options); + +#endif diff --git a/liblzma/common/filter_flags_decoder.c b/liblzma/common/filter_flags_decoder.c new file mode 100644 index 0000000..caae10c --- /dev/null +++ b/liblzma/common/filter_flags_decoder.c @@ -0,0 +1,46 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file filter_flags_decoder.c +/// \brief Decodes a Filter Flags field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "filter_decoder.h" + + +extern LZMA_API(lzma_ret) +lzma_filter_flags_decode( + lzma_filter *filter, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + // Set the pointer to NULL so the caller can always safely free it. + filter->options = NULL; + + // Filter ID + return_if_error(lzma_vli_decode(&filter->id, NULL, + in, in_pos, in_size)); + + if (filter->id >= LZMA_FILTER_RESERVED_START) + return LZMA_DATA_ERROR; + + // Size of Properties + lzma_vli props_size; + return_if_error(lzma_vli_decode(&props_size, NULL, + in, in_pos, in_size)); + + // Filter Properties + if (in_size - *in_pos < props_size) + return LZMA_DATA_ERROR; + + const lzma_ret ret = lzma_properties_decode( + filter, allocator, in + *in_pos, props_size); + + *in_pos += props_size; + + return ret; +} diff --git a/liblzma/common/index.c b/liblzma/common/index.c new file mode 100644 index 0000000..ddb9d36 --- /dev/null +++ b/liblzma/common/index.c @@ -0,0 +1,1241 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.c +/// \brief Handling of .xz Indexes and some other Stream information +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "stream_flags_common.h" + + +/// \brief How many Records to allocate at once +/// +/// This should be big enough to avoid making lots of tiny allocations +/// but small enough to avoid too much unused memory at once. +#define INDEX_GROUP_SIZE 512 + + +/// \brief How many Records can be allocated at once at maximum +#define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record)) + + +/// \brief Base structure for index_stream and index_group structures +typedef struct index_tree_node_s index_tree_node; +struct index_tree_node_s { + /// Uncompressed start offset of this Stream (relative to the + /// beginning of the file) or Block (relative to the beginning + /// of the Stream) + lzma_vli uncompressed_base; + + /// Compressed start offset of this Stream or Block + lzma_vli compressed_base; + + index_tree_node *parent; + index_tree_node *left; + index_tree_node *right; +}; + + +/// \brief AVL tree to hold index_stream or index_group structures +typedef struct { + /// Root node + index_tree_node *root; + + /// Leftmost node. Since the tree will be filled sequentially, + /// this won't change after the first node has been added to + /// the tree. + index_tree_node *leftmost; + + /// The rightmost node in the tree. Since the tree is filled + /// sequentially, this is always the node where to add the new data. + index_tree_node *rightmost; + + /// Number of nodes in the tree + uint32_t count; + +} index_tree; + + +typedef struct { + lzma_vli uncompressed_sum; + lzma_vli unpadded_sum; +} index_record; + + +typedef struct { + /// Every Record group is part of index_stream.groups tree. + index_tree_node node; + + /// Number of Blocks in this Stream before this group. + lzma_vli number_base; + + /// Number of Records that can be put in records[]. + size_t allocated; + + /// Index of the last Record in use. + size_t last; + + /// The sizes in this array are stored as cumulative sums relative + /// to the beginning of the Stream. This makes it possible to + /// use binary search in lzma_index_locate(). + /// + /// Note that the cumulative summing is done specially for + /// unpadded_sum: The previous value is rounded up to the next + /// multiple of four before adding the Unpadded Size of the new + /// Block. The total encoded size of the Blocks in the Stream + /// is records[last].unpadded_sum in the last Record group of + /// the Stream. + /// + /// For example, if the Unpadded Sizes are 39, 57, and 81, the + /// stored values are 39, 97 (40 + 57), and 181 (100 + 181). + /// The total encoded size of these Blocks is 184. + /// + /// This is a flexible array, because it makes easy to optimize + /// memory usage in case someone concatenates many Streams that + /// have only one or few Blocks. + index_record records[]; + +} index_group; + + +typedef struct { + /// Every index_stream is a node in the tree of Sreams. + index_tree_node node; + + /// Number of this Stream (first one is 1) + uint32_t number; + + /// Total number of Blocks before this Stream + lzma_vli block_number_base; + + /// Record groups of this Stream are stored in a tree. + /// It's a T-tree with AVL-tree balancing. There are + /// INDEX_GROUP_SIZE Records per node by default. + /// This keeps the number of memory allocations reasonable + /// and finding a Record is fast. + index_tree groups; + + /// Number of Records in this Stream + lzma_vli record_count; + + /// Size of the List of Records field in this Stream. This is used + /// together with record_count to calculate the size of the Index + /// field and thus the total size of the Stream. + lzma_vli index_list_size; + + /// Stream Flags of this Stream. This is meaningful only if + /// the Stream Flags have been told us with lzma_index_stream_flags(). + /// Initially stream_flags.version is set to UINT32_MAX to indicate + /// that the Stream Flags are unknown. + lzma_stream_flags stream_flags; + + /// Amount of Stream Padding after this Stream. This defaults to + /// zero and can be set with lzma_index_stream_padding(). + lzma_vli stream_padding; + +} index_stream; + + +struct lzma_index_s { + /// AVL-tree containing the Stream(s). Often there is just one + /// Stream, but using a tree keeps lookups fast even when there + /// are many concatenated Streams. + index_tree streams; + + /// Uncompressed size of all the Blocks in the Stream(s) + lzma_vli uncompressed_size; + + /// Total size of all the Blocks in the Stream(s) + lzma_vli total_size; + + /// Total number of Records in all Streams in this lzma_index + lzma_vli record_count; + + /// Size of the List of Records field if all the Streams in this + /// lzma_index were packed into a single Stream (makes it simpler to + /// take many .xz files and combine them into a single Stream). + /// + /// This value together with record_count is needed to calculate + /// Backward Size that is stored into Stream Footer. + lzma_vli index_list_size; + + /// How many Records to allocate at once in lzma_index_append(). + /// This defaults to INDEX_GROUP_SIZE but can be overriden with + /// lzma_index_prealloc(). + size_t prealloc; + + /// Bitmask indicating what integrity check types have been used + /// as set by lzma_index_stream_flags(). The bit of the last Stream + /// is not included here, since it is possible to change it by + /// calling lzma_index_stream_flags() again. + uint32_t checks; +}; + + +static void +index_tree_init(index_tree *tree) +{ + tree->root = NULL; + tree->leftmost = NULL; + tree->rightmost = NULL; + tree->count = 0; + return; +} + + +/// Helper for index_tree_end() +static void +index_tree_node_end(index_tree_node *node, lzma_allocator *allocator, + void (*free_func)(void *node, lzma_allocator *allocator)) +{ + // The tree won't ever be very huge, so recursion should be fine. + // 20 levels in the tree is likely quite a lot already in practice. + if (node->left != NULL) + index_tree_node_end(node->left, allocator, free_func); + + if (node->right != NULL) + index_tree_node_end(node->right, allocator, free_func); + + if (free_func != NULL) + free_func(node, allocator); + + lzma_free(node, allocator); + return; +} + + +/// Free the meory allocated for a tree. If free_func is not NULL, +/// it is called on each node before freeing the node. This is used +/// to free the Record groups from each index_stream before freeing +/// the index_stream itself. +static void +index_tree_end(index_tree *tree, lzma_allocator *allocator, + void (*free_func)(void *node, lzma_allocator *allocator)) +{ + if (tree->root != NULL) + index_tree_node_end(tree->root, allocator, free_func); + + return; +} + + +/// Add a new node to the tree. node->uncompressed_base and +/// node->compressed_base must have been set by the caller already. +static void +index_tree_append(index_tree *tree, index_tree_node *node) +{ + node->parent = tree->rightmost; + node->left = NULL; + node->right = NULL; + + ++tree->count; + + // Handle the special case of adding the first node. + if (tree->root == NULL) { + tree->root = node; + tree->leftmost = node; + tree->rightmost = node; + return; + } + + // The tree is always filled sequentially. + assert(tree->rightmost->uncompressed_base <= node->uncompressed_base); + assert(tree->rightmost->compressed_base < node->compressed_base); + + // Add the new node after the rightmost node. It's the correct + // place due to the reason above. + tree->rightmost->right = node; + tree->rightmost = node; + + // Balance the AVL-tree if needed. We don't need to keep the balance + // factors in nodes, because we always fill the tree sequentially, + // and thus know the state of the tree just by looking at the node + // count. From the node count we can calculate how many steps to go + // up in the tree to find the rotation root. + uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count)); + if (up != 0) { + // Locate the root node for the rotation. + up = ctz32(tree->count) + 2; + do { + node = node->parent; + } while (--up > 0); + + // Rotate left using node as the rotation root. + index_tree_node *pivot = node->right; + + if (node->parent == NULL) { + tree->root = pivot; + } else { + assert(node->parent->right == node); + node->parent->right = pivot; + } + + pivot->parent = node->parent; + + node->right = pivot->left; + if (node->right != NULL) + node->right->parent = node; + + pivot->left = node; + node->parent = pivot; + } + + return; +} + + +/// Get the next node in the tree. Return NULL if there are no more nodes. +static void * +index_tree_next(const index_tree_node *node) +{ + if (node->right != NULL) { + node = node->right; + while (node->left != NULL) + node = node->left; + + return (void *)(node); + } + + while (node->parent != NULL && node->parent->right == node) + node = node->parent; + + return (void *)(node->parent); +} + + +/// Locate a node that contains the given uncompressed offset. It is +/// caller's job to check that target is not bigger than the uncompressed +/// size of the tree (the last node would be returned in that case still). +static void * +index_tree_locate(const index_tree *tree, lzma_vli target) +{ + const index_tree_node *result = NULL; + const index_tree_node *node = tree->root; + + assert(tree->leftmost == NULL + || tree->leftmost->uncompressed_base == 0); + + // Consecutive nodes may have the same uncompressed_base. + // We must pick the rightmost one. + while (node != NULL) { + if (node->uncompressed_base > target) { + node = node->left; + } else { + result = node; + node = node->right; + } + } + + return (void *)(result); +} + + +/// Allocate and initialize a new Stream using the given base offsets. +static index_stream * +index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, + lzma_vli stream_number, lzma_vli block_number_base, + lzma_allocator *allocator) +{ + index_stream *s = lzma_alloc(sizeof(index_stream), allocator); + if (s == NULL) + return NULL; + + s->node.uncompressed_base = uncompressed_base; + s->node.compressed_base = compressed_base; + s->node.parent = NULL; + s->node.left = NULL; + s->node.right = NULL; + + s->number = stream_number; + s->block_number_base = block_number_base; + + index_tree_init(&s->groups); + + s->record_count = 0; + s->index_list_size = 0; + s->stream_flags.version = UINT32_MAX; + s->stream_padding = 0; + + return s; +} + + +/// Free the memory allocated for a Stream and its Record groups. +static void +index_stream_end(void *node, lzma_allocator *allocator) +{ + index_stream *s = node; + index_tree_end(&s->groups, allocator, NULL); + return; +} + + +static lzma_index * +index_init_plain(lzma_allocator *allocator) +{ + lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); + if (i != NULL) { + index_tree_init(&i->streams); + i->uncompressed_size = 0; + i->total_size = 0; + i->record_count = 0; + i->index_list_size = 0; + i->prealloc = INDEX_GROUP_SIZE; + i->checks = 0; + } + + return i; +} + + +extern LZMA_API(lzma_index *) +lzma_index_init(lzma_allocator *allocator) +{ + lzma_index *i = index_init_plain(allocator); + index_stream *s = index_stream_init(0, 0, 1, 0, allocator); + if (i == NULL || s == NULL) { + index_stream_end(s, allocator); + lzma_free(i, allocator); + } + + index_tree_append(&i->streams, &s->node); + + return i; +} + + +extern LZMA_API(void) +lzma_index_end(lzma_index *i, lzma_allocator *allocator) +{ + // NOTE: If you modify this function, check also the bottom + // of lzma_index_cat(). + if (i != NULL) { + index_tree_end(&i->streams, allocator, &index_stream_end); + lzma_free(i, allocator); + } + + return; +} + + +extern void +lzma_index_prealloc(lzma_index *i, lzma_vli records) +{ + if (records > PREALLOC_MAX) + records = PREALLOC_MAX; + + i->prealloc = (size_t)(records); + return; +} + + +extern LZMA_API(uint64_t) +lzma_index_memusage(lzma_vli streams, lzma_vli blocks) +{ + // This calculates an upper bound that is only a little bit + // bigger than the exact maximum memory usage with the given + // parameters. + + // Typical malloc() overhead is 2 * sizeof(void *) but we take + // a little bit extra just in case. Using LZMA_MEMUSAGE_BASE + // instead would give too inaccurate estimate. + const size_t alloc_overhead = 4 * sizeof(void *); + + // Amount of memory needed for each Stream base structures. + // We assume that every Stream has at least one Block and + // thus at least one group. + const size_t stream_base = sizeof(index_stream) + + sizeof(index_group) + 2 * alloc_overhead; + + // Amount of memory needed per group. + const size_t group_base = sizeof(index_group) + + INDEX_GROUP_SIZE * sizeof(index_record) + + alloc_overhead; + + // Number of groups. There may actually be more, but that overhead + // has been taken into account in stream_base already. + const lzma_vli groups + = (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE; + + // Memory used by index_stream and index_group structures. + const uint64_t streams_mem = streams * stream_base; + const uint64_t groups_mem = groups * group_base; + + // Memory used by the base structure. + const uint64_t index_base = sizeof(lzma_index) + alloc_overhead; + + // Validate the arguments and catch integer overflows. + // Maximum number of Streams is "only" UINT32_MAX, because + // that limit is used by the tree containing the Streams. + const uint64_t limit = UINT64_MAX - index_base; + if (streams == 0 || streams > UINT32_MAX || blocks > LZMA_VLI_MAX + || streams > limit / stream_base + || groups > limit / group_base + || limit - streams_mem < groups_mem) + return UINT64_MAX; + + return index_base + streams_mem + groups_mem; +} + + +extern LZMA_API(uint64_t) +lzma_index_memused(const lzma_index *i) +{ + return lzma_index_memusage(i->streams.count, i->record_count); +} + + +extern LZMA_API(lzma_vli) +lzma_index_block_count(const lzma_index *i) +{ + return i->record_count; +} + + +extern LZMA_API(lzma_vli) +lzma_index_stream_count(const lzma_index *i) +{ + return i->streams.count; +} + + +extern LZMA_API(lzma_vli) +lzma_index_size(const lzma_index *i) +{ + return index_size(i->record_count, i->index_list_size); +} + + +extern LZMA_API(lzma_vli) +lzma_index_total_size(const lzma_index *i) +{ + return i->total_size; +} + + +extern LZMA_API(lzma_vli) +lzma_index_stream_size(const lzma_index *i) +{ + // Stream Header + Blocks + Index + Stream Footer + return LZMA_STREAM_HEADER_SIZE + i->total_size + + index_size(i->record_count, i->index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + + +static lzma_vli +index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum, + lzma_vli record_count, lzma_vli index_list_size, + lzma_vli stream_padding) +{ + // Earlier Streams and Stream Paddings + Stream Header + // + Blocks + Index + Stream Footer + Stream Padding + // + // This might go over LZMA_VLI_MAX due to too big unpadded_sum + // when this function is used in lzma_index_append(). + lzma_vli file_size = compressed_base + 2 * LZMA_STREAM_HEADER_SIZE + + stream_padding + vli_ceil4(unpadded_sum); + if (file_size > LZMA_VLI_MAX) + return LZMA_VLI_UNKNOWN; + + // The same applies here. + file_size += index_size(record_count, index_list_size); + if (file_size > LZMA_VLI_MAX) + return LZMA_VLI_UNKNOWN; + + return file_size; +} + + +extern LZMA_API(lzma_vli) +lzma_index_file_size(const lzma_index *i) +{ + const index_stream *s = (const index_stream *)(i->streams.rightmost); + const index_group *g = (const index_group *)(s->groups.rightmost); + return index_file_size(s->node.compressed_base, + g == NULL ? 0 : g->records[g->last].unpadded_sum, + s->record_count, s->index_list_size, + s->stream_padding); +} + + +extern LZMA_API(lzma_vli) +lzma_index_uncompressed_size(const lzma_index *i) +{ + return i->uncompressed_size; +} + + +extern LZMA_API(uint32_t) +lzma_index_checks(const lzma_index *i) +{ + uint32_t checks = i->checks; + + // Get the type of the Check of the last Stream too. + const index_stream *s = (const index_stream *)(i->streams.rightmost); + if (s->stream_flags.version != UINT32_MAX) + checks |= UINT32_C(1) << s->stream_flags.check; + + return checks; +} + + +extern uint32_t +lzma_index_padding_size(const lzma_index *i) +{ + return (LZMA_VLI_C(4) - index_size_unpadded( + i->record_count, i->index_list_size)) & 3; +} + + +extern LZMA_API(lzma_ret) +lzma_index_stream_flags(lzma_index *i, const lzma_stream_flags *stream_flags) +{ + if (i == NULL || stream_flags == NULL) + return LZMA_PROG_ERROR; + + // Validate the Stream Flags. + return_if_error(lzma_stream_flags_compare( + stream_flags, stream_flags)); + + index_stream *s = (index_stream *)(i->streams.rightmost); + s->stream_flags = *stream_flags; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding) +{ + if (i == NULL || stream_padding > LZMA_VLI_MAX + || (stream_padding & 3) != 0) + return LZMA_PROG_ERROR; + + index_stream *s = (index_stream *)(i->streams.rightmost); + + // Check that the new value won't make the file grow too big. + const lzma_vli old_stream_padding = s->stream_padding; + s->stream_padding = 0; + if (lzma_index_file_size(i) + stream_padding > LZMA_VLI_MAX) { + s->stream_padding = old_stream_padding; + return LZMA_DATA_ERROR; + } + + s->stream_padding = stream_padding; + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) +{ + // Validate. + if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX + || uncompressed_size > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + index_stream *s = (index_stream *)(i->streams.rightmost); + index_group *g = (index_group *)(s->groups.rightmost); + + const lzma_vli compressed_base = g == NULL ? 0 + : vli_ceil4(g->records[g->last].unpadded_sum); + const lzma_vli uncompressed_base = g == NULL ? 0 + : g->records[g->last].uncompressed_sum; + const uint32_t index_list_size_add = lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); + + // Check that the file size will stay within limits. + if (index_file_size(s->node.compressed_base, + compressed_base + unpadded_size, s->record_count + 1, + s->index_list_size + index_list_size_add, + s->stream_padding) == LZMA_VLI_UNKNOWN) + return LZMA_DATA_ERROR; + + // The size of the Index field must not exceed the maximum value + // that can be stored in the Backward Size field. + if (index_size(i->record_count + 1, + i->index_list_size + index_list_size_add) + > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + + if (g != NULL && g->last + 1 < g->allocated) { + // There is space in the last group at least for one Record. + ++g->last; + } else { + // We need to allocate a new group. + g = lzma_alloc(sizeof(index_group) + + i->prealloc * sizeof(index_record), + allocator); + if (g == NULL) + return LZMA_MEM_ERROR; + + g->last = 0; + g->allocated = i->prealloc; + + // Reset prealloc so that if the application happens to + // add new Records, the allocation size will be sane. + i->prealloc = INDEX_GROUP_SIZE; + + // Set the start offsets of this group. + g->node.uncompressed_base = uncompressed_base; + g->node.compressed_base = compressed_base; + g->number_base = s->record_count + 1; + + // Add the new group to the Stream. + index_tree_append(&s->groups, &g->node); + } + + // Add the new Record to the group. + g->records[g->last].uncompressed_sum + = uncompressed_base + uncompressed_size; + g->records[g->last].unpadded_sum + = compressed_base + unpadded_size; + + // Update the totals. + ++s->record_count; + s->index_list_size += index_list_size_add; + + i->total_size += vli_ceil4(unpadded_size); + i->uncompressed_size += uncompressed_size; + ++i->record_count; + i->index_list_size += index_list_size_add; + + return LZMA_OK; +} + + +/// Structure to pass info to index_cat_helper() +typedef struct { + /// Uncompressed size of the destination + lzma_vli uncompressed_size; + + /// Compressed file size of the destination + lzma_vli file_size; + + /// Same as above but for Block numbers + lzma_vli block_number_add; + + /// Number of Streams that were in the destination index before we + /// started appending new Streams from the source index. This is + /// used to fix the Stream numbering. + uint32_t stream_number_add; + + /// Destination index' Stream tree + index_tree *streams; + +} index_cat_info; + + +/// Add the Stream nodes from the source index to dest using recursion. +/// Simplest iterative traversal of the source tree wouldn't work, because +/// we update the pointers in nodes when moving them to the destination tree. +static void +index_cat_helper(const index_cat_info *info, index_stream *this) +{ + index_stream *left = (index_stream *)(this->node.left); + index_stream *right = (index_stream *)(this->node.right); + + if (left != NULL) + index_cat_helper(info, left); + + this->node.uncompressed_base += info->uncompressed_size; + this->node.compressed_base += info->file_size; + this->number += info->stream_number_add; + this->block_number_base += info->block_number_add; + index_tree_append(info->streams, &this->node); + + if (right != NULL) + index_cat_helper(info, right); + + return; +} + + +extern LZMA_API(lzma_ret) +lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, + lzma_allocator *allocator) +{ + const lzma_vli dest_file_size = lzma_index_file_size(dest); + + // Check that we don't exceed the file size limits. + if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX + || dest->uncompressed_size + src->uncompressed_size + > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + // Check that the encoded size of the combined lzma_indexes stays + // within limits. In theory, this should be done only if we know + // that the user plans to actually combine the Streams and thus + // construct a single Index (probably rare). However, exceeding + // this limit is quite theoretical, so we do this check always + // to simplify things elsewhere. + { + const lzma_vli dest_size = index_size_unpadded( + dest->record_count, dest->index_list_size); + const lzma_vli src_size = index_size_unpadded( + src->record_count, src->index_list_size); + if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + } + + // Optimize the last group to minimize memory usage. Allocation has + // to be done before modifying dest or src. + { + index_stream *s = (index_stream *)(dest->streams.rightmost); + index_group *g = (index_group *)(s->groups.rightmost); + if (g != NULL && g->last + 1 < g->allocated) { + assert(g->node.left == NULL); + assert(g->node.right == NULL); + + index_group *newg = lzma_alloc(sizeof(index_group) + + (g->last + 1) + * sizeof(index_record), + allocator); + if (newg == NULL) + return LZMA_MEM_ERROR; + + newg->node = g->node; + newg->allocated = g->last + 1; + newg->last = g->last; + newg->number_base = g->number_base; + + memcpy(newg->records, g->records, newg->allocated + * sizeof(index_record)); + + if (g->node.parent != NULL) { + assert(g->node.parent->right == &g->node); + g->node.parent->right = &newg->node; + } + + if (s->groups.leftmost == &g->node) { + assert(s->groups.root == &g->node); + s->groups.leftmost = &newg->node; + s->groups.root = &newg->node; + } + + if (s->groups.rightmost == &g->node) + s->groups.rightmost = &newg->node; + + lzma_free(g, allocator); + } + } + + // Add all the Streams from src to dest. Update the base offsets + // of each Stream from src. + const index_cat_info info = { + .uncompressed_size = dest->uncompressed_size, + .file_size = dest_file_size, + .stream_number_add = dest->streams.count, + .block_number_add = dest->record_count, + .streams = &dest->streams, + }; + index_cat_helper(&info, (index_stream *)(src->streams.root)); + + // Update info about all the combined Streams. + dest->uncompressed_size += src->uncompressed_size; + dest->total_size += src->total_size; + dest->record_count += src->record_count; + dest->index_list_size += src->index_list_size; + dest->checks = lzma_index_checks(dest) | src->checks; + + // There's nothing else left in src than the base structure. + lzma_free(src, allocator); + + return LZMA_OK; +} + + +/// Duplicate an index_stream. +static index_stream * +index_dup_stream(const index_stream *src, lzma_allocator *allocator) +{ + // Catch a somewhat theoretical integer overflow. + if (src->record_count > PREALLOC_MAX) + return NULL; + + // Allocate and initialize a new Stream. + index_stream *dest = index_stream_init(src->node.compressed_base, + src->node.uncompressed_base, src->number, + src->block_number_base, allocator); + + // Return immediately if allocation failed or if there are + // no groups to duplicate. + if (dest == NULL || src->groups.leftmost == NULL) + return dest; + + // Copy the overall information. + dest->record_count = src->record_count; + dest->index_list_size = src->index_list_size; + dest->stream_flags = src->stream_flags; + dest->stream_padding = src->stream_padding; + + // Allocate memory for the Records. We put all the Records into + // a single group. It's simplest and also tends to make + // lzma_index_locate() a little bit faster with very big Indexes. + index_group *destg = lzma_alloc(sizeof(index_group) + + src->record_count * sizeof(index_record), + allocator); + if (destg == NULL) { + index_stream_end(dest, allocator); + return NULL; + } + + // Initialize destg. + destg->node.uncompressed_base = 0; + destg->node.compressed_base = 0; + destg->number_base = 1; + destg->allocated = src->record_count; + destg->last = src->record_count - 1; + + // Go through all the groups in src and copy the Records into destg. + const index_group *srcg = (const index_group *)(src->groups.leftmost); + size_t i = 0; + do { + memcpy(destg->records + i, srcg->records, + (srcg->last + 1) * sizeof(index_record)); + i += srcg->last + 1; + srcg = index_tree_next(&srcg->node); + } while (srcg != NULL); + + assert(i == destg->allocated); + + // Add the group to the new Stream. + index_tree_append(&dest->groups, &destg->node); + + return dest; +} + + +extern LZMA_API(lzma_index *) +lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) +{ + // Allocate the base structure (no initial Stream). + lzma_index *dest = index_init_plain(allocator); + if (dest == NULL) + return NULL; + + // Copy the totals. + dest->uncompressed_size = src->uncompressed_size; + dest->total_size = src->total_size; + dest->record_count = src->record_count; + dest->index_list_size = src->index_list_size; + + // Copy the Streams and the groups in them. + const index_stream *srcstream + = (const index_stream *)(src->streams.leftmost); + do { + index_stream *deststream = index_dup_stream( + srcstream, allocator); + if (deststream == NULL) { + lzma_index_end(dest, allocator); + return NULL; + } + + index_tree_append(&dest->streams, &deststream->node); + + srcstream = index_tree_next(&srcstream->node); + } while (srcstream != NULL); + + return dest; +} + + +/// Indexing for lzma_index_iter.internal[] +enum { + ITER_INDEX, + ITER_STREAM, + ITER_GROUP, + ITER_RECORD, + ITER_METHOD, +}; + + +/// Values for lzma_index_iter.internal[ITER_METHOD].s +enum { + ITER_METHOD_NORMAL, + ITER_METHOD_NEXT, + ITER_METHOD_LEFTMOST, +}; + + +static void +iter_set_info(lzma_index_iter *iter) +{ + const lzma_index *i = iter->internal[ITER_INDEX].p; + const index_stream *stream = iter->internal[ITER_STREAM].p; + const index_group *group = iter->internal[ITER_GROUP].p; + const size_t record = iter->internal[ITER_RECORD].s; + + // lzma_index_iter.internal must not contain a pointer to the last + // group in the index, because that may be reallocated by + // lzma_index_cat(). + if (group == NULL) { + // There are no groups. + assert(stream->groups.root == NULL); + iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; + + } else if (i->streams.rightmost != &stream->node + || stream->groups.rightmost != &group->node) { + // The group is not not the last group in the index. + iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; + + } else if (stream->groups.leftmost != &group->node) { + // The group isn't the only group in the Stream, thus we + // know that it must have a parent group i.e. it's not + // the root node. + assert(stream->groups.root != &group->node); + assert(group->node.parent->right == &group->node); + iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT; + iter->internal[ITER_GROUP].p = group->node.parent; + + } else { + // The Stream has only one group. + assert(stream->groups.root == &group->node); + assert(group->node.parent == NULL); + iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; + iter->internal[ITER_GROUP].p = NULL; + } + + iter->stream.number = stream->number; + iter->stream.block_count = stream->record_count; + iter->stream.compressed_offset = stream->node.compressed_base; + iter->stream.uncompressed_offset = stream->node.uncompressed_base; + + // iter->stream.flags will be NULL if the Stream Flags haven't been + // set with lzma_index_stream_flags(). + iter->stream.flags = stream->stream_flags.version == UINT32_MAX + ? NULL : &stream->stream_flags; + iter->stream.padding = stream->stream_padding; + + if (stream->groups.rightmost == NULL) { + // Stream has no Blocks. + iter->stream.compressed_size = index_size(0, 0) + + 2 * LZMA_STREAM_HEADER_SIZE; + iter->stream.uncompressed_size = 0; + } else { + const index_group *g = (const index_group *)( + stream->groups.rightmost); + + // Stream Header + Stream Footer + Index + Blocks + iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE + + index_size(stream->record_count, + stream->index_list_size) + + vli_ceil4(g->records[g->last].unpadded_sum); + iter->stream.uncompressed_size + = g->records[g->last].uncompressed_sum; + } + + if (group != NULL) { + iter->block.number_in_stream = group->number_base + record; + iter->block.number_in_file = iter->block.number_in_stream + + stream->block_number_base; + + iter->block.compressed_stream_offset + = record == 0 ? group->node.compressed_base + : vli_ceil4(group->records[ + record - 1].unpadded_sum); + iter->block.uncompressed_stream_offset + = record == 0 ? group->node.uncompressed_base + : group->records[record - 1].uncompressed_sum; + + iter->block.uncompressed_size + = group->records[record].uncompressed_sum + - iter->block.uncompressed_stream_offset; + iter->block.unpadded_size + = group->records[record].unpadded_sum + - iter->block.compressed_stream_offset; + iter->block.total_size = vli_ceil4(iter->block.unpadded_size); + + iter->block.compressed_stream_offset + += LZMA_STREAM_HEADER_SIZE; + + iter->block.compressed_file_offset + = iter->block.compressed_stream_offset + + iter->stream.compressed_offset; + iter->block.uncompressed_file_offset + = iter->block.uncompressed_stream_offset + + iter->stream.uncompressed_offset; + } + + return; +} + + +extern LZMA_API(void) +lzma_index_iter_init(lzma_index_iter *iter, const lzma_index *i) +{ + iter->internal[ITER_INDEX].p = i; + lzma_index_iter_rewind(iter); + return; +} + + +extern LZMA_API(void) +lzma_index_iter_rewind(lzma_index_iter *iter) +{ + iter->internal[ITER_STREAM].p = NULL; + iter->internal[ITER_GROUP].p = NULL; + iter->internal[ITER_RECORD].s = 0; + iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; + return; +} + + +extern LZMA_API(lzma_bool) +lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode) +{ + // Catch unsupported mode values. + if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK) + return true; + + const lzma_index *i = iter->internal[ITER_INDEX].p; + const index_stream *stream = iter->internal[ITER_STREAM].p; + const index_group *group = NULL; + size_t record = iter->internal[ITER_RECORD].s; + + // If we are being asked for the next Stream, leave group to NULL + // so that the rest of the this function thinks that this Stream + // has no groups and will thus go to the next Stream. + if (mode != LZMA_INDEX_ITER_STREAM) { + // Get the pointer to the current group. See iter_set_inf() + // for explanation. + switch (iter->internal[ITER_METHOD].s) { + case ITER_METHOD_NORMAL: + group = iter->internal[ITER_GROUP].p; + break; + + case ITER_METHOD_NEXT: + group = index_tree_next(iter->internal[ITER_GROUP].p); + break; + + case ITER_METHOD_LEFTMOST: + group = (const index_group *)( + stream->groups.leftmost); + break; + } + } + +again: + if (stream == NULL) { + // We at the beginning of the lzma_index. + // Locate the first Stream. + stream = (const index_stream *)(i->streams.leftmost); + if (mode >= LZMA_INDEX_ITER_BLOCK) { + // Since we are being asked to return information + // about the first a Block, skip Streams that have + // no Blocks. + while (stream->groups.leftmost == NULL) { + stream = index_tree_next(&stream->node); + if (stream == NULL) + return true; + } + } + + // Start from the first Record in the Stream. + group = (const index_group *)(stream->groups.leftmost); + record = 0; + + } else if (group != NULL && record < group->last) { + // The next Record is in the same group. + ++record; + + } else { + // This group has no more Records or this Stream has + // no Blocks at all. + record = 0; + + // If group is not NULL, this Stream has at least one Block + // and thus at least one group. Find the next group. + if (group != NULL) + group = index_tree_next(&group->node); + + if (group == NULL) { + // This Stream has no more Records. Find the next + // Stream. If we are being asked to return information + // about a Block, we skip empty Streams. + do { + stream = index_tree_next(&stream->node); + if (stream == NULL) + return true; + } while (mode >= LZMA_INDEX_ITER_BLOCK + && stream->groups.leftmost == NULL); + + group = (const index_group *)( + stream->groups.leftmost); + } + } + + if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) { + // We need to look for the next Block again if this Block + // is empty. + if (record == 0) { + if (group->node.uncompressed_base + == group->records[0].uncompressed_sum) + goto again; + } else if (group->records[record - 1].uncompressed_sum + == group->records[record].uncompressed_sum) { + goto again; + } + } + + iter->internal[ITER_STREAM].p = stream; + iter->internal[ITER_GROUP].p = group; + iter->internal[ITER_RECORD].s = record; + + iter_set_info(iter); + + return false; +} + + +extern LZMA_API(lzma_bool) +lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target) +{ + const lzma_index *i = iter->internal[ITER_INDEX].p; + + // If the target is past the end of the file, return immediately. + if (i->uncompressed_size <= target) + return true; + + // Locate the Stream containing the target offset. + const index_stream *stream = index_tree_locate(&i->streams, target); + assert(stream != NULL); + target -= stream->node.uncompressed_base; + + // Locate the group containing the target offset. + const index_group *group = index_tree_locate(&stream->groups, target); + assert(group != NULL); + + // Use binary search to locate the exact Record. It is the first + // Record whose uncompressed_sum is greater than target. + // This is because we want the rightmost Record that fullfills the + // search criterion. It is possible that there are empty Blocks; + // we don't want to return them. + size_t left = 0; + size_t right = group->last; + + while (left < right) { + const size_t pos = left + (right - left) / 2; + if (group->records[pos].uncompressed_sum <= target) + left = pos + 1; + else + right = pos; + } + + iter->internal[ITER_STREAM].p = stream; + iter->internal[ITER_GROUP].p = group; + iter->internal[ITER_RECORD].s = left; + + iter_set_info(iter); + + return false; +} diff --git a/liblzma/common/index.h b/liblzma/common/index.h new file mode 100644 index 0000000..64e9724 --- /dev/null +++ b/liblzma/common/index.h @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.h +/// \brief Handling of Index +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_H +#define LZMA_INDEX_H + +#include "common.h" + + +/// Minimum Unpadded Size +#define UNPADDED_SIZE_MIN LZMA_VLI_C(5) + +/// Maximum Unpadded Size +#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) + + +/// Get the size of the Index Padding field. This is needed by Index encoder +/// and decoder, but applications should have no use for this. +extern uint32_t lzma_index_padding_size(const lzma_index *i); + + +/// Set for how many Records to allocate memory the next time +/// lzma_index_append() needs to allocate space for a new Record. +/// This is used only by the Index decoder. +extern void lzma_index_prealloc(lzma_index *i, lzma_vli records); + + +/// Round the variable-length integer to the next multiple of four. +static inline lzma_vli +vli_ceil4(lzma_vli vli) +{ + assert(vli <= LZMA_VLI_MAX); + return (vli + 3) & ~LZMA_VLI_C(3); +} + + +/// Calculate the size of the Index field excluding Index Padding +static inline lzma_vli +index_size_unpadded(lzma_vli count, lzma_vli index_list_size) +{ + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + lzma_vli_size(count) + index_list_size + 4; +} + + +/// Calculate the size of the Index field including Index Padding +static inline lzma_vli +index_size(lzma_vli count, lzma_vli index_list_size) +{ + return vli_ceil4(index_size_unpadded(count, index_list_size)); +} + + +/// Calculate the total size of the Stream +static inline lzma_vli +index_stream_size(lzma_vli blocks_size, + lzma_vli count, lzma_vli index_list_size) +{ + return LZMA_STREAM_HEADER_SIZE + blocks_size + + index_size(count, index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + +#endif diff --git a/liblzma/common/index_decoder.c b/liblzma/common/index_decoder.c new file mode 100644 index 0000000..a6bc650 --- /dev/null +++ b/liblzma/common/index_decoder.c @@ -0,0 +1,344 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_MEMUSAGE, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Memory usage limit + uint64_t memlimit; + + /// Target Index + lzma_index *index; + + /// Pointer give by the application, which is set after + /// successful decoding. + lzma_index **index_ptr; + + /// Number of Records left to decode. + lzma_vli count; + + /// The most recent Unpadded Size field + lzma_vli unpadded_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + coder->pos = 0; + coder->sequence = SEQ_MEMUSAGE; + + // Fall through + + case SEQ_MEMUSAGE: + if (lzma_index_memusage(1, coder->count) > coder->memlimit) { + ret = LZMA_MEMLIMIT_ERROR; + goto out; + } + + // Tell the Index handling code how many Records this + // Index has to allow it to allocate memory more efficiently. + lzma_index_prealloc(coder->index, coder->count); + + ret = LZMA_OK; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + break; + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_UNPADDED + ? &coder->unpadded_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_UNPADDED) { + // Validate that encoded Unpadded Size isn't too small + // or too big. + if (coder->unpadded_size < UNPADDED_SIZE_MIN + || coder->unpadded_size + > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->unpadded_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_UNPADDED; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++coder->pos < 4); + + // Decoding was successful, now we can let the application + // see the decoded Index. + *coder->index_ptr = coder->index; + + // Make index NULL so we don't free it unintentionally. + coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + return ret; +} + + +static void +index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + *memusage = lzma_index_memusage(1, coder->count); + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < *memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +static lzma_ret +index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit) +{ + // Remember the pointer given by the application. We will set it + // to point to the decoded Index only if decoding is successful. + // Before that, keep it NULL so that applications can always safely + // pass it to lzma_index_end() no matter did decoding succeed or not. + coder->index_ptr = i; + *i = NULL; + + // We always allocate a new lzma_index. + coder->index = lzma_index_init(allocator); + if (coder->index == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. + coder->sequence = SEQ_INDICATOR; + coder->memlimit = memlimit; + coder->count = 0; // Needs to be initialized due to _memconfig(). + coder->pos = 0; + coder->crc32 = 0; + + return LZMA_OK; +} + + +static lzma_ret +index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i, uint64_t memlimit) +{ + lzma_next_coder_init(&index_decoder_init, next, allocator); + + if (i == NULL || memlimit == 0) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_decode; + next->end = &index_decoder_end; + next->memconfig = &index_decoder_memconfig; + next->coder->index = NULL; + } else { + lzma_index_end(next->coder->index, allocator); + } + + return index_decoder_reset(next->coder, allocator, i, memlimit); +} + + +extern LZMA_API(lzma_ret) +lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) +{ + lzma_next_strm_init(index_decoder_init, strm, i, memlimit); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_buffer_decode( + lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + // Sanity checks + if (i == NULL || memlimit == NULL + || in == NULL || in_pos == NULL || *in_pos > in_size) + return LZMA_PROG_ERROR; + + // Initialize the decoder. + lzma_coder coder; + return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); + + // Store the input start position so that we can restore it in case + // of an error. + const size_t in_start = *in_pos; + + // Do the actual decoding. + lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, + NULL, NULL, 0, LZMA_RUN); + + if (ret == LZMA_STREAM_END) { + ret = LZMA_OK; + } else { + // Something went wrong, free the Index structure and restore + // the input position. + lzma_index_end(coder.index, allocator); + *in_pos = in_start; + + if (ret == LZMA_OK) { + // The input is truncated or otherwise corrupt. + // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR + // like lzma_vli_decode() does in single-call mode. + ret = LZMA_DATA_ERROR; + + } else if (ret == LZMA_MEMLIMIT_ERROR) { + // Tell the caller how much memory would have + // been needed. + *memlimit = lzma_index_memusage(1, coder.count); + } + } + + return ret; +} diff --git a/liblzma/common/index_hash.c b/liblzma/common/index_hash.c new file mode 100644 index 0000000..e3e9386 --- /dev/null +++ b/liblzma/common/index_hash.c @@ -0,0 +1,332 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_hash.c +/// \brief Validates Index by using a hash function +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "check.h" + + +typedef struct { + /// Sum of the Block sizes (including Block Padding) + lzma_vli blocks_size; + + /// Sum of the Uncompressed Size fields + lzma_vli uncompressed_size; + + /// Number of Records + lzma_vli count; + + /// Size of the List of Index Records as bytes + lzma_vli index_list_size; + + /// Check calculated from Unpadded Sizes and Uncompressed Sizes. + lzma_check_state check; + +} lzma_index_hash_info; + + +struct lzma_index_hash_s { + enum { + SEQ_BLOCK, + SEQ_COUNT, + SEQ_UNPADDED, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Information collected while decoding the actual Blocks. + lzma_index_hash_info blocks; + + /// Information collected from the Index field. + lzma_index_hash_info records; + + /// Number of Records not fully decoded + lzma_vli remaining; + + /// Unpadded Size currently being read from an Index Record. + lzma_vli unpadded_size; + + /// Uncompressed Size currently being read from an Index Record. + lzma_vli uncompressed_size; + + /// Position in variable-length integers when decoding them from + /// the List of Records. + size_t pos; + + /// CRC32 of the Index + uint32_t crc32; +}; + + +extern LZMA_API(lzma_index_hash *) +lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + if (index_hash == NULL) { + index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); + if (index_hash == NULL) + return NULL; + } + + index_hash->sequence = SEQ_BLOCK; + index_hash->blocks.blocks_size = 0; + index_hash->blocks.uncompressed_size = 0; + index_hash->blocks.count = 0; + index_hash->blocks.index_list_size = 0; + index_hash->records.blocks_size = 0; + index_hash->records.uncompressed_size = 0; + index_hash->records.count = 0; + index_hash->records.index_list_size = 0; + index_hash->unpadded_size = 0; + index_hash->uncompressed_size = 0; + index_hash->pos = 0; + index_hash->crc32 = 0; + + // These cannot fail because LZMA_CHECK_BEST is known to be supported. + (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); + (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); + + return index_hash; +} + + +extern LZMA_API(void) +lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + lzma_free(index_hash, allocator); + return; +} + + +extern LZMA_API(lzma_vli) +lzma_index_hash_size(const lzma_index_hash *index_hash) +{ + // Get the size of the Index from ->blocks instead of ->records for + // cases where application wants to know the Index Size before + // decoding the Index. + return index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size); +} + + +/// Updates the sizes and the hash without any validation. +static lzma_ret +hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, + lzma_vli uncompressed_size) +{ + info->blocks_size += vli_ceil4(unpadded_size); + info->uncompressed_size += uncompressed_size; + info->index_list_size += lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); + ++info->count; + + const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; + lzma_check_update(&info->check, LZMA_CHECK_BEST, + (const uint8_t *)(sizes), sizeof(sizes)); + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, + lzma_vli uncompressed_size) +{ + // Validate the arguments. + if (index_hash->sequence != SEQ_BLOCK + || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX + || uncompressed_size > LZMA_VLI_MAX) + return LZMA_PROG_ERROR; + + // Update the hash. + return_if_error(hash_append(&index_hash->blocks, + unpadded_size, uncompressed_size)); + + // Validate the properties of *info are still in allowed limits. + if (index_hash->blocks.blocks_size > LZMA_VLI_MAX + || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX + || index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_BACKWARD_SIZE_MAX + || index_stream_size(index_hash->blocks.blocks_size, + index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + // Catch zero input buffer here, because in contrast to Index encoder + // and decoder functions, applications call this function directly + // instead of via lzma_code(), which does the buffer checking. + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + // NOTE: This function has many similarities to index_encode() and + // index_decode() functions found from index_encoder.c and + // index_decoder.c. See the comments especially in index_encoder.c. + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (index_hash->sequence) { + case SEQ_BLOCK: + // Check the Index Indicator is present. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&index_hash->remaining, + &index_hash->pos, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + // The count must match the count of the Blocks decoded. + if (index_hash->remaining != index_hash->blocks.count) + return LZMA_DATA_ERROR; + + ret = LZMA_OK; + index_hash->pos = 0; + + // Handle the special case when there are no Blocks. + index_hash->sequence = index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + break; + } + + case SEQ_UNPADDED: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = index_hash->sequence == SEQ_UNPADDED + ? &index_hash->unpadded_size + : &index_hash->uncompressed_size; + + ret = lzma_vli_decode(size, &index_hash->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + index_hash->pos = 0; + + if (index_hash->sequence == SEQ_UNPADDED) { + if (index_hash->unpadded_size < UNPADDED_SIZE_MIN + || index_hash->unpadded_size + > UNPADDED_SIZE_MAX) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_UNCOMPRESSED; + } else { + // Update the hash. + return_if_error(hash_append(&index_hash->records, + index_hash->unpadded_size, + index_hash->uncompressed_size)); + + // Verify that we don't go over the known sizes. Note + // that this validation is simpler than the one used + // in lzma_index_hash_append(), because here we know + // that values in index_hash->blocks are already + // validated and we are fine as long as we don't + // exceed them in index_hash->records. + if (index_hash->blocks.blocks_size + < index_hash->records.blocks_size + || index_hash->blocks.uncompressed_size + < index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + < index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Check if this was the last Record. + index_hash->sequence = --index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_UNPADDED; + } + + break; + } + + case SEQ_PADDING_INIT: + index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( + index_hash->records.count, + index_hash->records.index_list_size)) & 3; + index_hash->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (index_hash->pos > 0) { + --index_hash->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Compare the sizes. + if (index_hash->blocks.blocks_size + != index_hash->records.blocks_size + || index_hash->blocks.uncompressed_size + != index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + != index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Finish the hashes and compare them. + lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); + lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); + if (memcmp(index_hash->blocks.check.buffer.u8, + index_hash->records.check.buffer.u8, + lzma_check_size(LZMA_CHECK_BEST)) != 0) + return LZMA_DATA_ERROR; + + // Finish the CRC32 calculation. + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + index_hash->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((index_hash->crc32 >> (index_hash->pos * 8)) + & 0xFF) != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++index_hash->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + return ret; +} diff --git a/liblzma/common/mythread.h b/liblzma/common/mythread.h new file mode 100644 index 0000000..476c2fc --- /dev/null +++ b/liblzma/common/mythread.h @@ -0,0 +1,42 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mythread.h +/// \brief Wrappers for threads +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" + + +#ifdef HAVE_PTHREAD +# include + +# define mythread_once(func) \ + do { \ + static pthread_once_t once_ = PTHREAD_ONCE_INIT; \ + pthread_once(&once_, &func); \ + } while (0) + +# define mythread_sigmask(how, set, oset) \ + pthread_sigmask(how, set, oset) + +#else + +# define mythread_once(func) \ + do { \ + static bool once_ = false; \ + if (!once_) { \ + func(); \ + once_ = true; \ + } \ + } while (0) + +# define mythread_sigmask(how, set, oset) \ + sigprocmask(how, set, oset) + +#endif diff --git a/liblzma/common/stream_decoder.c b/liblzma/common/stream_decoder.c new file mode 100644 index 0000000..d8c45ff --- /dev/null +++ b/liblzma/common/stream_decoder.c @@ -0,0 +1,458 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_decoder.c +/// \brief Decodes .xz Streams +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_decoder.h" +#include "block_decoder.h" + + +struct lzma_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, + SEQ_STREAM_PADDING, + } sequence; + + /// Block or Metadata decoder. This takes little memory and the same + /// data structure can be used to decode every Block Header, so it's + /// a good idea to have a separate lzma_next_coder structure for it. + lzma_next_coder block_decoder; + + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. + lzma_block block_options; + + /// Stream Flags from Stream Header + lzma_stream_flags stream_flags; + + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; + + /// Memory usage limit + uint64_t memlimit; + + /// Amount of memory actually needed (only an estimate) + uint64_t memusage; + + /// If true, LZMA_NO_CHECK is returned if the Stream has + /// no integrity check. + bool tell_no_check; + + /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has + /// an integrity check that isn't supported by this liblzma build. + bool tell_unsupported_check; + + /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. + bool tell_any_check; + + /// If true, we will decode concatenated Streams that possibly have + /// Stream Padding between or after them. LZMA_STREAM_END is returned + /// once the application isn't giving us any new input, and we aren't + /// in the middle of a Stream, and possible Stream Padding is a + /// multiple of four bytes. + bool concatenated; + + /// When decoding concatenated Streams, this is true as long as we + /// are decoding the first Stream. This is needed to avoid misleading + /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic + /// bytes. + bool first_stream; + + /// Write position in buffer[] and position in Stream Padding + size_t pos; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +}; + + +static lzma_ret +stream_decoder_reset(lzma_coder *coder, lzma_allocator *allocator) +{ + // Initialize the Index hash used to verify the Index. + coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); + if (coder->index_hash == NULL) + return LZMA_MEM_ERROR; + + // Reset the rest of the variables. + coder->sequence = SEQ_STREAM_HEADER; + coder->pos = 0; + + return LZMA_OK; +} + + +static lzma_ret +stream_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // When decoding the actual Block, it may be able to produce more + // output even if we don't give it any new input. + while (true) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Header yet. + if (coder->pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->pos = 0; + + // Decode the Stream Header. + const lzma_ret ret = lzma_stream_header_decode( + &coder->stream_flags, coder->buffer); + if (ret != LZMA_OK) + return ret == LZMA_FORMAT_ERROR && !coder->first_stream + ? LZMA_DATA_ERROR : ret; + + // If we are decoding concatenated Streams, and the later + // Streams have invalid Header Magic Bytes, we give + // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. + coder->first_stream = false; + + // Copy the type of the Check so that Block Header and Block + // decoders see it. + coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_*_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; + + // Detect if there's no integrity check or if it is + // unsupported if those were requested by the application. + if (coder->tell_no_check && coder->stream_flags.check + == LZMA_CHECK_NONE) + return LZMA_NO_CHECK; + + if (coder->tell_unsupported_check + && !lzma_check_is_supported( + coder->stream_flags.check)) + return LZMA_UNSUPPORTED_CHECK; + + if (coder->tell_any_check) + return LZMA_GET_CHECK; + } + + // Fall through + + case SEQ_BLOCK_HEADER: { + if (*in_pos >= in_size) + return LZMA_OK; + + if (coder->pos == 0) { + // Detect if it's Index. + if (in[*in_pos] == 0x00) { + coder->sequence = SEQ_INDEX; + break; + } + + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); + } + + // Copy the Block Header to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + coder->block_options.header_size); + + // Return if we didn't get the whole Block Header yet. + if (coder->pos < coder->block_options.header_size) + return LZMA_OK; + + coder->pos = 0; + + // Version 0 is currently the only possible version. + coder->block_options.version = 0; + + // Set up a buffer to hold the filter chain. Block Header + // decoder will initialize all members of this array so + // we don't need to do it here. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + coder->block_options.filters = filters; + + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); + + // Check the memory usage limit. + const uint64_t memusage = lzma_raw_decoder_memusage(filters); + lzma_ret ret; +#if __MOJOSETUP__ + size_t i; +#endif + + if (memusage == UINT64_MAX) { + // One or more unknown Filter IDs. + ret = LZMA_OPTIONS_ERROR; + } else { + // Now we can set coder->memusage since we know that + // the filter chain is valid. We don't want + // lzma_memusage() to return UINT64_MAX in case of + // invalid filter chain. + coder->memusage = memusage; + + if (memusage > coder->memlimit) { + // The chain would need too much memory. + ret = LZMA_MEMLIMIT_ERROR; + } else { + // Memory usage is OK. + // Initialize the Block decoder. + ret = lzma_block_decoder_init( + &coder->block_decoder, + allocator, + &coder->block_options); + } + } + + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. +#if __MOJOSETUP__ + for (i = 0; i < LZMA_FILTERS_MAX; ++i) +#else + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) +#endif + lzma_free(filters[i].options, allocator); + + coder->block_options.filters = NULL; + + // Check if memory usage calculation and Block enocoder + // initialization succeeded. + if (ret != LZMA_OK) + return ret; + + coder->sequence = SEQ_BLOCK; + } + + // Fall through + + case SEQ_BLOCK: { + const lzma_ret ret = coder->block_decoder.code( + coder->block_decoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + if (ret != LZMA_STREAM_END) + return ret; + + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_unpadded_size( + &coder->block_options), + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_INDEX: { + // If we don't have any input, don't call + // lzma_index_hash_decode() since it would return + // LZMA_BUF_ERROR, which we must not do here. + if (*in_pos >= in_size) + return LZMA_OK; + + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any). + const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + return ret; + + coder->sequence = SEQ_STREAM_FOOTER; + } + + // Fall through + + case SEQ_STREAM_FOOTER: { + // Copy the Stream Footer to the internal buffer. + lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Footer yet. + if (coder->pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->pos = 0; + + // Decode the Stream Footer. The decoder gives + // LZMA_FORMAT_ERROR if the magic bytes don't match, + // so convert that return code to LZMA_DATA_ERROR. + lzma_stream_flags footer_flags; + const lzma_ret ret = lzma_stream_footer_decode( + &footer_flags, coder->buffer); + if (ret != LZMA_OK) + return ret == LZMA_FORMAT_ERROR + ? LZMA_DATA_ERROR : ret; + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. + return_if_error(lzma_stream_flags_compare( + &coder->stream_flags, &footer_flags)); + + if (!coder->concatenated) + return LZMA_STREAM_END; + + coder->sequence = SEQ_STREAM_PADDING; + } + + // Fall through + + case SEQ_STREAM_PADDING: + assert(coder->concatenated); + + // Skip over possible Stream Padding. + while (true) { + if (*in_pos >= in_size) { + // Unless LZMA_FINISH was used, we cannot + // know if there's more input coming later. + if (action != LZMA_FINISH) + return LZMA_OK; + + // Stream Padding must be a multiple of + // four bytes. + return coder->pos == 0 + ? LZMA_STREAM_END + : LZMA_DATA_ERROR; + } + + // If the byte is not zero, it probably indicates + // beginning of a new Stream (or the file is corrupt). + if (in[*in_pos] != 0x00) + break; + + ++*in_pos; + coder->pos = (coder->pos + 1) & 3; + } + + // Stream Padding must be a multiple of four bytes (empty + // Stream Padding is OK). + if (coder->pos != 0) { + ++*in_pos; + return LZMA_DATA_ERROR; + } + + // Prepare to decode the next Stream. + return_if_error(stream_decoder_reset(coder, allocator)); + break; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + // Never reached +} + + +static void +stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->block_decoder, allocator); + lzma_index_hash_end(coder->index_hash, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_check +stream_decoder_get_check(const lzma_coder *coder) +{ + return coder->stream_flags.check; +} + + +static lzma_ret +stream_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, + uint64_t *old_memlimit, uint64_t new_memlimit) +{ + *memusage = coder->memusage; + *old_memlimit = coder->memlimit; + + if (new_memlimit != 0) { + if (new_memlimit < coder->memusage) + return LZMA_MEMLIMIT_ERROR; + + coder->memlimit = new_memlimit; + } + + return LZMA_OK; +} + + +extern lzma_ret +lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + uint64_t memlimit, uint32_t flags) +{ + lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator); + + if (memlimit == 0) + return LZMA_PROG_ERROR; + + if (flags & ~LZMA_SUPPORTED_FLAGS) + return LZMA_OPTIONS_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &stream_decode; + next->end = &stream_decoder_end; + next->get_check = &stream_decoder_get_check; + next->memconfig = &stream_decoder_memconfig; + + next->coder->block_decoder = LZMA_NEXT_CODER_INIT; + next->coder->index_hash = NULL; + } + + next->coder->memlimit = memlimit; + next->coder->memusage = LZMA_MEMUSAGE_BASE; + next->coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0; + next->coder->tell_unsupported_check + = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; + next->coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; + next->coder->concatenated = (flags & LZMA_CONCATENATED) != 0; + next->coder->first_stream = true; + + return stream_decoder_reset(next->coder, allocator); +} + + +extern LZMA_API(lzma_ret) +lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) +{ + lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/liblzma/common/stream_decoder.h b/liblzma/common/stream_decoder.h new file mode 100644 index 0000000..e54ac28 --- /dev/null +++ b/liblzma/common/stream_decoder.h @@ -0,0 +1,21 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_decoder.h +/// \brief Decodes .xz Streams +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_STREAM_DECODER_H +#define LZMA_STREAM_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_stream_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, uint64_t memlimit, uint32_t flags); + +#endif diff --git a/liblzma/common/stream_flags_common.c b/liblzma/common/stream_flags_common.c new file mode 100644 index 0000000..fbe8eb8 --- /dev/null +++ b/liblzma/common/stream_flags_common.c @@ -0,0 +1,47 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_common.c +/// \brief Common stuff for Stream flags coders +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_flags_common.h" + + +const uint8_t lzma_header_magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; +const uint8_t lzma_footer_magic[2] = { 0x59, 0x5A }; + + +extern LZMA_API(lzma_ret) +lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) +{ + // We can compare only version 0 structures. + if (a->version != 0 || b->version != 0) + return LZMA_OPTIONS_ERROR; + + // Check type + if ((unsigned int)(a->check) > LZMA_CHECK_ID_MAX + || (unsigned int)(b->check) > LZMA_CHECK_ID_MAX) + return LZMA_PROG_ERROR; + + if (a->check != b->check) + return LZMA_DATA_ERROR; + + // Backward Sizes are compared only if they are known in both. + if (a->backward_size != LZMA_VLI_UNKNOWN + && b->backward_size != LZMA_VLI_UNKNOWN) { + if (!is_backward_size_valid(a) || !is_backward_size_valid(b)) + return LZMA_PROG_ERROR; + + if (a->backward_size != b->backward_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} diff --git a/liblzma/common/stream_flags_common.h b/liblzma/common/stream_flags_common.h new file mode 100644 index 0000000..9f3122a --- /dev/null +++ b/liblzma/common/stream_flags_common.h @@ -0,0 +1,33 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_common.h +/// \brief Common stuff for Stream flags coders +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_STREAM_FLAGS_COMMON_H +#define LZMA_STREAM_FLAGS_COMMON_H + +#include "common.h" + +/// Size of the Stream Flags field +#define LZMA_STREAM_FLAGS_SIZE 2 + +extern const uint8_t lzma_header_magic[6]; +extern const uint8_t lzma_footer_magic[2]; + + +static inline bool +is_backward_size_valid(const lzma_stream_flags *options) +{ + return options->backward_size >= LZMA_BACKWARD_SIZE_MIN + && options->backward_size <= LZMA_BACKWARD_SIZE_MAX + && (options->backward_size & 3) == 0; +} + +#endif diff --git a/liblzma/common/stream_flags_decoder.c b/liblzma/common/stream_flags_decoder.c new file mode 100644 index 0000000..1bc2f97 --- /dev/null +++ b/liblzma/common/stream_flags_decoder.c @@ -0,0 +1,82 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_decoder.c +/// \brief Decodes Stream Header and Stream Footer from .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_flags_common.h" + + +static bool +stream_flags_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Reserved bits must be unset. + if (in[0] != 0x00 || (in[1] & 0xF0)) + return true; + + options->version = 0; + options->check = in[1] & 0x0F; + + return false; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Magic + if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // Verify the CRC32 so we can distinguish between corrupt + // and unsupported files. + const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != unaligned_read32le(in + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(lzma_header_magic))) + return LZMA_OPTIONS_ERROR; + + // Set Backward Size to indicate unknown value. That way + // lzma_stream_flags_compare() can be used to compare Stream Header + // and Stream Footer while keeping it useful also for comparing + // two Stream Footers. + options->backward_size = LZMA_VLI_UNKNOWN; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) +{ + // Magic + if (memcmp(in + sizeof(uint32_t) * 2 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32(in + sizeof(uint32_t), + sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != unaligned_read32le(in)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(uint32_t) * 2)) + return LZMA_OPTIONS_ERROR; + + // Backward Size + options->backward_size = unaligned_read32le(in + sizeof(uint32_t)); + options->backward_size = (options->backward_size + 1) * 4; + + return LZMA_OK; +} diff --git a/liblzma/common/sysdefs.h b/liblzma/common/sysdefs.h new file mode 100644 index 0000000..c6cb2d0 --- /dev/null +++ b/liblzma/common/sysdefs.h @@ -0,0 +1,184 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sysdefs.h +/// \brief Common includes, definitions, system-specific things etc. +/// +/// This file is used also by the lzma command line tool, that's why this +/// file is separate from common.h. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SYSDEFS_H +#define LZMA_SYSDEFS_H + +#if __MOJOSETUP__ + #ifndef _MSC_VER + #define HAVE_STDINT_H 1 + #endif + #define HAVE_INTTYPES_H 1 + #define HAVE_LIMITS_H 1 + #define HAVE_STRING_H 1 + #define HAVE_STDBOOL_H 1 +#endif + +////////////// +// Includes // +////////////// + +#ifdef HAVE_CONFIG_H +# include +#endif + +// Get standard-compliant stdio functions under MinGW and MinGW-w64. +#ifdef __MINGW32__ +# define __USE_MINGW_ANSI_STDIO 1 +#endif + +// size_t and NULL +#include + +#ifdef HAVE_INTTYPES_H +# include +#endif + +// C99 says that inttypes.h always includes stdint.h, but some systems +// don't do that, and require including stdint.h separately. +#ifdef HAVE_STDINT_H +# include +#endif + +// Some pre-C99 systems have SIZE_MAX in limits.h instead of stdint.h. The +// limits are also used to figure out some macros missing from pre-C99 systems. +#ifdef HAVE_LIMITS_H +# include +#endif + +// Be more compatible with systems that have non-conforming inttypes.h. +// We assume that int is 32-bit and that long is either 32-bit or 64-bit. +// Full Autoconf test could be more correct, but this should work well enough. +// Note that this duplicates some code from lzma.h, but this is better since +// we can work without inttypes.h thanks to Autoconf tests. +#ifndef UINT32_C +# if UINT_MAX != 4294967295U +# error UINT32_C is not defined and unsigned int is not 32-bit. +# endif +# define UINT32_C(n) n ## U +#endif +#ifndef UINT32_MAX +# define UINT32_MAX UINT32_C(4294967295) +#endif +#ifndef PRIu32 +# define PRIu32 "u" +#endif +#ifndef PRIX32 +# define PRIX32 "X" +#endif + +#if ULONG_MAX == 4294967295UL +# ifndef UINT64_C +# define UINT64_C(n) n ## ULL +# endif +# ifndef PRIu64 +# define PRIu64 "llu" +# endif +# ifndef PRIX64 +# define PRIX64 "llX" +# endif +#else +# ifndef UINT64_C +# define UINT64_C(n) n ## UL +# endif +# ifndef PRIu64 +# define PRIu64 "lu" +# endif +# ifndef PRIX64 +# define PRIX64 "lX" +# endif +#endif +#ifndef UINT64_MAX +# define UINT64_MAX UINT64_C(18446744073709551615) +#endif + +// Interix has broken header files, which typedef size_t to unsigned long, +// but a few lines later define SIZE_MAX to INT32_MAX. +#ifdef __INTERIX +# undef SIZE_MAX +#endif + +// The code currently assumes that size_t is either 32-bit or 64-bit. +#ifndef SIZE_MAX +# if SIZEOF_SIZE_T == 4 +# define SIZE_MAX UINT32_MAX +# elif SIZEOF_SIZE_T == 8 +# define SIZE_MAX UINT64_MAX +# else +# error size_t is not 32-bit or 64-bit +# endif +#endif +#if SIZE_MAX != UINT32_MAX && SIZE_MAX != UINT64_MAX +# error size_t is not 32-bit or 64-bit +#endif + +#include +#include + +// Pre-C99 systems lack stdbool.h. All the code in LZMA Utils must be written +// so that it works with fake bool type, for example: +// +// bool foo = (flags & 0x100) != 0; +// bool bar = !!(flags & 0x100); +// +// This works with the real C99 bool but breaks with fake bool: +// +// bool baz = (flags & 0x100); +// +#ifdef HAVE_STDBOOL_H +# include +#else +# if ! HAVE__BOOL +typedef unsigned char _Bool; +# endif +# define bool _Bool +# define false 0 +# define true 1 +# define __bool_true_false_are_defined 1 +#endif + +// string.h should be enough but let's include strings.h and memory.h too if +// they exists, since that shouldn't do any harm, but may improve portability. +#ifdef HAVE_STRING_H +# include +#endif + +#ifdef HAVE_STRINGS_H +# include +#endif + +#ifdef HAVE_MEMORY_H +# include +#endif + + +//////////// +// Macros // +//////////// + +#undef memzero +#define memzero(s, n) memset(s, 0, n) + +// NOTE: Avoid using MIN() and MAX(), because even conditionally defining +// those macros can cause some portability trouble, since on some systems +// the system headers insist defining their own versions. +#define my_min(x, y) ((x) < (y) ? (x) : (y)) +#define my_max(x, y) ((x) > (y) ? (x) : (y)) + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) +#endif + +#endif diff --git a/liblzma/common/tuklib_common.h b/liblzma/common/tuklib_common.h new file mode 100644 index 0000000..31fbab5 --- /dev/null +++ b/liblzma/common/tuklib_common.h @@ -0,0 +1,71 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_common.h +/// \brief Common definitions for tuklib modules +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_COMMON_H +#define TUKLIB_COMMON_H + +// The config file may be replaced by a package-specific file. +// It should include at least stddef.h, inttypes.h, and limits.h. +#include "tuklib_config.h" + +// TUKLIB_SYMBOL_PREFIX is prefixed to all symbols exported by +// the tuklib modules. If you use a tuklib module in a library, +// you should use TUKLIB_SYMBOL_PREFIX to make sure that there +// are no symbol conflicts in case someone links your library +// into application that also uses the same tuklib module. +#ifndef TUKLIB_SYMBOL_PREFIX +# define TUKLIB_SYMBOL_PREFIX +#endif + +#define TUKLIB_CAT_X(a, b) a ## b +#define TUKLIB_CAT(a, b) TUKLIB_CAT_X(a, b) + +#ifndef TUKLIB_SYMBOL +# define TUKLIB_SYMBOL(sym) TUKLIB_CAT(TUKLIB_SYMBOL_PREFIX, sym) +#endif + +#ifndef TUKLIB_DECLS_BEGIN +# ifdef __cplusplus +# define TUKLIB_DECLS_BEGIN extern "C" { +# else +# define TUKLIB_DECLS_BEGIN +# endif +#endif + +#ifndef TUKLIB_DECLS_END +# ifdef __cplusplus +# define TUKLIB_DECLS_END } +# else +# define TUKLIB_DECLS_END +# endif +#endif + +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define TUKLIB_GNUC_REQ(major, minor) \ + ((__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) \ + || __GNUC__ > (major)) +#else +# define TUKLIB_GNUC_REQ(major, minor) 0 +#endif + +#if TUKLIB_GNUC_REQ(2, 5) +# define tuklib_attr_noreturn __attribute__((__noreturn__)) +#else +# define tuklib_attr_noreturn +#endif + +#if (defined(_WIN32) && !defined(__CYGWIN__)) \ + || defined(__OS2__) || defined(__MSDOS__) +# define TUKLIB_DOSLIKE 1 +#endif + +#endif diff --git a/liblzma/common/tuklib_config.h b/liblzma/common/tuklib_config.h new file mode 100644 index 0000000..549cb24 --- /dev/null +++ b/liblzma/common/tuklib_config.h @@ -0,0 +1,7 @@ +#ifdef HAVE_CONFIG_H +# include "sysdefs.h" +#else +# include +# include +# include +#endif diff --git a/liblzma/common/tuklib_integer.h b/liblzma/common/tuklib_integer.h new file mode 100644 index 0000000..e6daa77 --- /dev/null +++ b/liblzma/common/tuklib_integer.h @@ -0,0 +1,523 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file tuklib_integer.h +/// \brief Various integer and bit operations +/// +/// This file provides macros or functions to do some basic integer and bit +/// operations. +/// +/// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l): +/// - Byte swapping: bswapXX(num) +/// - Byte order conversions to/from native: convXXYe(num) +/// - Aligned reads: readXXYe(ptr) +/// - Aligned writes: writeXXYe(ptr, num) +/// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr) +/// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num) +/// +/// Since they can macros, the arguments should have no side effects since +/// they may be evaluated more than once. +/// +/// \todo PowerPC and possibly some other architectures support +/// byte swapping load and store instructions. This file +/// doesn't take advantage of those instructions. +/// +/// Bit scan operations for non-zero 32-bit integers: +/// - Bit scan reverse (find highest non-zero bit): bsr32(num) +/// - Count leading zeros: clz32(num) +/// - Count trailing zeros: ctz32(num) +/// - Bit scan forward (simply an alias for ctz32()): bsf32(num) +/// +/// The above bit scan operations return 0-31. If num is zero, +/// the result is undefined. +// +// Authors: Lasse Collin +// Joachim Henke +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef TUKLIB_INTEGER_H +#define TUKLIB_INTEGER_H + +#include "tuklib_common.h" + + +//////////////////////////////////////// +// Operating system specific features // +//////////////////////////////////////// + +#if defined(HAVE_BYTESWAP_H) + // glibc, uClibc, dietlibc +# include +# ifdef HAVE_BSWAP_16 +# define bswap16(num) bswap_16(num) +# endif +# ifdef HAVE_BSWAP_32 +# define bswap32(num) bswap_32(num) +# endif +# ifdef HAVE_BSWAP_64 +# define bswap64(num) bswap_64(num) +# endif + +#elif defined(HAVE_SYS_ENDIAN_H) + // *BSDs and Darwin +# include + +#elif defined(HAVE_SYS_BYTEORDER_H) + // Solaris +# include +# ifdef BSWAP_16 +# define bswap16(num) BSWAP_16(num) +# endif +# ifdef BSWAP_32 +# define bswap32(num) BSWAP_32(num) +# endif +# ifdef BSWAP_64 +# define bswap64(num) BSWAP_64(num) +# endif +# ifdef BE_16 +# define conv16be(num) BE_16(num) +# endif +# ifdef BE_32 +# define conv32be(num) BE_32(num) +# endif +# ifdef BE_64 +# define conv64be(num) BE_64(num) +# endif +# ifdef LE_16 +# define conv16le(num) LE_16(num) +# endif +# ifdef LE_32 +# define conv32le(num) LE_32(num) +# endif +# ifdef LE_64 +# define conv64le(num) LE_64(num) +# endif +#endif + + +/////////////////// +// Byte swapping // +/////////////////// + +#ifndef bswap16 +# define bswap16(num) \ + (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8)) +#endif + +#ifndef bswap32 +# define bswap32(num) \ + ( (((uint32_t)(num) << 24) ) \ + | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \ + | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \ + | (((uint32_t)(num) >> 24) ) ) +#endif + +#ifndef bswap64 +# define bswap64(num) \ + ( (((uint64_t)(num) << 56) ) \ + | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \ + | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \ + | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \ + | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \ + | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \ + | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \ + | (((uint64_t)(num) >> 56) ) ) +#endif + +// Define conversion macros using the basic byte swapping macros. +#ifdef WORDS_BIGENDIAN +# ifndef conv16be +# define conv16be(num) ((uint16_t)(num)) +# endif +# ifndef conv32be +# define conv32be(num) ((uint32_t)(num)) +# endif +# ifndef conv64be +# define conv64be(num) ((uint64_t)(num)) +# endif +# ifndef conv16le +# define conv16le(num) bswap16(num) +# endif +# ifndef conv32le +# define conv32le(num) bswap32(num) +# endif +# ifndef conv64le +# define conv64le(num) bswap64(num) +# endif +#else +# ifndef conv16be +# define conv16be(num) bswap16(num) +# endif +# ifndef conv32be +# define conv32be(num) bswap32(num) +# endif +# ifndef conv64be +# define conv64be(num) bswap64(num) +# endif +# ifndef conv16le +# define conv16le(num) ((uint16_t)(num)) +# endif +# ifndef conv32le +# define conv32le(num) ((uint32_t)(num)) +# endif +# ifndef conv64le +# define conv64le(num) ((uint64_t)(num)) +# endif +#endif + + +////////////////////////////// +// Aligned reads and writes // +////////////////////////////// + +static inline uint16_t +read16be(const uint8_t *buf) +{ + uint16_t num = *(const uint16_t *)buf; + return conv16be(num); +} + + +static inline uint16_t +read16le(const uint8_t *buf) +{ + uint16_t num = *(const uint16_t *)buf; + return conv16le(num); +} + + +static inline uint32_t +read32be(const uint8_t *buf) +{ + uint32_t num = *(const uint32_t *)buf; + return conv32be(num); +} + + +static inline uint32_t +read32le(const uint8_t *buf) +{ + uint32_t num = *(const uint32_t *)buf; + return conv32le(num); +} + + +static inline uint64_t +read64be(const uint8_t *buf) +{ + uint64_t num = *(const uint64_t *)buf; + return conv64be(num); +} + + +static inline uint64_t +read64le(const uint8_t *buf) +{ + uint64_t num = *(const uint64_t *)buf; + return conv64le(num); +} + + +// NOTE: Possible byte swapping must be done in a macro to allow GCC +// to optimize byte swapping of constants when using glibc's or *BSD's +// byte swapping macros. The actual write is done in an inline function +// to make type checking of the buf pointer possible similarly to readXXYe() +// functions. + +#define write16be(buf, num) write16ne((buf), conv16be(num)) +#define write16le(buf, num) write16ne((buf), conv16le(num)) +#define write32be(buf, num) write32ne((buf), conv32be(num)) +#define write32le(buf, num) write32ne((buf), conv32le(num)) +#define write64be(buf, num) write64ne((buf), conv64be(num)) +#define write64le(buf, num) write64ne((buf), conv64le(num)) + + +static inline void +write16ne(uint8_t *buf, uint16_t num) +{ + *(uint16_t *)buf = num; + return; +} + + +static inline void +write32ne(uint8_t *buf, uint32_t num) +{ + *(uint32_t *)buf = num; + return; +} + + +static inline void +write64ne(uint8_t *buf, uint64_t num) +{ + *(uint64_t *)buf = num; + return; +} + + +//////////////////////////////// +// Unaligned reads and writes // +//////////////////////////////// + +// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and +// 32-bit unaligned integer loads and stores. It's possible that 64-bit +// unaligned access doesn't work or is slower than byte-by-byte access. +// Since unaligned 64-bit is probably not needed as often as 16-bit or +// 32-bit, we simply don't support 64-bit unaligned access for now. +#ifdef TUKLIB_FAST_UNALIGNED_ACCESS +# define unaligned_read16be read16be +# define unaligned_read16le read16le +# define unaligned_read32be read32be +# define unaligned_read32le read32le +# define unaligned_write16be write16be +# define unaligned_write16le write16le +# define unaligned_write32be write32be +# define unaligned_write32le write32le + +#else + +static inline uint16_t +unaligned_read16be(const uint8_t *buf) +{ + uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1]; + return num; +} + + +static inline uint16_t +unaligned_read16le(const uint8_t *buf) +{ + uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8); + return num; +} + + +static inline uint32_t +unaligned_read32be(const uint8_t *buf) +{ + uint32_t num = (uint32_t)buf[0] << 24; + num |= (uint32_t)buf[1] << 16; + num |= (uint32_t)buf[2] << 8; + num |= (uint32_t)buf[3]; + return num; +} + + +static inline uint32_t +unaligned_read32le(const uint8_t *buf) +{ + uint32_t num = (uint32_t)buf[0]; + num |= (uint32_t)buf[1] << 8; + num |= (uint32_t)buf[2] << 16; + num |= (uint32_t)buf[3] << 24; + return num; +} + + +static inline void +unaligned_write16be(uint8_t *buf, uint16_t num) +{ + buf[0] = num >> 8; + buf[1] = num; + return; +} + + +static inline void +unaligned_write16le(uint8_t *buf, uint16_t num) +{ + buf[0] = num; + buf[1] = num >> 8; + return; +} + + +static inline void +unaligned_write32be(uint8_t *buf, uint32_t num) +{ + buf[0] = num >> 24; + buf[1] = num >> 16; + buf[2] = num >> 8; + buf[3] = num; + return; +} + + +static inline void +unaligned_write32le(uint8_t *buf, uint32_t num) +{ + buf[0] = num; + buf[1] = num >> 8; + buf[2] = num >> 16; + buf[3] = num >> 24; + return; +} + +#endif + + +static inline uint32_t +bsr32(uint32_t n) +{ + // Check for ICC first, since it tends to define __GNUC__ too. +#if defined(__INTEL_COMPILER) + return _bit_scan_reverse(n); + +#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX + // GCC >= 3.4 has __builtin_clz(), which gives good results on + // multiple architectures. On x86, __builtin_clz() ^ 31U becomes + // either plain BSR (so the XOR gets optimized away) or LZCNT and + // XOR (if -march indicates that SSE4a instructions are supported). + return __builtin_clz(n) ^ 31U; + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + // MSVC isn't supported by tuklib, but since this code exists, + // it doesn't hurt to have it here anyway. + uint32_t i; + _BitScanReverse((DWORD *)&i, n); + return i; + +#else + uint32_t i = 31; + + if ((n & UINT32_C(0xFFFF0000)) == 0) { + n <<= 16; + i = 15; + } + + if ((n & UINT32_C(0xFF000000)) == 0) { + n <<= 8; + i -= 8; + } + + if ((n & UINT32_C(0xF0000000)) == 0) { + n <<= 4; + i -= 4; + } + + if ((n & UINT32_C(0xC0000000)) == 0) { + n <<= 2; + i -= 2; + } + + if ((n & UINT32_C(0x80000000)) == 0) + --i; + + return i; +#endif +} + + +static inline uint32_t +clz32(uint32_t n) +{ +#if defined(__INTEL_COMPILER) + return _bit_scan_reverse(n) ^ 31U; + +#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX + return __builtin_clz(n); + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsrl %1, %0\n\t" + "xorl $31, %0" + : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + uint32_t i; + _BitScanReverse((DWORD *)&i, n); + return i ^ 31U; + +#else + uint32_t i = 0; + + if ((n & UINT32_C(0xFFFF0000)) == 0) { + n <<= 16; + i = 16; + } + + if ((n & UINT32_C(0xFF000000)) == 0) { + n <<= 8; + i += 8; + } + + if ((n & UINT32_C(0xF0000000)) == 0) { + n <<= 4; + i += 4; + } + + if ((n & UINT32_C(0xC0000000)) == 0) { + n <<= 2; + i += 2; + } + + if ((n & UINT32_C(0x80000000)) == 0) + ++i; + + return i; +#endif +} + + +static inline uint32_t +ctz32(uint32_t n) +{ +#if defined(__INTEL_COMPILER) + return _bit_scan_forward(n); + +#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX + return __builtin_ctz(n); + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + uint32_t i; + __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); + return i; + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + uint32_t i; + _BitScanForward((DWORD *)&i, n); + return i; + +#else + uint32_t i = 0; + + if ((n & UINT32_C(0x0000FFFF)) == 0) { + n >>= 16; + i = 16; + } + + if ((n & UINT32_C(0x000000FF)) == 0) { + n >>= 8; + i += 8; + } + + if ((n & UINT32_C(0x0000000F)) == 0) { + n >>= 4; + i += 4; + } + + if ((n & UINT32_C(0x00000003)) == 0) { + n >>= 2; + i += 2; + } + + if ((n & UINT32_C(0x00000001)) == 0) + ++i; + + return i; +#endif +} + +#define bsf32 ctz32 + +#endif diff --git a/liblzma/common/vli_decoder.c b/liblzma/common/vli_decoder.c new file mode 100644 index 0000000..c181828 --- /dev/null +++ b/liblzma/common/vli_decoder.c @@ -0,0 +1,86 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_decoder.c +/// \brief Decodes variable-length integers +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_ret) +lzma_vli_decode(lzma_vli *restrict vli, size_t *vli_pos, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) { + vli_pos = &vli_pos_internal; + *vli = 0; + + // If there's no input, use LZMA_DATA_ERROR. This way it is + // easy to decode VLIs from buffers that have known size, + // and get the correct error code in case the buffer is + // too short. + if (*in_pos >= in_size) + return LZMA_DATA_ERROR; + + } else { + // Initialize *vli when starting to decode a new integer. + if (*vli_pos == 0) + *vli = 0; + + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX + || (*vli >> (*vli_pos * 7)) != 0) + return LZMA_PROG_ERROR;; + + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + } + + do { + // Read the next byte. Use a temporary variable so that we + // can update *in_pos immediately. + const uint8_t byte = in[*in_pos]; + ++*in_pos; + + // Add the newly read byte to *vli. + *vli += (lzma_vli)(byte & 0x7F) << (*vli_pos * 7); + ++*vli_pos; + + // Check if this is the last byte of a multibyte integer. + if ((byte & 0x80) == 0) { + // We don't allow using variable-length integers as + // padding i.e. the encoding must use the most the + // compact form. + if (byte == 0x00 && *vli_pos > 1) + return LZMA_DATA_ERROR; + + return vli_pos == &vli_pos_internal + ? LZMA_OK : LZMA_STREAM_END; + } + + // There is at least one more byte coming. If we have already + // read maximum number of bytes, the integer is considered + // corrupt. + // + // If we need bigger integers in future, old versions liblzma + // will confusingly indicate the file being corrupt istead of + // unsupported. I suppose it's still better this way, because + // in the foreseeable future (writing this in 2008) the only + // reason why files would appear having over 63-bit integers + // is that the files are simply corrupt. + if (*vli_pos == LZMA_VLI_BYTES_MAX) + return LZMA_DATA_ERROR; + + } while (*in_pos < in_size); + + return vli_pos == &vli_pos_internal ? LZMA_DATA_ERROR : LZMA_OK; +} diff --git a/liblzma/common/vli_size.c b/liblzma/common/vli_size.c new file mode 100644 index 0000000..ec1b4fa --- /dev/null +++ b/liblzma/common/vli_size.c @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file vli_size.c +/// \brief Calculates the encoded size of a variable-length integer +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(uint32_t) +lzma_vli_size(lzma_vli vli) +{ + if (vli > LZMA_VLI_MAX) + return 0; + + uint32_t i = 0; + do { + vli >>= 7; + ++i; + } while (vli != 0); + + assert(i <= LZMA_VLI_BYTES_MAX); + return i; +} diff --git a/liblzma/delta/delta_common.c b/liblzma/delta/delta_common.c new file mode 100644 index 0000000..930ad21 --- /dev/null +++ b/liblzma/delta/delta_common.c @@ -0,0 +1,70 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_common.c +/// \brief Common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_common.h" +#include "delta_private.h" + + +static void +delta_coder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // Allocate memory for the decoder if needed. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + // End function is the same for encoder and decoder. + next->end = &delta_coder_end; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Validate the options. + if (lzma_delta_coder_memusage(filters[0].options) == UINT64_MAX) + return LZMA_OPTIONS_ERROR; + + // Set the delta distance. + const lzma_options_delta *opt = filters[0].options; + next->coder->distance = opt->dist; + + // Initialize the rest of the variables. + next->coder->pos = 0; + memzero(next->coder->history, LZMA_DELTA_DIST_MAX); + + // Initialize the next decoder in the chain, if any. + return lzma_next_filter_init(&next->coder->next, + allocator, filters + 1); +} + + +extern uint64_t +lzma_delta_coder_memusage(const void *options) +{ + const lzma_options_delta *opt = options; + + if (opt == NULL || opt->type != LZMA_DELTA_TYPE_BYTE + || opt->dist < LZMA_DELTA_DIST_MIN + || opt->dist > LZMA_DELTA_DIST_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder); +} diff --git a/liblzma/delta/delta_common.h b/liblzma/delta/delta_common.h new file mode 100644 index 0000000..7e7e1ba --- /dev/null +++ b/liblzma/delta/delta_common.h @@ -0,0 +1,20 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_common.h +/// \brief Common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_COMMON_H +#define LZMA_DELTA_COMMON_H + +#include "common.h" + +extern uint64_t lzma_delta_coder_memusage(const void *options); + +#endif diff --git a/liblzma/delta/delta_decoder.c b/liblzma/delta/delta_decoder.c new file mode 100644 index 0000000..95b3dbc --- /dev/null +++ b/liblzma/delta/delta_decoder.c @@ -0,0 +1,81 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_decoder.c +/// \brief Delta filter decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "delta_decoder.h" +#include "delta_private.h" + + +static void +decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t distance = coder->distance; + +#if __MOJOSETUP__ + size_t i; + for (i = 0; i < size; ++i) { +#else + for (size_t i = 0; i < size; ++i) { +#endif + buffer[i] += coder->history[(distance + coder->pos) & 0xFF]; + coder->history[coder->pos-- & 0xFF] = buffer[i]; + } +} + + +static lzma_ret +delta_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + assert(coder->next.code != NULL); + + const size_t out_start = *out_pos; + + const lzma_ret ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); + + decode_buffer(coder, out + out_start, *out_pos - out_start); + + return ret; +} + + +extern lzma_ret +lzma_delta_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + next->code = &delta_decode; + return lzma_delta_coder_init(next, allocator, filters); +} + + +extern lzma_ret +lzma_delta_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_OPTIONS_ERROR; + + lzma_options_delta *opt + = lzma_alloc(sizeof(lzma_options_delta), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + opt->type = LZMA_DELTA_TYPE_BYTE; + opt->dist = props[0] + 1; + + *options = opt; + + return LZMA_OK; +} diff --git a/liblzma/delta/delta_decoder.h b/liblzma/delta/delta_decoder.h new file mode 100644 index 0000000..ae89acc --- /dev/null +++ b/liblzma/delta/delta_decoder.h @@ -0,0 +1,25 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_decoder.h +/// \brief Delta filter decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_DECODER_H +#define LZMA_DELTA_DECODER_H + +#include "delta_common.h" + +extern lzma_ret lzma_delta_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_delta_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/liblzma/delta/delta_private.h b/liblzma/delta/delta_private.h new file mode 100644 index 0000000..62b7fed --- /dev/null +++ b/liblzma/delta/delta_private.h @@ -0,0 +1,37 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file delta_private.h +/// \brief Private common stuff for Delta encoder and decoder +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_DELTA_PRIVATE_H +#define LZMA_DELTA_PRIVATE_H + +#include "delta_common.h" + +struct lzma_coder_s { + /// Next coder in the chain + lzma_next_coder next; + + /// Delta distance + size_t distance; + + /// Position in history[] + uint8_t pos; + + /// Buffer to hold history of the original data + uint8_t history[LZMA_DELTA_DIST_MAX]; +}; + + +extern lzma_ret lzma_delta_coder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +#endif diff --git a/liblzma/lz/lz_decoder.c b/liblzma/lz/lz_decoder.c new file mode 100644 index 0000000..2c57355 --- /dev/null +++ b/liblzma/lz/lz_decoder.c @@ -0,0 +1,300 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.c +/// \brief LZ out window +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +// liblzma supports multiple LZ77-based filters. The LZ part is shared +// between these filters. The LZ code takes care of dictionary handling +// and passing the data between filters in the chain. The filter-specific +// part decodes from the input buffer to the dictionary. + + +#include "lz_decoder.h" + + +struct lzma_coder_s { + /// Dictionary (history buffer) + lzma_dict dict; + + /// The actual LZ-based decoder e.g. LZMA + lzma_lz_decoder lz; + + /// Next filter in the chain, if any. Note that LZMA and LZMA2 are + /// only allowed as the last filter, but the long-range filter in + /// future can be in the middle of the chain. + lzma_next_coder next; + + /// True if the next filter in the chain has returned LZMA_STREAM_END. + bool next_finished; + + /// True if the LZ decoder (e.g. LZMA) has detected end of payload + /// marker. This may become true before next_finished becomes true. + bool this_finished; + + /// Temporary buffer needed when the LZ-based filter is not the last + /// filter in the chain. The output of the next filter is first + /// decoded into buffer[], which is then used as input for the actual + /// LZ-based decoder. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; +}; + + +static void +lz_decoder_reset(lzma_coder *coder) +{ + coder->dict.pos = 0; + coder->dict.full = 0; + coder->dict.buf[coder->dict.size - 1] = '\0'; + coder->dict.need_reset = false; + return; +} + + +static lzma_ret +decode_buffer(lzma_coder *coder, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size) +{ + while (true) { + // Wrap the dictionary if needed. + if (coder->dict.pos == coder->dict.size) + coder->dict.pos = 0; + + // Store the current dictionary position. It is needed to know + // where to start copying to the out[] buffer. + const size_t dict_start = coder->dict.pos; + + // Calculate how much we allow coder->lz.code() to decode. + // It must not decode past the end of the dictionary + // buffer, and we don't want it to decode more than is + // actually needed to fill the out[] buffer. + coder->dict.limit = coder->dict.pos + + my_min(out_size - *out_pos, + coder->dict.size - coder->dict.pos); + + // Call the coder->lz.code() to do the actual decoding. + const lzma_ret ret = coder->lz.code( + coder->lz.coder, &coder->dict, + in, in_pos, in_size); + + // Copy the decoded data from the dictionary to the out[] + // buffer. + const size_t copy_size = coder->dict.pos - dict_start; + assert(copy_size <= out_size - *out_pos); + memcpy(out + *out_pos, coder->dict.buf + dict_start, + copy_size); + *out_pos += copy_size; + + // Reset the dictionary if so requested by coder->lz.code(). + if (coder->dict.need_reset) { + lz_decoder_reset(coder); + + // Since we reset dictionary, we don't check if + // dictionary became full. + if (ret != LZMA_OK || *out_pos == out_size) + return ret; + } else { + // Return if everything got decoded or an error + // occurred, or if there's no more data to decode. + // + // Note that detecting if there's something to decode + // is done by looking if dictionary become full + // instead of looking if *in_pos == in_size. This + // is because it is possible that all the input was + // consumed already but some data is pending to be + // written to the dictionary. + if (ret != LZMA_OK || *out_pos == out_size + || coder->dict.pos < coder->dict.size) + return ret; + } + } +} + + +static lzma_ret +lz_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + if (coder->next.code == NULL) + return decode_buffer(coder, in, in_pos, in_size, + out, out_pos, out_size); + + // We aren't the last coder in the chain, we need to decode + // our input to a temporary buffer. + while (*out_pos < out_size) { + // Fill the temporary buffer if it is empty. + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + + const lzma_ret ret = coder->next.code( + coder->next.coder, + allocator, in, in_pos, in_size, + coder->temp.buffer, &coder->temp.size, + LZMA_BUFFER_SIZE, action); + + if (ret == LZMA_STREAM_END) + coder->next_finished = true; + else if (ret != LZMA_OK || coder->temp.size == 0) + return ret; + } + + if (coder->this_finished) { + if (coder->temp.size != 0) + return LZMA_DATA_ERROR; + + if (coder->next_finished) + return LZMA_STREAM_END; + + return LZMA_OK; + } + + const lzma_ret ret = decode_buffer(coder, coder->temp.buffer, + &coder->temp.pos, coder->temp.size, + out, out_pos, out_size); + + if (ret == LZMA_STREAM_END) + coder->this_finished = true; + else if (ret != LZMA_OK) + return ret; + else if (coder->next_finished && *out_pos < out_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} + + +static void +lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder->dict.buf, allocator); + + if (coder->lz.end != NULL) + coder->lz.end(coder->lz.coder, allocator); + else + lzma_free(coder->lz.coder, allocator); + + lzma_free(coder, allocator); + return; +} + + +extern lzma_ret +lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)) +{ + // Allocate the base structure if it isn't already allocated. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &lz_decode; + next->end = &lz_decoder_end; + + next->coder->dict.buf = NULL; + next->coder->dict.size = 0; + next->coder->lz = LZMA_LZ_DECODER_INIT; + next->coder->next = LZMA_NEXT_CODER_INIT; + } + + // Allocate and initialize the LZ-based decoder. It will also give + // us the dictionary size. + lzma_lz_options lz_options; + return_if_error(lz_init(&next->coder->lz, allocator, + filters[0].options, &lz_options)); + + // If the dictionary size is very small, increase it to 4096 bytes. + // This is to prevent constant wrapping of the dictionary, which + // would slow things down. The downside is that since we don't check + // separately for the real dictionary size, we may happily accept + // corrupt files. + if (lz_options.dict_size < 4096) + lz_options.dict_size = 4096; + + // Make dictionary size a multipe of 16. Some LZ-based decoders like + // LZMA use the lowest bits lzma_dict.pos to know the alignment of the + // data. Aligned buffer is also good when memcpying from the + // dictionary to the output buffer, since applications are + // recommended to give aligned buffers to liblzma. + // + // Avoid integer overflow. + if (lz_options.dict_size > SIZE_MAX - 15) + return LZMA_MEM_ERROR; + + lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); + + // Allocate and initialize the dictionary. + if (next->coder->dict.size != lz_options.dict_size) { + lzma_free(next->coder->dict.buf, allocator); + next->coder->dict.buf + = lzma_alloc(lz_options.dict_size, allocator); + if (next->coder->dict.buf == NULL) + return LZMA_MEM_ERROR; + + next->coder->dict.size = lz_options.dict_size; + } + + lz_decoder_reset(next->coder); + + // Use the preset dictionary if it was given to us. + if (lz_options.preset_dict != NULL + && lz_options.preset_dict_size > 0) { + // If the preset dictionary is bigger than the actual + // dictionary, copy only the tail. + const size_t copy_size = my_min(lz_options.preset_dict_size, + lz_options.dict_size); + const size_t offset = lz_options.preset_dict_size - copy_size; + memcpy(next->coder->dict.buf, lz_options.preset_dict + offset, + copy_size); + next->coder->dict.pos = copy_size; + next->coder->dict.full = copy_size; + } + + // Miscellaneous initializations + next->coder->next_finished = false; + next->coder->this_finished = false; + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + // Initialize the next filter in the chain, if any. + return lzma_next_filter_init(&next->coder->next, allocator, + filters + 1); +} + + +extern uint64_t +lzma_lz_decoder_memusage(size_t dictionary_size) +{ + return sizeof(lzma_coder) + (uint64_t)(dictionary_size); +} + + +extern void +lzma_lz_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) +{ + coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size); +} diff --git a/liblzma/lz/lz_decoder.h b/liblzma/lz/lz_decoder.h new file mode 100644 index 0000000..7266e80 --- /dev/null +++ b/liblzma/lz/lz_decoder.h @@ -0,0 +1,234 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.h +/// \brief LZ out window +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_DECODER_H +#define LZMA_LZ_DECODER_H + +#include "common.h" + + +typedef struct { + /// Pointer to the dictionary buffer. It can be an allocated buffer + /// internal to liblzma, or it can a be a buffer given by the + /// application when in single-call mode (not implemented yet). + uint8_t *buf; + + /// Write position in dictionary. The next byte will be written to + /// buf[pos]. + size_t pos; + + /// Indicates how full the dictionary is. This is used by + /// dict_is_distance_valid() to detect corrupt files that would + /// read beyond the beginning of the dictionary. + size_t full; + + /// Write limit + size_t limit; + + /// Size of the dictionary + size_t size; + + /// True when dictionary should be reset before decoding more data. + bool need_reset; + +} lzma_dict; + + +typedef struct { + size_t dict_size; + const uint8_t *preset_dict; + size_t preset_dict_size; +} lzma_lz_options; + + +typedef struct { + /// Data specific to the LZ-based decoder + lzma_coder *coder; + + /// Function to decode from in[] to *dict + lzma_ret (*code)(lzma_coder *restrict coder, + lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size); + + void (*reset)(lzma_coder *coder, const void *options); + + /// Set the uncompressed size + void (*set_uncompressed)(lzma_coder *coder, + lzma_vli uncompressed_size); + + /// Free allocated resources + void (*end)(lzma_coder *coder, lzma_allocator *allocator); + +} lzma_lz_decoder; + + +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ \ + .coder = NULL, \ + .code = NULL, \ + .reset = NULL, \ + .set_uncompressed = NULL, \ + .end = NULL, \ + } + + +extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)); + +extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); + +extern void lzma_lz_decoder_uncompressed( + lzma_coder *coder, lzma_vli uncompressed_size); + + +////////////////////// +// Inline functions // +////////////////////// + +/// Get a byte from the history buffer. +static inline uint8_t +dict_get(const lzma_dict *const dict, const uint32_t distance) +{ + return dict->buf[dict->pos - distance - 1 + + (distance < dict->pos ? 0 : dict->size)]; +} + + +/// Test if dictionary is empty. +static inline bool +dict_is_empty(const lzma_dict *const dict) +{ + return dict->full == 0; +} + + +/// Validate the match distance +static inline bool +dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) +{ + return dict->full > distance; +} + + +/// Repeat *len bytes at distance. +static inline bool +dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) +{ + // Don't write past the end of the dictionary. + const size_t dict_avail = dict->limit - dict->pos; + uint32_t left = my_min(dict_avail, *len); + *len -= left; + + // Repeat a block of data from the history. Because memcpy() is faster + // than copying byte by byte in a loop, the copying process gets split + // into three cases. + if (distance < left) { + // Source and target areas overlap, thus we can't use + // memcpy() nor even memmove() safely. + do { + dict->buf[dict->pos] = dict_get(dict, distance); + ++dict->pos; + } while (--left > 0); + + } else if (distance < dict->pos) { + // The easiest and fastest case + memcpy(dict->buf + dict->pos, + dict->buf + dict->pos - distance - 1, + left); + dict->pos += left; + + } else { + // The bigger the dictionary, the more rare this + // case occurs. We need to "wrap" the dict, thus + // we might need two memcpy() to copy all the data. + assert(dict->full == dict->size); + const uint32_t copy_pos + = dict->pos - distance - 1 + dict->size; + uint32_t copy_size = dict->size - copy_pos; + + if (copy_size < left) { + memmove(dict->buf + dict->pos, dict->buf + copy_pos, + copy_size); + dict->pos += copy_size; + copy_size = left - copy_size; + memcpy(dict->buf + dict->pos, dict->buf, copy_size); + dict->pos += copy_size; + } else { + memmove(dict->buf + dict->pos, dict->buf + copy_pos, + left); + dict->pos += left; + } + } + + // Update how full the dictionary is. + if (dict->full < dict->pos) + dict->full = dict->pos; + + return unlikely(*len != 0); +} + + +/// Puts one byte into the dictionary. Returns true if the dictionary was +/// already full and the byte couldn't be added. +static inline bool +dict_put(lzma_dict *dict, uint8_t byte) +{ + if (unlikely(dict->pos == dict->limit)) + return true; + + dict->buf[dict->pos++] = byte; + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return false; +} + + +/// Copies arbitrary amount of data into the dictionary. +static inline void +dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + size_t *restrict left) +{ + // NOTE: If we are being given more data than the size of the + // dictionary, it could be possible to optimize the LZ decoder + // so that not everything needs to go through the dictionary. + // This shouldn't be very common thing in practice though, and + // the slowdown of one extra memcpy() isn't bad compared to how + // much time it would have taken if the data were compressed. + + if (in_size - *in_pos > *left) + in_size = *in_pos + *left; + + *left -= lzma_bufcpy(in, in_pos, in_size, + dict->buf, &dict->pos, dict->limit); + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return; +} + + +static inline void +dict_reset(lzma_dict *dict) +{ + dict->need_reset = true; + return; +} + +#endif diff --git a/liblzma/lzma/lzma2_decoder.c b/liblzma/lzma/lzma2_decoder.c new file mode 100644 index 0000000..f38879c --- /dev/null +++ b/liblzma/lzma/lzma2_decoder.c @@ -0,0 +1,304 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.c +/// \brief LZMA2 decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma2_decoder.h" +#include "lz_decoder.h" +#include "lzma_decoder.h" + + +struct lzma_coder_s { + enum sequence { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA, + SEQ_COPY, + } sequence; + + /// Sequence after the size fields have been decoded. + enum sequence next_sequence; + + /// LZMA decoder + lzma_lz_decoder lzma; + + /// Uncompressed size of LZMA chunk + size_t uncompressed_size; + + /// Compressed size of the chunk (naturally equals to uncompressed + /// size of uncompressed chunk) + size_t compressed_size; + + /// True if properties are needed. This is false before the + /// first LZMA chunk. + bool need_properties; + + /// True if dictionary reset is needed. This is false before the + /// first chunk (LZMA or uncompressed). + bool need_dictionary_reset; + + lzma_options_lzma options; +}; + + +static lzma_ret +lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + // With SEQ_LZMA it is possible that no new input is needed to do + // some progress. The rest of the sequences assume that there is + // at least one byte of input. + while (*in_pos < in_size || coder->sequence == SEQ_LZMA) + switch (coder->sequence) { + case SEQ_CONTROL: { + const uint32_t control = in[*in_pos]; + ++*in_pos; + + if (control >= 0xE0 || control == 1) { + // Dictionary reset implies that next LZMA chunk has + // to set new properties. + coder->need_properties = true; + coder->need_dictionary_reset = true; + } else if (coder->need_dictionary_reset) { + return LZMA_DATA_ERROR; + } + + if (control >= 0x80) { + // LZMA chunk. The highest five bits of the + // uncompressed size are taken from the control byte. + coder->uncompressed_size = (control & 0x1F) << 16; + coder->sequence = SEQ_UNCOMPRESSED_1; + + // See if there are new properties or if we need to + // reset the state. + if (control >= 0xC0) { + // When there are new properties, state reset + // is done at SEQ_PROPERTIES. + coder->need_properties = false; + coder->next_sequence = SEQ_PROPERTIES; + + } else if (coder->need_properties) { + return LZMA_DATA_ERROR; + + } else { + coder->next_sequence = SEQ_LZMA; + + // If only state reset is wanted with old + // properties, do the resetting here for + // simplicity. + if (control >= 0xA0) + coder->lzma.reset(coder->lzma.coder, + &coder->options); + } + } else { + // End marker + if (control == 0x00) + return LZMA_STREAM_END; + + // Invalid control values + if (control > 2) + return LZMA_DATA_ERROR; + + // It's uncompressed chunk + coder->sequence = SEQ_COMPRESSED_0; + coder->next_sequence = SEQ_COPY; + } + + if (coder->need_dictionary_reset) { + // Finish the dictionary reset and let the caller + // flush the dictionary to the actual output buffer. + coder->need_dictionary_reset = false; + dict_reset(dict); + return LZMA_OK; + } + + break; + } + + case SEQ_UNCOMPRESSED_1: + coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + coder->uncompressed_size += in[(*in_pos)++] + 1; + coder->sequence = SEQ_COMPRESSED_0; + coder->lzma.set_uncompressed(coder->lzma.coder, + coder->uncompressed_size); + break; + + case SEQ_COMPRESSED_0: + coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + coder->compressed_size += in[(*in_pos)++] + 1; + coder->sequence = coder->next_sequence; + break; + + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) + return LZMA_DATA_ERROR; + + coder->lzma.reset(coder->lzma.coder, &coder->options); + + coder->sequence = SEQ_LZMA; + break; + + case SEQ_LZMA: { + // Store the start offset so that we can update + // coder->compressed_size later. + const size_t in_start = *in_pos; + + // Decode from in[] to *dict. + const lzma_ret ret = coder->lzma.code(coder->lzma.coder, + dict, in, in_pos, in_size); + + // Validate and update coder->compressed_size. + const size_t in_used = *in_pos - in_start; + if (in_used > coder->compressed_size) + return LZMA_DATA_ERROR; + + coder->compressed_size -= in_used; + + // Return if we didn't finish the chunk, or an error occurred. + if (ret != LZMA_STREAM_END) + return ret; + + // The LZMA decoder must have consumed the whole chunk now. + // We don't need to worry about uncompressed size since it + // is checked by the LZMA decoder. + if (coder->compressed_size != 0) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_CONTROL; + break; + } + + case SEQ_COPY: { + // Copy from input to the dictionary as is. + dict_write(dict, in, in_pos, in_size, &coder->compressed_size); + if (coder->compressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_CONTROL; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + assert(coder->lzma.end == NULL); + lzma_free(coder->lzma.coder, allocator); + + lzma_free(coder, allocator); + + return; +} + + +static lzma_ret +lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma2_decode; + lz->end = &lzma2_decoder_end; + + lz->coder->lzma = LZMA_LZ_DECODER_INIT; + } + + const lzma_options_lzma *options = opt; + + lz->coder->sequence = SEQ_CONTROL; + lz->coder->need_properties = true; + lz->coder->need_dictionary_reset = options->preset_dict == NULL + || options->preset_dict_size == 0; + + return lzma_lzma_decoder_create(&lz->coder->lzma, + allocator, options, lz_options); +} + + +extern lzma_ret +lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA2 can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma2_decoder_init); +} + + +extern uint64_t +lzma_lzma2_decoder_memusage(const void *options) +{ + return sizeof(lzma_coder) + + lzma_lzma_decoder_memusage_nocheck(options); +} + + +extern lzma_ret +lzma_lzma2_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_OPTIONS_ERROR; + + // Check that reserved bits are unset. + if (props[0] & 0xC0) + return LZMA_OPTIONS_ERROR; + + // Decode the dictionary size. + if (props[0] > 40) + return LZMA_OPTIONS_ERROR; + + lzma_options_lzma *opt = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (props[0] == 40) { + opt->dict_size = UINT32_MAX; + } else { + opt->dict_size = 2 | (props[0] & 1); + opt->dict_size <<= props[0] / 2 + 11; + } + + opt->preset_dict = NULL; + opt->preset_dict_size = 0; + + *options = opt; + + return LZMA_OK; +} diff --git a/liblzma/lzma/lzma2_decoder.h b/liblzma/lzma/lzma2_decoder.h new file mode 100644 index 0000000..fac4ac4 --- /dev/null +++ b/liblzma/lzma/lzma2_decoder.h @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.h +/// \brief LZMA2 decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_DECODER_H +#define LZMA_LZMA2_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/liblzma/lzma/lzma_common.h b/liblzma/lzma/lzma_common.h new file mode 100644 index 0000000..b4ba043 --- /dev/null +++ b/liblzma/lzma/lzma_common.h @@ -0,0 +1,230 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_common.h +/// \brief Private definitions common to LZMA encoder and decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_COMMON_H +#define LZMA_LZMA_COMMON_H + +#include "common.h" +#include "range_common.h" + + +/////////////////// +// Miscellaneous // +/////////////////// + +/// Maximum number of position states. A position state is the lowest pos bits +/// number of bits of the current uncompressed offset. In some places there +/// are different sets of probabilities for different pos states. +#define POS_STATES_MAX (1 << LZMA_PB_MAX) + + +/// Validates lc, lp, and pb. +static inline bool +is_lclppb_valid(const lzma_options_lzma *options) +{ + return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX + && options->lc + options->lp <= LZMA_LCLP_MAX + && options->pb <= LZMA_PB_MAX; +} + + +/////////// +// State // +/////////// + +/// This enum is used to track which events have occurred most recently and +/// in which order. This information is used to predict the next event. +/// +/// Events: +/// - Literal: One 8-bit byte +/// - Match: Repeat a chunk of data at some distance +/// - Long repeat: Multi-byte match at a recently seen distance +/// - Short repeat: One-byte repeat at a recently seen distance +/// +/// The event names are in from STATE_oldest_older_previous. REP means +/// either short or long repeated match, and NONLIT means any non-literal. +typedef enum { + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_LIT_MATCH, + STATE_LIT_LONGREP, + STATE_LIT_SHORTREP, + STATE_NONLIT_MATCH, + STATE_NONLIT_REP, +} lzma_lzma_state; + + +/// Total number of states +#define STATES 12 + +/// The lowest 7 states indicate that the previous state was a literal. +#define LIT_STATES 7 + + +/// Indicate that the latest state was a literal. +#define update_literal(state) \ + state = ((state) <= STATE_SHORTREP_LIT_LIT \ + ? STATE_LIT_LIT \ + : ((state) <= STATE_LIT_SHORTREP \ + ? (state) - 3 \ + : (state) - 6)) + +/// Indicate that the latest state was a match. +#define update_match(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH) + +/// Indicate that the latest state was a long repeated match. +#define update_long_rep(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP) + +/// Indicate that the latest state was a short match. +#define update_short_rep(state) \ + state = ((state) < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP) + +/// Test if the previous state was a literal. +#define is_literal_state(state) \ + ((state) < LIT_STATES) + + +///////////// +// Literal // +///////////// + +/// Each literal coder is divided in three sections: +/// - 0x001-0x0FF: Without match byte +/// - 0x101-0x1FF: With match byte; match bit is 0 +/// - 0x201-0x2FF: With match byte; match bit is 1 +/// +/// Match byte is used when the previous LZMA symbol was something else than +/// a literal (that is, it was some kind of match). +#define LITERAL_CODER_SIZE 0x300 + +/// Maximum number of literal coders +#define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX) + +/// Locate the literal coder for the next literal byte. The choice depends on +/// - the lowest literal_pos_bits bits of the position of the current +/// byte; and +/// - the highest literal_context_bits bits of the previous byte. +#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \ + ((probs)[(((pos) & lp_mask) << lc) + ((prev_byte) >> (8 - lc))]) + + +static inline void +literal_init(probability (*probs)[LITERAL_CODER_SIZE], + uint32_t lc, uint32_t lp) +{ + assert(lc + lp <= LZMA_LCLP_MAX); + + const uint32_t coders = 1U << (lc + lp); + +#if __MOJOSETUP__ + uint32_t i, j; + for (i = 0; i < coders; ++i) + for (j = 0; j < LITERAL_CODER_SIZE; ++j) + bit_reset(probs[i][j]); +#else + for (uint32_t i = 0; i < coders; ++i) + for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j) + bit_reset(probs[i][j]); +#endif + + return; +} + + +////////////////// +// Match length // +////////////////// + +// Minimum length of a match is two bytes. +#define MATCH_LEN_MIN 2 + +// Match length is encoded with 4, 5, or 10 bits. +// +// Length Bits +// 2-9 4 = Choice=0 + 3 bits +// 10-17 5 = Choice=1 + Choice2=0 + 3 bits +// 18-273 10 = Choice=1 + Choice2=1 + 8 bits +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +// Maximum length of a match is 273 which is a result of the encoding +// described above. +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + + +//////////////////// +// Match distance // +//////////////////// + +// Different set of probabilities is used for match distances that have very +// short match length: Lengths of 2, 3, and 4 bytes have a separate set of +// probabilities for each length. The matches with longer length use a shared +// set of probabilities. +#define LEN_TO_POS_STATES 4 + +// Macro to get the index of the appropriate probability array. +#define get_len_to_pos_state(len) \ + ((len) < LEN_TO_POS_STATES + MATCH_LEN_MIN \ + ? (len) - MATCH_LEN_MIN \ + : LEN_TO_POS_STATES - 1) + +// The highest two bits of a match distance (pos slot) are encoded using six +// bits. See fastpos.h for more explanation. +#define POS_SLOT_BITS 6 +#define POS_SLOTS (1 << POS_SLOT_BITS) + +// Match distances up to 127 are fully encoded using probabilities. Since +// the highest two bits (pos slot) are always encoded using six bits, the +// distances 0-3 don't need any additional bits to encode, since the pos +// slot itself is the same as the actual distance. START_POS_MODEL_INDEX +// indicates the first pos slot where at least one additional bit is needed. +#define START_POS_MODEL_INDEX 4 + +// Match distances greater than 127 are encoded in three pieces: +// - pos slot: the highest two bits +// - direct bits: 2-26 bits below the highest two bits +// - alignment bits: four lowest bits +// +// Direct bits don't use any probabilities. +// +// The pos slot value of 14 is for distances 128-191 (see the table in +// fastpos.h to understand why). +#define END_POS_MODEL_INDEX 14 + +// Pos slots that indicate a distance <= 127. +#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +// For match distances greater than 127, only the highest two bits and the +// lowest four bits (alignment) is encoded using probabilities. +#define ALIGN_BITS 4 +#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1) + +// LZMA remembers the four most recent match distances. Reusing these distances +// tends to take less space than re-encoding the actual distance value. +#define REP_DISTANCES 4 + +#endif diff --git a/liblzma/lzma/lzma_decoder.c b/liblzma/lzma/lzma_decoder.c new file mode 100644 index 0000000..1bf1e76 --- /dev/null +++ b/liblzma/lzma/lzma_decoder.c @@ -0,0 +1,1084 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.c +/// \brief LZMA decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_decoder.h" +#include "lzma_common.h" +#include "lzma_decoder.h" +#include "range_decoder.h" + + +#ifdef HAVE_SMALL + +// Macros for (somewhat) size-optimized code. +#define seq_4(seq) seq + +#define seq_6(seq) seq + +#define seq_8(seq) seq + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _CHOICE2, \ + seq ## _BITTREE + +#define len_decode(target, ld, pos_state, seq) \ +do { \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + probs = ld.low[pos_state];\ + limit = LEN_LOW_SYMBOLS; \ + target = MATCH_LEN_MIN; \ + } else { \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + probs = ld.mid[pos_state]; \ + limit = LEN_MID_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + } else { \ + rc_update_1(ld.choice2); \ + probs = ld.high; \ + limit = LEN_HIGH_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ + } \ + } \ + symbol = 1; \ +case seq ## _BITTREE: \ + do { \ + rc_bit(probs[symbol], , , seq ## _BITTREE); \ + } while (symbol < limit); \ + target += symbol - limit; \ +} while (0) + +#else // HAVE_SMALL + +// Unrolled versions +#define seq_4(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3 + +#define seq_6(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5 + +#define seq_8(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5, \ + seq ## 6, \ + seq ## 7 + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _LOW0, \ + seq ## _LOW1, \ + seq ## _LOW2, \ + seq ## _CHOICE2, \ + seq ## _MID0, \ + seq ## _MID1, \ + seq ## _MID2, \ + seq ## _HIGH0, \ + seq ## _HIGH1, \ + seq ## _HIGH2, \ + seq ## _HIGH3, \ + seq ## _HIGH4, \ + seq ## _HIGH5, \ + seq ## _HIGH6, \ + seq ## _HIGH7 + +#define len_decode(target, ld, pos_state, seq) \ +do { \ + symbol = 1; \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ + target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ + } else { \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID0); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID1); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID2); \ + target = symbol - LEN_MID_SYMBOLS \ + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ + } else { \ + rc_update_1(ld.choice2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ + target = symbol - LEN_HIGH_SYMBOLS \ + + MATCH_LEN_MIN \ + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ + } \ + } \ +} while (0) + +#endif // HAVE_SMALL + + +/// Length decoder probabilities; see comments in lzma_common.h. +typedef struct { + probability choice; + probability choice2; + probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + probability high[LEN_HIGH_SYMBOLS]; +} lzma_length_decoder; + + +struct lzma_coder_s { + /////////////////// + // Probabilities // + /////////////////// + + /// Literals; see comments in lzma_common.h. + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; + + /// If 1, it's a match. Otherwise it's a single 8-bit literal. + probability is_match[STATES][POS_STATES_MAX]; + + /// If 1, it's a repeated match. The distance is one of rep0 .. rep3. + probability is_rep[STATES]; + + /// If 0, distance of a repeated match is rep0. + /// Otherwise check is_rep1. + probability is_rep0[STATES]; + + /// If 0, distance of a repeated match is rep1. + /// Otherwise check is_rep2. + probability is_rep1[STATES]; + + /// If 0, distance of a repeated match is rep2. Otherwise it is rep3. + probability is_rep2[STATES]; + + /// If 1, the repeated match has length of one byte. Otherwise + /// the length is decoded from rep_len_decoder. + probability is_rep0_long[STATES][POS_STATES_MAX]; + + /// Probability tree for the highest two bits of the match distance. + /// There is a separate probability tree for match lengths of + /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; + + /// Probability trees for additional bits for match distance when the + /// distance is in the range [4, 127]. + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; + + /// Probability tree for the lowest four bits of a match distance + /// that is equal to or greater than 128. + probability pos_align[ALIGN_TABLE_SIZE]; + + /// Length of a normal match + lzma_length_decoder match_len_decoder; + + /// Length of a repeated match + lzma_length_decoder rep_len_decoder; + + /////////////////// + // Decoder state // + /////////////////// + + // Range coder + lzma_range_decoder rc; + + // Types of the most recently seen LZMA symbols + lzma_lzma_state state; + + uint32_t rep0; ///< Distance of the latest match + uint32_t rep1; ///< Distance of second latest match + uint32_t rep2; ///< Distance of third latest match + uint32_t rep3; ///< Distance of fourth latest match + + uint32_t pos_mask; // (1U << pb) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; + + /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of + /// payload marker is expected. + lzma_vli uncompressed_size; + + //////////////////////////////// + // State of incomplete symbol // + //////////////////////////////// + + /// Position where to continue the decoder loop + enum { + SEQ_NORMALIZE, + SEQ_IS_MATCH, + seq_8(SEQ_LITERAL), + seq_8(SEQ_LITERAL_MATCHED), + SEQ_LITERAL_WRITE, + SEQ_IS_REP, + seq_len(SEQ_MATCH_LEN), + seq_6(SEQ_POS_SLOT), + SEQ_POS_MODEL, + SEQ_DIRECT, + seq_4(SEQ_ALIGN), + SEQ_EOPM, + SEQ_IS_REP0, + SEQ_SHORTREP, + SEQ_IS_REP0_LONG, + SEQ_IS_REP1, + SEQ_IS_REP2, + seq_len(SEQ_REP_LEN), + SEQ_COPY, + } sequence; + + /// Base of the current probability tree + probability *probs; + + /// Symbol being decoded. This is also used as an index variable in + /// bittree decoders: probs[symbol] + uint32_t symbol; + + /// Used as a loop termination condition on bittree decoders and + /// direct bits decoder. + uint32_t limit; + + /// Matched literal decoder: 0x100 or 0 to help avoiding branches. + /// Bittree reverse decoders: Offset of the next bit: 1 << offset + uint32_t offset; + + /// If decoding a literal: match byte. + /// If decoding a match: length of the match. + uint32_t len; +}; + + +static lzma_ret +lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr, + const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + //////////////////// + // Initialization // + //////////////////// + + if (!rc_read_init(&coder->rc, in, in_pos, in_size)) + return LZMA_OK; + + /////////////// + // Variables // + /////////////// + + // Making local copies of often-used variables improves both + // speed and readability. + + lzma_dict dict = *dictptr; + + const size_t dict_start = dict.pos; + + // Range decoder + rc_to_local(coder->rc, *in_pos); + + // State + uint32_t state = coder->state; + uint32_t rep0 = coder->rep0; + uint32_t rep1 = coder->rep1; + uint32_t rep2 = coder->rep2; + uint32_t rep3 = coder->rep3; + + const uint32_t pos_mask = coder->pos_mask; + + // These variables are actually needed only if we last time ran + // out of input in the middle of the decoder loop. + probability *probs = coder->probs; + uint32_t symbol = coder->symbol; + uint32_t limit = coder->limit; + uint32_t offset = coder->offset; + uint32_t len = coder->len; + + const uint32_t literal_pos_mask = coder->literal_pos_mask; + const uint32_t literal_context_bits = coder->literal_context_bits; + + // Temporary variables + uint32_t pos_state = dict.pos & pos_mask; + + lzma_ret ret = LZMA_OK; + + // If uncompressed size is known, there must be no end of payload + // marker. + const bool no_eopm = coder->uncompressed_size + != LZMA_VLI_UNKNOWN; + if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos) + dict.limit = dict.pos + (size_t)(coder->uncompressed_size); + + // The main decoder loop. The "switch" is used to restart the decoder at + // correct location. Once restarted, the "switch" is no longer used. + switch (coder->sequence) + while (true) { + // Calculate new pos_state. This is skipped on the first loop + // since we already calculated it when setting up the local + // variables. + pos_state = dict.pos & pos_mask; + + case SEQ_NORMALIZE: + case SEQ_IS_MATCH: + if (unlikely(no_eopm && dict.pos == dict.limit)) + break; + + rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { + rc_update_0(coder->is_match[state][pos_state]); + + // It's a literal i.e. a single 8-bit byte. + + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_pos_mask, + dict.pos, dict_get(&dict, 0)); + symbol = 1; + + if (is_literal_state(state)) { + // Decode literal without match byte. +#ifdef HAVE_SMALL + case SEQ_LITERAL: + do { + rc_bit(probs[symbol], , , SEQ_LITERAL); + } while (symbol < (1 << 8)); +#else + rc_bit_case(probs[symbol], , , SEQ_LITERAL0); + rc_bit_case(probs[symbol], , , SEQ_LITERAL1); + rc_bit_case(probs[symbol], , , SEQ_LITERAL2); + rc_bit_case(probs[symbol], , , SEQ_LITERAL3); + rc_bit_case(probs[symbol], , , SEQ_LITERAL4); + rc_bit_case(probs[symbol], , , SEQ_LITERAL5); + rc_bit_case(probs[symbol], , , SEQ_LITERAL6); + rc_bit_case(probs[symbol], , , SEQ_LITERAL7); +#endif + } else { + // Decode literal with match byte. + // + // We store the byte we compare against + // ("match byte") to "len" to minimize the + // number of variables we need to store + // between decoder calls. + len = dict_get(&dict, rep0) << 1; + + // The usage of "offset" allows omitting some + // branches, which should give tiny speed + // improvement on some CPUs. "offset" gets + // set to zero if match_bit didn't match. + offset = 0x100; + +#ifdef HAVE_SMALL + case SEQ_LITERAL_MATCHED: + do { + const uint32_t match_bit + = len & offset; + const uint32_t subcoder_index + = offset + match_bit + + symbol; + + rc_bit(probs[subcoder_index], + offset &= ~match_bit, + offset &= match_bit, + SEQ_LITERAL_MATCHED); + + // It seems to be faster to do this + // here instead of putting it to the + // beginning of the loop and then + // putting the "case" in the middle + // of the loop. + len <<= 1; + + } while (symbol < (1 << 8)); +#else + // Unroll the loop. + uint32_t match_bit; + uint32_t subcoder_index; + +# define d(seq) \ + case seq: \ + match_bit = len & offset; \ + subcoder_index = offset + match_bit + symbol; \ + rc_bit(probs[subcoder_index], \ + offset &= ~match_bit, \ + offset &= match_bit, \ + seq) + + d(SEQ_LITERAL_MATCHED0); + len <<= 1; + d(SEQ_LITERAL_MATCHED1); + len <<= 1; + d(SEQ_LITERAL_MATCHED2); + len <<= 1; + d(SEQ_LITERAL_MATCHED3); + len <<= 1; + d(SEQ_LITERAL_MATCHED4); + len <<= 1; + d(SEQ_LITERAL_MATCHED5); + len <<= 1; + d(SEQ_LITERAL_MATCHED6); + len <<= 1; + d(SEQ_LITERAL_MATCHED7); +# undef d +#endif + } + + //update_literal(state); + // Use a lookup table to update to literal state, + // since compared to other state updates, this would + // need two branches. + static const lzma_lzma_state next_state[] = { + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT + }; + state = next_state[state]; + + case SEQ_LITERAL_WRITE: + if (unlikely(dict_put(&dict, symbol))) { + coder->sequence = SEQ_LITERAL_WRITE; + goto out; + } + + continue; + } + + // Instead of a new byte we are going to get a byte range + // (distance and length) which will be repeated from our + // output history. + + rc_update_1(coder->is_match[state][pos_state]); + + case SEQ_IS_REP: + rc_if_0(coder->is_rep[state], SEQ_IS_REP) { + // Not a repeated match + rc_update_0(coder->is_rep[state]); + update_match(state); + + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + len_decode(len, coder->match_len_decoder, + pos_state, SEQ_MATCH_LEN); + + // Prepare to decode the highest two bits of the + // match distance. + probs = coder->pos_slot[get_len_to_pos_state(len)]; + symbol = 1; + +#ifdef HAVE_SMALL + case SEQ_POS_SLOT: + do { + rc_bit(probs[symbol], , , SEQ_POS_SLOT); + } while (symbol < POS_SLOTS); +#else + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5); +#endif + // Get rid of the highest bit that was needed for + // indexing of the probability array. + symbol -= POS_SLOTS; + assert(symbol <= 63); + + if (symbol < START_POS_MODEL_INDEX) { + // Match distances [0, 3] have only two bits. + rep0 = symbol; + } else { + // Decode the lowest [1, 29] bits of + // the match distance. + limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < END_POS_MODEL_INDEX) { + // Prepare to decode the low bits for + // a distance of [4, 127]. + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. + assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 0; + case SEQ_POS_MODEL: +#ifdef HAVE_SMALL + do { + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } while (++offset < limit); +#else + switch (limit) { + case 5: + assert(offset == 0); + rc_bit(probs[symbol], , + rep0 += 1, + SEQ_POS_MODEL); + ++offset; + --limit; + case 4: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 3: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 2: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 1: + // We need "symbol" only for + // indexing the probability + // array, thus we can use + // rc_bit_last() here to omit + // the unneeded updating of + // "symbol". + rc_bit_last(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } +#endif + } else { + // The distance is >= 128. Decode the + // lower bits without probabilities + // except the lowest four bits. + assert(symbol >= 14); + assert(limit >= 6); + limit -= ALIGN_BITS; + assert(limit >= 2); + case SEQ_DIRECT: + // Not worth manual unrolling + do { + rc_direct(rep0, SEQ_DIRECT); + } while (--limit > 0); + + // Decode the lowest four bits using + // probabilities. + rep0 <<= ALIGN_BITS; + symbol = 1; +#ifdef HAVE_SMALL + offset = 0; + case SEQ_ALIGN: + do { + rc_bit(coder->pos_align[ + symbol], , + rep0 += 1 << offset, + SEQ_ALIGN); + } while (++offset < ALIGN_BITS); +#else + case SEQ_ALIGN0: + rc_bit(coder->pos_align[symbol], , + rep0 += 1, SEQ_ALIGN0); + case SEQ_ALIGN1: + rc_bit(coder->pos_align[symbol], , + rep0 += 2, SEQ_ALIGN1); + case SEQ_ALIGN2: + rc_bit(coder->pos_align[symbol], , + rep0 += 4, SEQ_ALIGN2); + case SEQ_ALIGN3: + // Like in SEQ_POS_MODEL, we don't + // need "symbol" for anything else + // than indexing the probability array. + rc_bit_last(coder->pos_align[symbol], , + rep0 += 8, SEQ_ALIGN3); +#endif + + if (rep0 == UINT32_MAX) { + // End of payload marker was + // found. It must not be + // present if uncompressed + // size is known. + if (coder->uncompressed_size + != LZMA_VLI_UNKNOWN) { + ret = LZMA_DATA_ERROR; + goto out; + } + + case SEQ_EOPM: + // LZMA1 stream with + // end-of-payload marker. + rc_normalize(SEQ_EOPM); + ret = LZMA_STREAM_END; + goto out; + } + } + } + + // Validate the distance we just decoded. + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + } else { + rc_update_1(coder->is_rep[state]); + + // Repeated match + // + // The match distance is a value that we have had + // earlier. The latest four match distances are + // available as rep0, rep1, rep2 and rep3. We will + // now decode which of them is the new distance. + // + // There cannot be a match if we haven't produced + // any output, so check that first. + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } + + case SEQ_IS_REP0: + rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { + rc_update_0(coder->is_rep0[state]); + // The distance is rep0. + + case SEQ_IS_REP0_LONG: + rc_if_0(coder->is_rep0_long[state][pos_state], + SEQ_IS_REP0_LONG) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); + + update_short_rep(state); + + case SEQ_SHORTREP: + if (unlikely(dict_put(&dict, dict_get( + &dict, rep0)))) { + coder->sequence = SEQ_SHORTREP; + goto out; + } + + continue; + } + + // Repeating more than one byte at + // distance of rep0. + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + + } else { + rc_update_1(coder->is_rep0[state]); + + case SEQ_IS_REP1: + // The distance is rep1, rep2 or rep3. Once + // we find out which one of these three, it + // is stored to rep0 and rep1, rep2 and rep3 + // are updated accordingly. + rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { + rc_update_0(coder->is_rep1[state]); + + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep1[state]); + case SEQ_IS_REP2: + rc_if_0(coder->is_rep2[state], + SEQ_IS_REP2) { + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + + } else { + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; + } + } + } + + update_long_rep(state); + + // Decode the length of the repeated match. + len_decode(len, coder->rep_len_decoder, + pos_state, SEQ_REP_LEN); + } + + ///////////////////////////////// + // Repeat from history buffer. // + ///////////////////////////////// + + // The length is always between these limits. There is no way + // to trigger the algorithm to set len outside this range. + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + + case SEQ_COPY: + // Repeat len bytes from distance of rep0. + if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } + } + + rc_normalize(SEQ_NORMALIZE); + coder->sequence = SEQ_IS_MATCH; + +out: + // Save state + + // NOTE: Must not copy dict.limit. + dictptr->pos = dict.pos; + dictptr->full = dict.full; + + rc_from_local(coder->rc, *in_pos); + + coder->state = state; + coder->rep0 = rep0; + coder->rep1 = rep1; + coder->rep2 = rep2; + coder->rep3 = rep3; + + coder->probs = probs; + coder->symbol = symbol; + coder->limit = limit; + coder->offset = offset; + coder->len = len; + + // Update the remaining amount of uncompressed data if uncompressed + // size was known. + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) { + coder->uncompressed_size -= dict.pos - dict_start; + + // Since there cannot be end of payload marker if the + // uncompressed size was known, we check here if we + // finished decoding. + if (coder->uncompressed_size == 0 && ret == LZMA_OK + && coder->sequence != SEQ_NORMALIZE) + ret = coder->sequence == SEQ_IS_MATCH + ? LZMA_STREAM_END : LZMA_DATA_ERROR; + } + + // We can do an additional check in the range decoder to catch some + // corrupted files. + if (ret == LZMA_STREAM_END) { + if (!rc_is_finished(coder->rc)) + ret = LZMA_DATA_ERROR; + + // Reset the range decoder so that it is ready to reinitialize + // for a new LZMA2 chunk. + rc_reset(coder->rc); + } + + return ret; +} + + + +static void +lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) +{ + coder->uncompressed_size = uncompressed_size; +} + +/* +extern void +lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size) +{ + // This is hack. + (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size; +} +*/ + +static void +lzma_decoder_reset(lzma_coder *coder, const void *opt) +{ + const lzma_options_lzma *options = opt; +#if __MOJOSETUP__ + uint32_t i, j, pos_state; +#endif + + // NOTE: We assume that lc/lp/pb are valid since they were + // successfully decoded with lzma_lzma_decode_properties(). + + // Calculate pos_mask. We don't need pos_bits as is for anything. + coder->pos_mask = (1U << options->pb) - 1; + + // Initialize the literal decoder. + literal_init(coder->literal, options->lc, options->lp); + + coder->literal_context_bits = options->lc; + coder->literal_pos_mask = (1U << options->lp) - 1; + + // State + coder->state = STATE_LIT_LIT; + coder->rep0 = 0; + coder->rep1 = 0; + coder->rep2 = 0; + coder->rep3 = 0; + coder->pos_mask = (1U << options->pb) - 1; + + // Range decoder + rc_reset(coder->rc); + + // Bit and bittree decoders +#if __MOJOSETUP__ + for (i = 0; i < STATES; ++i) { + for (j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); + } + + for (i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); + + for (i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(coder->pos_special[i]); +#else + for (uint32_t i = 0; i < STATES; ++i) { + for (uint32_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); + } + + for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); + + for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(coder->pos_special[i]); +#endif + + bittree_reset(coder->pos_align, ALIGN_BITS); + + // Len decoders (also bit/bittree) + const uint32_t num_pos_states = 1U << options->pb; + bit_reset(coder->match_len_decoder.choice); + bit_reset(coder->match_len_decoder.choice2); + bit_reset(coder->rep_len_decoder.choice); + bit_reset(coder->rep_len_decoder.choice2); + +#if __MOJOSETUP__ + for (pos_state = 0; pos_state < num_pos_states; ++pos_state) { +#else + for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { +#endif + bittree_reset(coder->match_len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(coder->match_len_decoder.mid[pos_state], + LEN_MID_BITS); + + bittree_reset(coder->rep_len_decoder.low[pos_state], + LEN_LOW_BITS); + bittree_reset(coder->rep_len_decoder.mid[pos_state], + LEN_MID_BITS); + } + + bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); + + coder->sequence = SEQ_IS_MATCH; + coder->probs = NULL; + coder->symbol = 0; + coder->limit = 0; + coder->offset = 0; + coder->len = 0; + + return; +} + + +extern lzma_ret +lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma_decode; + lz->reset = &lzma_decoder_reset; + lz->set_uncompressed = &lzma_decoder_uncompressed; + } + + // All dictionary sizes are OK here. LZ decoder will take care of + // the special cases. + const lzma_options_lzma *options = opt; + lz_options->dict_size = options->dict_size; + lz_options->preset_dict = options->preset_dict; + lz_options->preset_dict_size = options->preset_dict_size; + + return LZMA_OK; +} + + +/// Allocate and initialize LZMA decoder. This is used only via LZ +/// initialization (lzma_lzma_decoder_init() passes function pointer to +/// the LZ initialization). +static lzma_ret +lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + if (!is_lclppb_valid(options)) + return LZMA_PROG_ERROR; + + return_if_error(lzma_lzma_decoder_create( + lz, allocator, options, lz_options)); + + lzma_decoder_reset(lz->coder, options); + lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN); + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma_decoder_init); +} + + +extern bool +lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) +{ + if (byte > (4 * 5 + 4) * 9 + 8) + return true; + + // See the file format specification to understand this. + options->pb = byte / (9 * 5); + byte -= options->pb * 9 * 5; + options->lp = byte / 9; + options->lc = byte - options->lp * 9; + + return options->lc + options->lp > LZMA_LCLP_MAX; +} + + +extern uint64_t +lzma_lzma_decoder_memusage_nocheck(const void *options) +{ + const lzma_options_lzma *const opt = options; + return sizeof(lzma_coder) + lzma_lz_decoder_memusage(opt->dict_size); +} + + +extern uint64_t +lzma_lzma_decoder_memusage(const void *options) +{ + if (!is_lclppb_valid(options)) + return UINT64_MAX; + + return lzma_lzma_decoder_memusage_nocheck(options); +} + + +extern lzma_ret +lzma_lzma_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 5) + return LZMA_OPTIONS_ERROR; + + lzma_options_lzma *opt + = lzma_alloc(sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (lzma_lzma_lclppb_decode(opt, props[0])) + goto error; + + // All dictionary sizes are accepted, including zero. LZ decoder + // will automatically use a dictionary at least a few KiB even if + // a smaller dictionary is requested. + opt->dict_size = unaligned_read32le(props + 1); + + opt->preset_dict = NULL; + opt->preset_dict_size = 0; + + *options = opt; + + return LZMA_OK; + +error: + lzma_free(opt, allocator); + return LZMA_OPTIONS_ERROR; +} diff --git a/liblzma/lzma/lzma_decoder.h b/liblzma/lzma/lzma_decoder.h new file mode 100644 index 0000000..a463a76 --- /dev/null +++ b/liblzma/lzma/lzma_decoder.h @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_decoder.h +/// \brief LZMA decoder API +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA_DECODER_H +#define LZMA_LZMA_DECODER_H + +#include "common.h" + + +/// Allocates and initializes LZMA decoder +extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern uint64_t lzma_lzma_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + + +/// \brief Decodes the LZMA Properties byte (lc/lp/pb) +/// +/// \return true if error occurred, false on success +/// +extern bool lzma_lzma_lclppb_decode( + lzma_options_lzma *options, uint8_t byte); + + +#ifdef LZMA_LZ_DECODER_H +/// Allocate and setup function pointers only. This is used by LZMA1 and +/// LZMA2 decoders. +extern lzma_ret lzma_lzma_decoder_create( + lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, lzma_lz_options *lz_options); + +/// Gets memory usage without validating lc/lp/pb. This is used by LZMA2 +/// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb. +extern uint64_t lzma_lzma_decoder_memusage_nocheck(const void *options); + +#endif + +#endif diff --git a/liblzma/lzma/lzma_encoder_presets.c b/liblzma/lzma/lzma_encoder_presets.c new file mode 100644 index 0000000..21e427a --- /dev/null +++ b/liblzma/lzma/lzma_encoder_presets.c @@ -0,0 +1,61 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_presets.c +/// \brief Encoder presets +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API(lzma_bool) +lzma_lzma_preset(lzma_options_lzma *options, uint32_t preset) +{ + const uint32_t level = preset & LZMA_PRESET_LEVEL_MASK; + const uint32_t flags = preset & ~LZMA_PRESET_LEVEL_MASK; + const uint32_t supported_flags = LZMA_PRESET_EXTREME; + + if (level > 9 || (flags & ~supported_flags)) + return true; + + options->preset_dict = NULL; + options->preset_dict_size = 0; + + options->lc = LZMA_LC_DEFAULT; + options->lp = LZMA_LP_DEFAULT; + options->pb = LZMA_PB_DEFAULT; + + options->dict_size = UINT32_C(1) << (uint8_t []){ + 18, 20, 21, 22, 22, 23, 23, 24, 25, 26 }[level]; + + if (level <= 3) { + options->mode = LZMA_MODE_FAST; + options->mf = level == 0 ? LZMA_MF_HC3 : LZMA_MF_HC4; + options->nice_len = level <= 1 ? 128 : 273; + options->depth = (uint8_t []){ 4, 8, 24, 48 }[level]; + } else { + options->mode = LZMA_MODE_NORMAL; + options->mf = LZMA_MF_BT4; + options->nice_len = level == 4 ? 16 : level == 5 ? 32 : 64; + options->depth = 0; + } + + if (flags & LZMA_PRESET_EXTREME) { + options->mode = LZMA_MODE_NORMAL; + options->mf = LZMA_MF_BT4; + if (level == 3 || level == 5) { + options->nice_len = 192; + options->depth = 0; + } else { + options->nice_len = 273; + options->depth = 512; + } + } + + return false; +} diff --git a/liblzma/rangecoder/range_common.h b/liblzma/rangecoder/range_common.h new file mode 100644 index 0000000..7c12ab0 --- /dev/null +++ b/liblzma/rangecoder/range_common.h @@ -0,0 +1,85 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_common.h +/// \brief Common things for range encoder and decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_COMMON_H +#define LZMA_RANGE_COMMON_H + +#if __MOJOSETUP__ +#include "common.h" +#else +#ifdef HAVE_CONFIG_H +# include "common.h" +#endif +#endif + +/////////////// +// Constants // +/////////////// + +#define RC_SHIFT_BITS 8 +#define RC_TOP_BITS 24 +#define RC_TOP_VALUE (UINT32_C(1) << RC_TOP_BITS) +#define RC_BIT_MODEL_TOTAL_BITS 11 +#define RC_BIT_MODEL_TOTAL (UINT32_C(1) << RC_BIT_MODEL_TOTAL_BITS) +#define RC_MOVE_BITS 5 + + +//////////// +// Macros // +//////////// + +// Resets the probability so that both 0 and 1 have probability of 50 % +#define bit_reset(prob) \ + prob = RC_BIT_MODEL_TOTAL >> 1 + +// This does the same for a complete bit tree. +// (A tree represented as an array.) +#if __MOJOSETUP__ +#define bittree_reset(probs, bit_levels) \ + do { \ + uint32_t bt_i; \ + for (bt_i = 0; bt_i < (1 << (bit_levels)); ++bt_i) \ + bit_reset((probs)[bt_i]); \ + } while (0) +#else +#define bittree_reset(probs, bit_levels) \ + for (uint32_t bt_i = 0; bt_i < (1 << (bit_levels)); ++bt_i) \ + bit_reset((probs)[bt_i]) +#endif + + +////////////////////// +// Type definitions // +////////////////////// + +/// \brief Type of probabilities used with range coder +/// +/// This needs to be at least 12-bit integer, so uint16_t is a logical choice. +/// However, on some architecture and compiler combinations, a bigger type +/// may give better speed, because the probability variables are accessed +/// a lot. On the other hand, bigger probability type increases cache +/// footprint, since there are 2 to 14 thousand probability variables in +/// LZMA (assuming the limit of lc + lp <= 4; with lc + lp <= 12 there +/// would be about 1.5 million variables). +/// +/// With malicious files, the initialization speed of the LZMA decoder can +/// become important. In that case, smaller probability variables mean that +/// there is less bytes to write to RAM, which makes initialization faster. +/// With big probability type, the initialization can become so slow that it +/// can be a problem e.g. for email servers doing virus scanning. +/// +/// I will be sticking to uint16_t unless some specific architectures +/// are *much* faster (20-50 %) with uint32_t. +typedef uint16_t probability; + +#endif diff --git a/liblzma/rangecoder/range_decoder.h b/liblzma/rangecoder/range_decoder.h new file mode 100644 index 0000000..fb96180 --- /dev/null +++ b/liblzma/rangecoder/range_decoder.h @@ -0,0 +1,179 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_decoder.h +/// \brief Range Decoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_DECODER_H +#define LZMA_RANGE_DECODER_H + +#include "range_common.h" + + +typedef struct { + uint32_t range; + uint32_t code; + uint32_t init_bytes_left; +} lzma_range_decoder; + + +/// Reads the first five bytes to initialize the range decoder. +static inline bool +rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + while (rc->init_bytes_left > 0) { + if (*in_pos == in_size) + return false; + + rc->code = (rc->code << 8) | in[*in_pos]; + ++*in_pos; + --rc->init_bytes_left; + } + + return true; +} + + +/// Makes local copies of range decoder and *in_pos variables. Doing this +/// improves speed significantly. The range decoder macros expect also +/// variables `in' and `in_size' to be defined. +#define rc_to_local(range_decoder, in_pos) \ + lzma_range_decoder rc = range_decoder; \ + size_t rc_in_pos = (in_pos); \ + uint32_t rc_bound + + +/// Stores the local copes back to the range decoder structure. +#define rc_from_local(range_decoder, in_pos) \ +do { \ + range_decoder = rc; \ + in_pos = rc_in_pos; \ +} while (0) + + +/// Resets the range decoder structure. +#define rc_reset(range_decoder) \ +do { \ + (range_decoder).range = UINT32_MAX; \ + (range_decoder).code = 0; \ + (range_decoder).init_bytes_left = 5; \ +} while (0) + + +/// When decoding has been properly finished, rc.code is always zero unless +/// the input stream is corrupt. So checking this can catch some corrupt +/// files especially if they don't have any other integrity check. +#define rc_is_finished(range_decoder) \ + ((range_decoder).code == 0) + + +/// Read the next input byte if needed. If more input is needed but there is +/// no more input available, "goto out" is used to jump out of the main +/// decoder loop. +#define rc_normalize(seq) \ +do { \ + if (rc.range < RC_TOP_VALUE) { \ + if (unlikely(rc_in_pos == in_size)) { \ + coder->sequence = seq; \ + goto out; \ + } \ + rc.range <<= RC_SHIFT_BITS; \ + rc.code = (rc.code << RC_SHIFT_BITS) | in[rc_in_pos++]; \ + } \ +} while (0) + + +/// Start decoding a bit. This must be used together with rc_update_0() +/// and rc_update_1(): +/// +/// rc_if_0(prob, seq) { +/// rc_update_0(prob); +/// // Do something +/// } else { +/// rc_update_1(prob); +/// // Do something else +/// } +/// +#define rc_if_0(prob, seq) \ + rc_normalize(seq); \ + rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \ + if (rc.code < rc_bound) + + +/// Update the range decoder state and the used probability variable to +/// match a decoded bit of 0. +#define rc_update_0(prob) \ +do { \ + rc.range = rc_bound; \ + prob += (RC_BIT_MODEL_TOTAL - (prob)) >> RC_MOVE_BITS; \ +} while (0) + + +/// Update the range decoder state and the used probability variable to +/// match a decoded bit of 1. +#define rc_update_1(prob) \ +do { \ + rc.range -= rc_bound; \ + rc.code -= rc_bound; \ + prob -= (prob) >> RC_MOVE_BITS; \ +} while (0) + + +/// Decodes one bit and runs action0 or action1 depending on the decoded bit. +/// This macro is used as the last step in bittree reverse decoders since +/// those don't use "symbol" for anything else than indexing the probability +/// arrays. +#define rc_bit_last(prob, action0, action1, seq) \ +do { \ + rc_if_0(prob, seq) { \ + rc_update_0(prob); \ + action0; \ + } else { \ + rc_update_1(prob); \ + action1; \ + } \ +} while (0) + + +/// Decodes one bit, updates "symbol", and runs action0 or action1 depending +/// on the decoded bit. +#define rc_bit(prob, action0, action1, seq) \ + rc_bit_last(prob, \ + symbol <<= 1; action0, \ + symbol = (symbol << 1) + 1; action1, \ + seq); + + +/// Like rc_bit() but add "case seq:" as a prefix. This makes the unrolled +/// loops more readable because the code isn't littered with "case" +/// statements. On the other hand this also makes it less readable, since +/// spotting the places where the decoder loop may be restarted is less +/// obvious. +#define rc_bit_case(prob, action0, action1, seq) \ + case seq: rc_bit(prob, action0, action1, seq) + + +/// Decode a bit without using a probability. +#define rc_direct(dest, seq) \ +do { \ + rc_normalize(seq); \ + rc.range >>= 1; \ + rc.code -= rc.range; \ + rc_bound = UINT32_C(0) - (rc.code >> 31); \ + rc.code += rc.range & rc_bound; \ + dest = (dest << 1) + (rc_bound + 1); \ +} while (0) + + +// NOTE: No macros are provided for bittree decoding. It seems to be simpler +// to just write them open in the code. + +#endif diff --git a/liblzma/rangecoder/range_encoder.h b/liblzma/rangecoder/range_encoder.h new file mode 100644 index 0000000..5124176 --- /dev/null +++ b/liblzma/rangecoder/range_encoder.h @@ -0,0 +1,237 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file range_encoder.h +/// \brief Range Encoder +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_RANGE_ENCODER_H +#define LZMA_RANGE_ENCODER_H + +#include "range_common.h" +#include "price.h" + + +/// Maximum number of symbols that can be put pending into lzma_range_encoder +/// structure between calls to lzma_rc_encode(). For LZMA, 52+5 is enough +/// (match with big distance and length followed by range encoder flush). +#define RC_SYMBOLS_MAX 58 + + +typedef struct { + uint64_t low; + uint64_t cache_size; + uint32_t range; + uint8_t cache; + + /// Number of symbols in the tables + size_t count; + + /// rc_encode()'s position in the tables + size_t pos; + + /// Symbols to encode + enum { + RC_BIT_0, + RC_BIT_1, + RC_DIRECT_0, + RC_DIRECT_1, + RC_FLUSH, + } symbols[RC_SYMBOLS_MAX]; + + /// Probabilities associated with RC_BIT_0 or RC_BIT_1 + probability *probs[RC_SYMBOLS_MAX]; + +} lzma_range_encoder; + + +static inline void +rc_reset(lzma_range_encoder *rc) +{ + rc->low = 0; + rc->cache_size = 1; + rc->range = UINT32_MAX; + rc->cache = 0; + rc->count = 0; + rc->pos = 0; +} + + +static inline void +rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit) +{ + rc->symbols[rc->count] = bit; + rc->probs[rc->count] = prob; + ++rc->count; +} + + +static inline void +rc_bittree(lzma_range_encoder *rc, probability *probs, + uint32_t bit_count, uint32_t symbol) +{ + uint32_t model_index = 1; + + do { + const uint32_t bit = (symbol >> --bit_count) & 1; + rc_bit(rc, &probs[model_index], bit); + model_index = (model_index << 1) + bit; + } while (bit_count != 0); +} + + +static inline void +rc_bittree_reverse(lzma_range_encoder *rc, probability *probs, + uint32_t bit_count, uint32_t symbol) +{ + uint32_t model_index = 1; + + do { + const uint32_t bit = symbol & 1; + symbol >>= 1; + rc_bit(rc, &probs[model_index], bit); + model_index = (model_index << 1) + bit; + } while (--bit_count != 0); +} + + +static inline void +rc_direct(lzma_range_encoder *rc, + uint32_t value, uint32_t bit_count) +{ + do { + rc->symbols[rc->count++] + = RC_DIRECT_0 + ((value >> --bit_count) & 1); + } while (bit_count != 0); +} + + +static inline void +rc_flush(lzma_range_encoder *rc) +{ +#if __MOJOSETUP__ + size_t i; + for (i = 0; i < 5; ++i) + rc->symbols[rc->count++] = RC_FLUSH; +#else + for (size_t i = 0; i < 5; ++i) + rc->symbols[rc->count++] = RC_FLUSH; +#endif +} + + +static inline bool +rc_shift_low(lzma_range_encoder *rc, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + if ((uint32_t)(rc->low) < (uint32_t)(0xFF000000) + || (uint32_t)(rc->low >> 32) != 0) { + do { + if (*out_pos == out_size) + return true; + + out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32); + ++*out_pos; + rc->cache = 0xFF; + + } while (--rc->cache_size != 0); + + rc->cache = (rc->low >> 24) & 0xFF; + } + + ++rc->cache_size; + rc->low = (rc->low & 0x00FFFFFF) << RC_SHIFT_BITS; + + return false; +} + + +static inline bool +rc_encode(lzma_range_encoder *rc, + uint8_t *out, size_t *out_pos, size_t out_size) +{ + assert(rc->count <= RC_SYMBOLS_MAX); + + while (rc->pos < rc->count) { + // Normalize + if (rc->range < RC_TOP_VALUE) { + if (rc_shift_low(rc, out, out_pos, out_size)) + return true; + + rc->range <<= RC_SHIFT_BITS; + } + + // Encode a bit + switch (rc->symbols[rc->pos]) { + case RC_BIT_0: { + probability prob = *rc->probs[rc->pos]; + rc->range = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) + * prob; + prob += (RC_BIT_MODEL_TOTAL - prob) >> RC_MOVE_BITS; + *rc->probs[rc->pos] = prob; + break; + } + + case RC_BIT_1: { + probability prob = *rc->probs[rc->pos]; + const uint32_t bound = prob * (rc->range + >> RC_BIT_MODEL_TOTAL_BITS); + rc->low += bound; + rc->range -= bound; + prob -= prob >> RC_MOVE_BITS; + *rc->probs[rc->pos] = prob; + break; + } + + case RC_DIRECT_0: + rc->range >>= 1; + break; + + case RC_DIRECT_1: + rc->range >>= 1; + rc->low += rc->range; + break; + + case RC_FLUSH: + // Prevent further normalizations. + rc->range = UINT32_MAX; + + // Flush the last five bytes (see rc_flush()). + do { + if (rc_shift_low(rc, out, out_pos, out_size)) + return true; + } while (++rc->pos < rc->count); + + // Reset the range encoder so we are ready to continue + // encoding if we weren't finishing the stream. + rc_reset(rc); + return false; + + default: + assert(0); + break; + } + + ++rc->pos; + } + + rc->count = 0; + rc->pos = 0; + + return false; +} + + +static inline uint64_t +rc_pending(const lzma_range_encoder *rc) +{ + return rc->cache_size + 5 - 1; +} + +#endif diff --git a/liblzma/simple/arm.c b/liblzma/simple/arm.c new file mode 100644 index 0000000..8fcf643 --- /dev/null +++ b/liblzma/simple/arm.c @@ -0,0 +1,69 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file arm.c +/// \brief Filter for ARM binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +arm_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + if (buffer[i + 3] == 0xEB) { + uint32_t src = (buffer[i + 2] << 16) + | (buffer[i + 1] << 8) + | (buffer[i + 0]); + src <<= 2; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + 8 + src; + else + dest = src - (now_pos + (uint32_t)(i) + 8); + + dest >>= 2; + buffer[i + 2] = (dest >> 16); + buffer[i + 1] = (dest >> 8); + buffer[i + 0] = dest; + } + } + + return i; +} + + +static lzma_ret +arm_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &arm_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_arm_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_arm_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return arm_coder_init(next, allocator, filters, false); +} diff --git a/liblzma/simple/armthumb.c b/liblzma/simple/armthumb.c new file mode 100644 index 0000000..eb6a69d --- /dev/null +++ b/liblzma/simple/armthumb.c @@ -0,0 +1,74 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file armthumb.c +/// \brief Filter for ARM-Thumb binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +armthumb_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 2) { + if ((buffer[i + 1] & 0xF8) == 0xF0 + && (buffer[i + 3] & 0xF8) == 0xF8) { + uint32_t src = ((buffer[i + 1] & 0x7) << 19) + | (buffer[i + 0] << 11) + | ((buffer[i + 3] & 0x7) << 8) + | (buffer[i + 2]); + + src <<= 1; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + 4 + src; + else + dest = src - (now_pos + (uint32_t)(i) + 4); + + dest >>= 1; + buffer[i + 1] = 0xF0 | ((dest >> 19) & 0x7); + buffer[i + 0] = (dest >> 11); + buffer[i + 3] = 0xF8 | ((dest >> 8) & 0x7); + buffer[i + 2] = (dest); + i += 2; + } + } + + return i; +} + + +static lzma_ret +armthumb_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &armthumb_code, 0, 4, 2, is_encoder); +} + + +extern lzma_ret +lzma_simple_armthumb_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return armthumb_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_armthumb_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return armthumb_coder_init(next, allocator, filters, false); +} diff --git a/liblzma/simple/ia64.c b/liblzma/simple/ia64.c new file mode 100644 index 0000000..a5e2ab2 --- /dev/null +++ b/liblzma/simple/ia64.c @@ -0,0 +1,130 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file ia64.c +/// \brief Filter for IA64 (Itanium) binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +ia64_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const uint32_t BRANCH_TABLE[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + size_t i; + for (i = 0; i + 16 <= size; i += 16) { + const uint32_t instr_template = buffer[i] & 0x1F; + const uint32_t mask = BRANCH_TABLE[instr_template]; + uint32_t bit_pos = 5; + +#if __MOJOSETUP__ + size_t slot; + for (slot = 0; slot < 3; ++slot, bit_pos += 41) { +#else + for (size_t slot = 0; slot < 3; ++slot, bit_pos += 41) { +#endif + if (((mask >> slot) & 1) == 0) + continue; + + const size_t byte_pos = (bit_pos >> 3); + const uint32_t bit_res = bit_pos & 0x7; + uint64_t instruction = 0; + +#if __MOJOSETUP__ + size_t j; + for (j = 0; j < 6; ++j) + instruction += (uint64_t)( + buffer[i + j + byte_pos]) + << (8 * j); +#else + for (size_t j = 0; j < 6; ++j) + instruction += (uint64_t)( + buffer[i + j + byte_pos]) + << (8 * j); +#endif + + uint64_t inst_norm = instruction >> bit_res; + + if (((inst_norm >> 37) & 0xF) == 0x5 + && ((inst_norm >> 9) & 0x7) == 0 + /* && (inst_norm & 0x3F)== 0 */ + ) { + uint32_t src = (uint32_t)( + (inst_norm >> 13) & 0xFFFFF); + src |= ((inst_norm >> 36) & 1) << 20; + + src <<= 4; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 4; + + inst_norm &= ~((uint64_t)(0x8FFFFF) << 13); + inst_norm |= (uint64_t)(dest & 0xFFFFF) << 13; + inst_norm |= (uint64_t)(dest & 0x100000) + << (36 - 20); + + instruction &= (1 << bit_res) - 1; + instruction |= (inst_norm << bit_res); + +#if __MOJOSETUP__ + for (j = 0; j < 6; j++) + buffer[i + j + byte_pos] = (uint8_t)( + instruction + >> (8 * j)); +#else + for (size_t j = 0; j < 6; j++) + buffer[i + j + byte_pos] = (uint8_t)( + instruction + >> (8 * j)); +#endif + } + } + } + + return i; +} + + +static lzma_ret +ia64_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &ia64_code, 0, 16, 16, is_encoder); +} + + +extern lzma_ret +lzma_simple_ia64_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_ia64_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return ia64_coder_init(next, allocator, filters, false); +} diff --git a/liblzma/simple/powerpc.c b/liblzma/simple/powerpc.c new file mode 100644 index 0000000..aaa14f2 --- /dev/null +++ b/liblzma/simple/powerpc.c @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file powerpc.c +/// \brief Filter for PowerPC (big endian) binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +powerpc_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + // PowerPC branch 6(48) 24(Offset) 1(Abs) 1(Link) + if ((buffer[i] >> 2) == 0x12 + && ((buffer[i + 3] & 3) == 1)) { + + const uint32_t src = ((buffer[i + 0] & 3) << 24) + | (buffer[i + 1] << 16) + | (buffer[i + 2] << 8) + | (buffer[i + 3] & (~3)); + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + buffer[i + 0] = 0x48 | ((dest >> 24) & 0x03); + buffer[i + 1] = (dest >> 16); + buffer[i + 2] = (dest >> 8); + buffer[i + 3] &= 0x03; + buffer[i + 3] |= dest; + } + } + + return i; +} + + +static lzma_ret +powerpc_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &powerpc_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return powerpc_coder_init(next, allocator, filters, false); +} diff --git a/liblzma/simple/simple_coder.c b/liblzma/simple/simple_coder.c new file mode 100644 index 0000000..06db86e --- /dev/null +++ b/liblzma/simple/simple_coder.c @@ -0,0 +1,280 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_coder.c +/// \brief Wrapper for simple filters +/// +/// Simple filters don't change the size of the data i.e. number of bytes +/// in equals the number of bytes out. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +/// Copied or encodes/decodes more data to out[]. +static lzma_ret +copy_or_code(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + assert(!coder->end_was_reached); + + if (coder->next.code == NULL) { + lzma_bufcpy(in, in_pos, in_size, out, out_pos, out_size); + + // Check if end of stream was reached. + if (coder->is_encoder && action == LZMA_FINISH + && *in_pos == in_size) + coder->end_was_reached = true; + + } else { + // Call the next coder in the chain to provide us some data. + // We don't care about uncompressed_size here, because + // the next filter in the chain will do it for us (since + // we don't change the size of the data). + const lzma_ret ret = coder->next.code( + coder->next.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + + if (ret == LZMA_STREAM_END) { + assert(!coder->is_encoder + || action == LZMA_FINISH); + coder->end_was_reached = true; + + } else if (ret != LZMA_OK) { + return ret; + } + } + + return LZMA_OK; +} + + +static size_t +call_filter(lzma_coder *coder, uint8_t *buffer, size_t size) +{ + const size_t filtered = coder->filter(coder->simple, + coder->now_pos, coder->is_encoder, + buffer, size); + coder->now_pos += filtered; + return filtered; +} + + +static lzma_ret +simple_code(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // TODO: Add partial support for LZMA_SYNC_FLUSH. We can support it + // in cases when the filter is able to filter everything. With most + // simple filters it can be done at offset that is a multiple of 2, + // 4, or 16. With x86 filter, it needs good luck, and thus cannot + // be made to work predictably. + if (action == LZMA_SYNC_FLUSH) + return LZMA_OPTIONS_ERROR; + + // Flush already filtered data from coder->buffer[] to out[]. + if (coder->pos < coder->filtered) { + lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered, + out, out_pos, out_size); + + // If we couldn't flush all the filtered data, return to + // application immediately. + if (coder->pos < coder->filtered) + return LZMA_OK; + + if (coder->end_was_reached) { + assert(coder->filtered == coder->size); + return LZMA_STREAM_END; + } + } + + // If we get here, there is no filtered data left in the buffer. + coder->filtered = 0; + + assert(!coder->end_was_reached); + + // If there is more output space left than there is unfiltered data + // in coder->buffer[], flush coder->buffer[] to out[], and copy/code + // more data to out[] hopefully filling it completely. Then filter + // the data in out[]. This step is where most of the data gets + // filtered if the buffer sizes used by the application are reasonable. + const size_t out_avail = out_size - *out_pos; + const size_t buf_avail = coder->size - coder->pos; + if (out_avail > buf_avail) { + // Store the old position so that we know from which byte + // to start filtering. + const size_t out_start = *out_pos; + + // Flush data from coder->buffer[] to out[], but don't reset + // coder->pos and coder->size yet. This way the coder can be + // restarted if the next filter in the chain returns e.g. + // LZMA_MEM_ERROR. + memcpy(out + *out_pos, coder->buffer + coder->pos, buf_avail); + *out_pos += buf_avail; + + // Copy/Encode/Decode more data to out[]. + { + const lzma_ret ret = copy_or_code(coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); + assert(ret != LZMA_STREAM_END); + if (ret != LZMA_OK) + return ret; + } + + // Filter out[]. + const size_t size = *out_pos - out_start; + const size_t filtered = call_filter( + coder, out + out_start, size); + + const size_t unfiltered = size - filtered; + assert(unfiltered <= coder->allocated / 2); + + // Now we can update coder->pos and coder->size, because + // the next coder in the chain (if any) was successful. + coder->pos = 0; + coder->size = unfiltered; + + if (coder->end_was_reached) { + // The last byte has been copied to out[] already. + // They are left as is. + coder->size = 0; + + } else if (unfiltered > 0) { + // There is unfiltered data left in out[]. Copy it to + // coder->buffer[] and rewind *out_pos appropriately. + *out_pos -= unfiltered; + memcpy(coder->buffer, out + *out_pos, unfiltered); + } + } else if (coder->pos > 0) { + memmove(coder->buffer, coder->buffer + coder->pos, buf_avail); + coder->size -= coder->pos; + coder->pos = 0; + } + + assert(coder->pos == 0); + + // If coder->buffer[] isn't empty, try to fill it by copying/decoding + // more data. Then filter coder->buffer[] and copy the successfully + // filtered data to out[]. It is probable, that some filtered and + // unfiltered data will be left to coder->buffer[]. + if (coder->size > 0) { + { + const lzma_ret ret = copy_or_code(coder, allocator, + in, in_pos, in_size, + coder->buffer, &coder->size, + coder->allocated, action); + assert(ret != LZMA_STREAM_END); + if (ret != LZMA_OK) + return ret; + } + + coder->filtered = call_filter( + coder, coder->buffer, coder->size); + + // Everything is considered to be filtered if coder->buffer[] + // contains the last bytes of the data. + if (coder->end_was_reached) + coder->filtered = coder->size; + + // Flush as much as possible. + lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered, + out, out_pos, out_size); + } + + // Check if we got everything done. + if (coder->end_was_reached && coder->pos == coder->size) + return LZMA_STREAM_END; + + return LZMA_OK; +} + + +static void +simple_coder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder->simple, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +simple_coder_update(lzma_coder *coder, lzma_allocator *allocator, + const lzma_filter *filters_null lzma_attribute((unused)), + const lzma_filter *reversed_filters) +{ + // No update support, just call the next filter in the chain. + return lzma_next_filter_update( + &coder->next, allocator, reversed_filters + 1); +} + + +extern lzma_ret +lzma_simple_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size), + size_t simple_size, size_t unfiltered_max, + uint32_t alignment, bool is_encoder) +{ + // Allocate memory for the lzma_coder structure if needed. + if (next->coder == NULL) { + // Here we allocate space also for the temporary buffer. We + // need twice the size of unfiltered_max, because then it + // is always possible to filter at least unfiltered_max bytes + // more data in coder->buffer[] if it can be filled completely. + next->coder = lzma_alloc(sizeof(lzma_coder) + + 2 * unfiltered_max, allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &simple_code; + next->end = &simple_coder_end; + next->update = &simple_coder_update; + + next->coder->next = LZMA_NEXT_CODER_INIT; + next->coder->filter = filter; + next->coder->allocated = 2 * unfiltered_max; + + // Allocate memory for filter-specific data structure. + if (simple_size > 0) { + next->coder->simple = lzma_alloc( + simple_size, allocator); + if (next->coder->simple == NULL) + return LZMA_MEM_ERROR; + } else { + next->coder->simple = NULL; + } + } + + if (filters[0].options != NULL) { + const lzma_options_bcj *simple = filters[0].options; + next->coder->now_pos = simple->start_offset; + if (next->coder->now_pos & (alignment - 1)) + return LZMA_OPTIONS_ERROR; + } else { + next->coder->now_pos = 0; + } + + // Reset variables. + next->coder->is_encoder = is_encoder; + next->coder->end_was_reached = false; + next->coder->pos = 0; + next->coder->filtered = 0; + next->coder->size = 0; + + return lzma_next_filter_init( + &next->coder->next, allocator, filters + 1); +} diff --git a/liblzma/simple/simple_coder.h b/liblzma/simple/simple_coder.h new file mode 100644 index 0000000..0952fad --- /dev/null +++ b/liblzma/simple/simple_coder.h @@ -0,0 +1,60 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_coder.h +/// \brief Wrapper for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_CODER_H +#define LZMA_SIMPLE_CODER_H + +#include "common.h" + + +extern lzma_ret lzma_simple_x86_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_x86_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_powerpc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_powerpc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_ia64_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_ia64_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_arm_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_arm_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_armthumb_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + + +extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern lzma_ret lzma_simple_sparc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +#endif diff --git a/liblzma/simple/simple_decoder.c b/liblzma/simple/simple_decoder.c new file mode 100644 index 0000000..0beccd3 --- /dev/null +++ b/liblzma/simple/simple_decoder.c @@ -0,0 +1,40 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_decoder.c +/// \brief Properties decoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_decoder.h" + + +extern lzma_ret +lzma_simple_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size == 0) + return LZMA_OK; + + if (props_size != 4) + return LZMA_OPTIONS_ERROR; + + lzma_options_bcj *opt = lzma_alloc( + sizeof(lzma_options_bcj), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + opt->start_offset = unaligned_read32le(props); + + // Don't leave an options structure allocated if start_offset is zero. + if (opt->start_offset == 0) + lzma_free(opt, allocator); + else + *options = opt; + + return LZMA_OK; +} diff --git a/liblzma/simple/simple_decoder.h b/liblzma/simple/simple_decoder.h new file mode 100644 index 0000000..b8bf590 --- /dev/null +++ b/liblzma/simple/simple_decoder.h @@ -0,0 +1,22 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_decoder.h +/// \brief Properties decoder for simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_DECODER_H +#define LZMA_SIMPLE_DECODER_H + +#include "simple_coder.h" + +extern lzma_ret lzma_simple_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/liblzma/simple/simple_private.h b/liblzma/simple/simple_private.h new file mode 100644 index 0000000..a69f827 --- /dev/null +++ b/liblzma/simple/simple_private.h @@ -0,0 +1,76 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file simple_private.h +/// \brief Private definitions for so called simple filters +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_SIMPLE_PRIVATE_H +#define LZMA_SIMPLE_PRIVATE_H + +#include "simple_coder.h" + + +typedef struct lzma_simple_s lzma_simple; + +struct lzma_coder_s { + /// Next filter in the chain + lzma_next_coder next; + + /// True if the next coder in the chain has returned LZMA_STREAM_END + /// or if we have processed uncompressed_size bytes. + bool end_was_reached; + + /// True if filter() should encode the data; false to decode. + /// Currently all simple filters use the same function for encoding + /// and decoding, because the difference between encoders and decoders + /// is very small. + bool is_encoder; + + /// Pointer to filter-specific function, which does + /// the actual filtering. + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size); + + /// Pointer to filter-specific data, or NULL if filter doesn't need + /// any extra data. + lzma_simple *simple; + + /// The lowest 32 bits of the current position in the data. Most + /// filters need this to do conversions between absolute and relative + /// addresses. + uint32_t now_pos; + + /// Size of the memory allocated for the buffer. + size_t allocated; + + /// Flushing position in the temporary buffer. buffer[pos] is the + /// next byte to be copied to out[]. + size_t pos; + + /// buffer[filtered] is the first unfiltered byte. When pos is smaller + /// than filtered, there is unflushed filtered data in the buffer. + size_t filtered; + + /// Total number of bytes (both filtered and unfiltered) currently + /// in the temporary buffer. + size_t size; + + /// Temporary buffer + uint8_t buffer[]; +}; + + +extern lzma_ret lzma_simple_coder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters, + size_t (*filter)(lzma_simple *simple, uint32_t now_pos, + bool is_encoder, uint8_t *buffer, size_t size), + size_t simple_size, size_t unfiltered_max, + uint32_t alignment, bool is_encoder); + +#endif diff --git a/liblzma/simple/sparc.c b/liblzma/simple/sparc.c new file mode 100644 index 0000000..808a59a --- /dev/null +++ b/liblzma/simple/sparc.c @@ -0,0 +1,81 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file sparc.c +/// \brief Filter for SPARC binaries +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +static size_t +sparc_code(lzma_simple *simple lzma_attribute((unused)), + uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + size_t i; + for (i = 0; i + 4 <= size; i += 4) { + + if ((buffer[i] == 0x40 && (buffer[i + 1] & 0xC0) == 0x00) + || (buffer[i] == 0x7F + && (buffer[i + 1] & 0xC0) == 0xC0)) { + + uint32_t src = ((uint32_t)buffer[i + 0] << 24) + | ((uint32_t)buffer[i + 1] << 16) + | ((uint32_t)buffer[i + 2] << 8) + | ((uint32_t)buffer[i + 3]); + + src <<= 2; + + uint32_t dest; + if (is_encoder) + dest = now_pos + (uint32_t)(i) + src; + else + dest = src - (now_pos + (uint32_t)(i)); + + dest >>= 2; + + dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) + | (dest & 0x3FFFFF) + | 0x40000000; + + buffer[i + 0] = (uint8_t)(dest >> 24); + buffer[i + 1] = (uint8_t)(dest >> 16); + buffer[i + 2] = (uint8_t)(dest >> 8); + buffer[i + 3] = (uint8_t)(dest); + } + } + + return i; +} + + +static lzma_ret +sparc_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + return lzma_simple_coder_init(next, allocator, filters, + &sparc_code, 0, 4, 4, is_encoder); +} + + +extern lzma_ret +lzma_simple_sparc_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_sparc_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters) +{ + return sparc_coder_init(next, allocator, filters, false); +} diff --git a/liblzma/simple/x86.c b/liblzma/simple/x86.c new file mode 100644 index 0000000..7f07678 --- /dev/null +++ b/liblzma/simple/x86.c @@ -0,0 +1,159 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file x86.c +/// \brief Filter for x86 binaries (BCJ filter) +/// +// Authors: Igor Pavlov +// Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "simple_private.h" + + +#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) + + +struct lzma_simple_s { + uint32_t prev_mask; + uint32_t prev_pos; +}; + + +static size_t +x86_code(lzma_simple *simple, uint32_t now_pos, bool is_encoder, + uint8_t *buffer, size_t size) +{ + static const bool MASK_TO_ALLOWED_STATUS[8] + = { true, true, true, false, true, false, false, false }; + + static const uint32_t MASK_TO_BIT_NUMBER[8] + = { 0, 1, 2, 2, 3, 3, 3, 3 }; + + uint32_t prev_mask = simple->prev_mask; + uint32_t prev_pos = simple->prev_pos; + + if (size < 5) + return 0; + + if (now_pos - prev_pos > 5) + prev_pos = now_pos - 5; + + const size_t limit = size - 5; + size_t buffer_pos = 0; + + while (buffer_pos <= limit) { + uint8_t b = buffer[buffer_pos]; + if (b != 0xE8 && b != 0xE9) { + ++buffer_pos; + continue; + } + + const uint32_t offset = now_pos + (uint32_t)(buffer_pos) + - prev_pos; + prev_pos = now_pos + (uint32_t)(buffer_pos); + + if (offset > 5) { + prev_mask = 0; + } else { +#if __MOJOSETUP__ + uint32_t i; + for (i = 0; i < offset; ++i) { +#else + for (uint32_t i = 0; i < offset; ++i) { +#endif + prev_mask &= 0x77; + prev_mask <<= 1; + } + } + + b = buffer[buffer_pos + 4]; + + if (Test86MSByte(b) + && MASK_TO_ALLOWED_STATUS[(prev_mask >> 1) & 0x7] + && (prev_mask >> 1) < 0x10) { + + uint32_t src = ((uint32_t)(b) << 24) + | ((uint32_t)(buffer[buffer_pos + 3]) << 16) + | ((uint32_t)(buffer[buffer_pos + 2]) << 8) + | (buffer[buffer_pos + 1]); + + uint32_t dest; + while (true) { + if (is_encoder) + dest = src + (now_pos + (uint32_t)( + buffer_pos) + 5); + else + dest = src - (now_pos + (uint32_t)( + buffer_pos) + 5); + + if (prev_mask == 0) + break; + + const uint32_t i = MASK_TO_BIT_NUMBER[ + prev_mask >> 1]; + + b = (uint8_t)(dest >> (24 - i * 8)); + + if (!Test86MSByte(b)) + break; + + src = dest ^ ((1 << (32 - i * 8)) - 1); + } + + buffer[buffer_pos + 4] + = (uint8_t)(~(((dest >> 24) & 1) - 1)); + buffer[buffer_pos + 3] = (uint8_t)(dest >> 16); + buffer[buffer_pos + 2] = (uint8_t)(dest >> 8); + buffer[buffer_pos + 1] = (uint8_t)(dest); + buffer_pos += 5; + prev_mask = 0; + + } else { + ++buffer_pos; + prev_mask |= 1; + if (Test86MSByte(b)) + prev_mask |= 0x10; + } + } + + simple->prev_mask = prev_mask; + simple->prev_pos = prev_pos; + + return buffer_pos; +} + + +static lzma_ret +x86_coder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, bool is_encoder) +{ + const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters, + &x86_code, sizeof(lzma_simple), 5, 1, is_encoder); + + if (ret == LZMA_OK) { + next->coder->simple->prev_mask = 0; + next->coder->simple->prev_pos = (uint32_t)(-5); + } + + return ret; +} + + +extern lzma_ret +lzma_simple_x86_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return x86_coder_init(next, allocator, filters, true); +} + + +extern lzma_ret +lzma_simple_x86_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return x86_coder_init(next, allocator, filters, false); +}