From d267d86f6e24c4f947c30c1a3642d57b82f8addd Mon Sep 17 00:00:00 2001 From: Petr Mrázek Date: Sat, 28 Sep 2013 18:43:30 +0200 Subject: Replace lzma with xz. Tweaks to fix debug builds. --- depends/xz-embedded/CMakeLists.txt | 45 ++ depends/xz-embedded/include/xz.h | 304 ++++++++ depends/xz-embedded/src/xz_config.h | 124 ++++ depends/xz-embedded/src/xz_crc32.c | 59 ++ depends/xz-embedded/src/xz_crc64.c | 50 ++ depends/xz-embedded/src/xz_dec_bcj.c | 574 +++++++++++++++ depends/xz-embedded/src/xz_dec_lzma2.c | 1171 +++++++++++++++++++++++++++++++ depends/xz-embedded/src/xz_dec_stream.c | 847 ++++++++++++++++++++++ depends/xz-embedded/src/xz_lzma2.h | 204 ++++++ depends/xz-embedded/src/xz_private.h | 156 ++++ depends/xz-embedded/src/xz_stream.h | 62 ++ depends/xz-embedded/xzminidec.c | 135 ++++ 12 files changed, 3731 insertions(+) create mode 100644 depends/xz-embedded/CMakeLists.txt create mode 100644 depends/xz-embedded/include/xz.h create mode 100644 depends/xz-embedded/src/xz_config.h create mode 100644 depends/xz-embedded/src/xz_crc32.c create mode 100644 depends/xz-embedded/src/xz_crc64.c create mode 100644 depends/xz-embedded/src/xz_dec_bcj.c create mode 100644 depends/xz-embedded/src/xz_dec_lzma2.c create mode 100644 depends/xz-embedded/src/xz_dec_stream.c create mode 100644 depends/xz-embedded/src/xz_lzma2.h create mode 100644 depends/xz-embedded/src/xz_private.h create mode 100644 depends/xz-embedded/src/xz_stream.h create mode 100644 depends/xz-embedded/xzminidec.c (limited to 'depends/xz-embedded') diff --git a/depends/xz-embedded/CMakeLists.txt b/depends/xz-embedded/CMakeLists.txt new file mode 100644 index 00000000..a71002fb --- /dev/null +++ b/depends/xz-embedded/CMakeLists.txt @@ -0,0 +1,45 @@ +cmake_minimum_required(VERSION 2.6) +project(xz-embedded) + +option(XZ_BUILD_BCJ "Build xz-embedded with BCJ support (native binary optimization)" OFF) +option(XZ_BUILD_CRC64 "Build xz-embedded with CRC64 checksum support" ON) +option(XZ_BUILD_MINIDEC "Build a tiny utility that decompresses xz streams" ON) + +set(CMAKE_C_FLAGS "-std=c99") + +include_directories(include) + +set(XZ_SOURCES +include/xz.h +src/xz_config.h +src/xz_crc32.c +src/xz_dec_lzma2.c +src/xz_dec_stream.c +src/xz_lzma2.h +src/xz_private.h +src/xz_stream.h +) + +# checksum checks +add_definitions(-DXZ_DEC_ANY_CHECK) +if(XZ_BUILD_CRC64) + add_definitions(-DXZ_USE_CRC64) + LIST(APPEND XZ_SOURCES src/xz_crc64.c) +endif() +# TODO: add SHA256 + +# uncomment these, if required. +if(XZ_BUILD_BCJ) + add_definitions(-DXZ_DEC_X86 -DXZ_DEC_POWERPC -DXZ_DEC_IA64) + add_definitions(-DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_SPARC) + LIST(APPEND XZ_SOURCES src/xz_dec_bcj.c) +endif() + +# Static link! +ADD_DEFINITIONS(-DXZ_STATIC) + +add_definitions(-DXZ_LIBRARY) + +add_library(xz-embedded SHARED ${XZ_SOURCES}) +add_executable(xzminidec xzminidec.c) +target_link_libraries(xzminidec xz-embedded) diff --git a/depends/xz-embedded/include/xz.h b/depends/xz-embedded/include/xz.h new file mode 100644 index 00000000..0a4b38d3 --- /dev/null +++ b/depends/xz-embedded/include/xz.h @@ -0,0 +1,304 @@ +/* + * XZ decompressor + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_H +#define XZ_H + +#ifdef __KERNEL__ +# include +# include +#else +# include +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* In Linux, this is used to make extern functions static when needed. */ +#ifndef XZ_EXTERN +# define XZ_EXTERN extern +#endif + +/** + * enum xz_mode - Operation mode + * + * @XZ_SINGLE: Single-call mode. This uses less RAM than + * than multi-call modes, because the LZMA2 + * dictionary doesn't need to be allocated as + * part of the decoder state. All required data + * structures are allocated at initialization, + * so xz_dec_run() cannot return XZ_MEM_ERROR. + * @XZ_PREALLOC: Multi-call mode with preallocated LZMA2 + * dictionary buffer. All data structures are + * allocated at initialization, so xz_dec_run() + * cannot return XZ_MEM_ERROR. + * @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is + * allocated once the required size has been + * parsed from the stream headers. If the + * allocation fails, xz_dec_run() will return + * XZ_MEM_ERROR. + * + * It is possible to enable support only for a subset of the above + * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC, + * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled + * with support for all operation modes, but the preboot code may + * be built with fewer features to minimize code size. + */ +enum xz_mode { + XZ_SINGLE, + XZ_PREALLOC, + XZ_DYNALLOC +}; + +/** + * enum xz_ret - Return codes + * @XZ_OK: Everything is OK so far. More input or more + * output space is required to continue. This + * return code is possible only in multi-call mode + * (XZ_PREALLOC or XZ_DYNALLOC). + * @XZ_STREAM_END: Operation finished successfully. + * @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding + * is still possible in multi-call mode by simply + * calling xz_dec_run() again. + * Note that this return value is used only if + * XZ_DEC_ANY_CHECK was defined at build time, + * which is not used in the kernel. Unsupported + * check types return XZ_OPTIONS_ERROR if + * XZ_DEC_ANY_CHECK was not defined at build time. + * @XZ_MEM_ERROR: Allocating memory failed. This return code is + * possible only if the decoder was initialized + * with XZ_DYNALLOC. The amount of memory that was + * tried to be allocated was no more than the + * dict_max argument given to xz_dec_init(). + * @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than + * allowed by the dict_max argument given to + * xz_dec_init(). This return value is possible + * only in multi-call mode (XZ_PREALLOC or + * XZ_DYNALLOC); the single-call mode (XZ_SINGLE) + * ignores the dict_max argument. + * @XZ_FORMAT_ERROR: File format was not recognized (wrong magic + * bytes). + * @XZ_OPTIONS_ERROR: This implementation doesn't support the requested + * compression options. In the decoder this means + * that the header CRC32 matches, but the header + * itself specifies something that we don't support. + * @XZ_DATA_ERROR: Compressed data is corrupt. + * @XZ_BUF_ERROR: Cannot make any progress. Details are slightly + * different between multi-call and single-call + * mode; more information below. + * + * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls + * to XZ code cannot consume any input and cannot produce any new output. + * This happens when there is no new input available, or the output buffer + * is full while at least one output byte is still pending. Assuming your + * code is not buggy, you can get this error only when decoding a compressed + * stream that is truncated or otherwise corrupt. + * + * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer + * is too small or the compressed input is corrupt in a way that makes the + * decoder produce more output than the caller expected. When it is + * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR + * is used instead of XZ_BUF_ERROR. + */ +enum xz_ret { + XZ_OK, + XZ_STREAM_END, + XZ_UNSUPPORTED_CHECK, + XZ_MEM_ERROR, + XZ_MEMLIMIT_ERROR, + XZ_FORMAT_ERROR, + XZ_OPTIONS_ERROR, + XZ_DATA_ERROR, + XZ_BUF_ERROR +}; + +/** + * struct xz_buf - Passing input and output buffers to XZ code + * @in: Beginning of the input buffer. This may be NULL if and only + * if in_pos is equal to in_size. + * @in_pos: Current position in the input buffer. This must not exceed + * in_size. + * @in_size: Size of the input buffer + * @out: Beginning of the output buffer. This may be NULL if and only + * if out_pos is equal to out_size. + * @out_pos: Current position in the output buffer. This must not exceed + * out_size. + * @out_size: Size of the output buffer + * + * Only the contents of the output buffer from out[out_pos] onward, and + * the variables in_pos and out_pos are modified by the XZ code. + */ +struct xz_buf { + const uint8_t *in; + size_t in_pos; + size_t in_size; + + uint8_t *out; + size_t out_pos; + size_t out_size; +}; + +/** + * struct xz_dec - Opaque type to hold the XZ decoder state + */ +struct xz_dec; + +/** + * xz_dec_init() - Allocate and initialize a XZ decoder state + * @mode: Operation mode + * @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for + * multi-call decoding. This is ignored in single-call mode + * (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes + * or 2^n + 2^(n-1) bytes (the latter sizes are less common + * in practice), so other values for dict_max don't make sense. + * In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB, + * 512 KiB, and 1 MiB are probably the only reasonable values, + * except for kernel and initramfs images where a bigger + * dictionary can be fine and useful. + * + * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at + * once. The caller must provide enough output space or the decoding will + * fail. The output space is used as the dictionary buffer, which is why + * there is no need to allocate the dictionary as part of the decoder's + * internal state. + * + * Because the output buffer is used as the workspace, streams encoded using + * a big dictionary are not a problem in single-call mode. It is enough that + * the output buffer is big enough to hold the actual uncompressed data; it + * can be smaller than the dictionary size stored in the stream headers. + * + * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes + * of memory is preallocated for the LZMA2 dictionary. This way there is no + * risk that xz_dec_run() could run out of memory, since xz_dec_run() will + * never allocate any memory. Instead, if the preallocated dictionary is too + * small for decoding the given input stream, xz_dec_run() will return + * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be + * decoded to avoid allocating excessive amount of memory for the dictionary. + * + * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC): + * dict_max specifies the maximum allowed dictionary size that xz_dec_run() + * may allocate once it has parsed the dictionary size from the stream + * headers. This way excessive allocations can be avoided while still + * limiting the maximum memory usage to a sane value to prevent running the + * system out of memory when decompressing streams from untrusted sources. + * + * On success, xz_dec_init() returns a pointer to struct xz_dec, which is + * ready to be used with xz_dec_run(). If memory allocation fails, + * xz_dec_init() returns NULL. + */ +XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); + +/** + * xz_dec_run() - Run the XZ decoder + * @s: Decoder state allocated using xz_dec_init() + * @b: Input and output buffers + * + * The possible return values depend on build options and operation mode. + * See enum xz_ret for details. + * + * Note that if an error occurs in single-call mode (return value is not + * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the + * contents of the output buffer from b->out[b->out_pos] onward are + * undefined. This is true even after XZ_BUF_ERROR, because with some filter + * chains, there may be a second pass over the output buffer, and this pass + * cannot be properly done if the output buffer is truncated. Thus, you + * cannot give the single-call decoder a too small buffer and then expect to + * get that amount valid data from the beginning of the stream. You must use + * the multi-call decoder if you don't want to uncompress the whole stream. + */ +XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); + +/** + * xz_dec_reset() - Reset an already allocated decoder state + * @s: Decoder state allocated using xz_dec_init() + * + * This function can be used to reset the multi-call decoder state without + * freeing and reallocating memory with xz_dec_end() and xz_dec_init(). + * + * In single-call mode, xz_dec_reset() is always called in the beginning of + * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in + * multi-call mode. + */ +XZ_EXTERN void xz_dec_reset(struct xz_dec *s); + +/** + * xz_dec_end() - Free the memory allocated for the decoder state + * @s: Decoder state allocated using xz_dec_init(). If s is NULL, + * this function does nothing. + */ +XZ_EXTERN void xz_dec_end(struct xz_dec *s); + +/* + * Standalone build (userspace build or in-kernel build for boot time use) + * needs a CRC32 implementation. For normal in-kernel use, kernel's own + * CRC32 module is used instead, and users of this module don't need to + * care about the functions below. + */ +#ifndef XZ_INTERNAL_CRC32 +# ifdef __KERNEL__ +# define XZ_INTERNAL_CRC32 0 +# else +# define XZ_INTERNAL_CRC32 1 +# endif +#endif + +/* + * If CRC64 support has been enabled with XZ_USE_CRC64, a CRC64 + * implementation is needed too. + */ +#ifndef XZ_USE_CRC64 +# undef XZ_INTERNAL_CRC64 +# define XZ_INTERNAL_CRC64 0 +#endif +#ifndef XZ_INTERNAL_CRC64 +# ifdef __KERNEL__ +# error Using CRC64 in the kernel has not been implemented. +# else +# define XZ_INTERNAL_CRC64 1 +# endif +#endif + +#if XZ_INTERNAL_CRC32 +/* + * This must be called before any other xz_* function to initialize + * the CRC32 lookup table. + */ +XZ_EXTERN void xz_crc32_init(void); + +/* + * Update CRC32 value using the polynomial from IEEE-802.3. To start a new + * calculation, the third argument must be zero. To continue the calculation, + * the previously returned value is passed as the third argument. + */ +XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); +#endif + +#if XZ_INTERNAL_CRC64 +/* + * This must be called before any other xz_* function (except xz_crc32_init()) + * to initialize the CRC64 lookup table. + */ +XZ_EXTERN void xz_crc64_init(void); + +/* + * Update CRC64 value using the polynomial from ECMA-182. To start a new + * calculation, the third argument must be zero. To continue the calculation, + * the previously returned value is passed as the third argument. + */ +XZ_EXTERN uint64_t xz_crc64(const uint8_t *buf, size_t size, uint64_t crc); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/depends/xz-embedded/src/xz_config.h b/depends/xz-embedded/src/xz_config.h new file mode 100644 index 00000000..eb9dac1a --- /dev/null +++ b/depends/xz-embedded/src/xz_config.h @@ -0,0 +1,124 @@ +/* + * Private includes and definitions for userspace use of XZ Embedded + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_CONFIG_H +#define XZ_CONFIG_H + +/* Uncomment to enable CRC64 support. */ +/* #define XZ_USE_CRC64 */ + +/* Uncomment as needed to enable BCJ filter decoders. */ +/* #define XZ_DEC_X86 */ +/* #define XZ_DEC_POWERPC */ +/* #define XZ_DEC_IA64 */ +/* #define XZ_DEC_ARM */ +/* #define XZ_DEC_ARMTHUMB */ +/* #define XZ_DEC_SPARC */ + +/* + * MSVC doesn't support modern C but XZ Embedded is mostly C89 + * so these are enough. + */ +#ifdef _MSC_VER +typedef unsigned char bool; +# define true 1 +# define false 0 +# define inline __inline +#else +# include +#endif + +#include +#include + +#include "xz.h" + +#define kmalloc(size, flags) malloc(size) +#define kfree(ptr) free(ptr) +#define vmalloc(size) malloc(size) +#define vfree(ptr) free(ptr) + +#define memeq(a, b, size) (memcmp(a, b, size) == 0) +#define memzero(buf, size) memset(buf, 0, size) + +#ifndef min +# define min(x, y) ((x) < (y) ? (x) : (y)) +#endif +#define min_t(type, x, y) min(x, y) + +/* + * Some functions have been marked with __always_inline to keep the + * performance reasonable even when the compiler is optimizing for + * small code size. You may be able to save a few bytes by #defining + * __always_inline to plain inline, but don't complain if the code + * becomes slow. + * + * NOTE: System headers on GNU/Linux may #define this macro already, + * so if you want to change it, you need to #undef it first. + */ +#ifndef __always_inline +# ifdef __GNUC__ +# define __always_inline \ + inline __attribute__((__always_inline__)) +# else +# define __always_inline inline +# endif +#endif + +/* Inline functions to access unaligned unsigned 32-bit integers */ +#ifndef get_unaligned_le32 +static inline uint32_t get_unaligned_le32(const uint8_t *buf) +{ + return (uint32_t)buf[0] + | ((uint32_t)buf[1] << 8) + | ((uint32_t)buf[2] << 16) + | ((uint32_t)buf[3] << 24); +} +#endif + +#ifndef get_unaligned_be32 +static inline uint32_t get_unaligned_be32(const uint8_t *buf) +{ + return (uint32_t)(buf[0] << 24) + | ((uint32_t)buf[1] << 16) + | ((uint32_t)buf[2] << 8) + | (uint32_t)buf[3]; +} +#endif + +#ifndef put_unaligned_le32 +static inline void put_unaligned_le32(uint32_t val, uint8_t *buf) +{ + buf[0] = (uint8_t)val; + buf[1] = (uint8_t)(val >> 8); + buf[2] = (uint8_t)(val >> 16); + buf[3] = (uint8_t)(val >> 24); +} +#endif + +#ifndef put_unaligned_be32 +static inline void put_unaligned_be32(uint32_t val, uint8_t *buf) +{ + buf[0] = (uint8_t)(val >> 24); + buf[1] = (uint8_t)(val >> 16); + buf[2] = (uint8_t)(val >> 8); + buf[3] = (uint8_t)val; +} +#endif + +/* + * Use get_unaligned_le32() also for aligned access for simplicity. On + * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr)) + * could save a few bytes in code size. + */ +#ifndef get_le32 +# define get_le32 get_unaligned_le32 +#endif + +#endif diff --git a/depends/xz-embedded/src/xz_crc32.c b/depends/xz-embedded/src/xz_crc32.c new file mode 100644 index 00000000..34532d14 --- /dev/null +++ b/depends/xz-embedded/src/xz_crc32.c @@ -0,0 +1,59 @@ +/* + * CRC32 using the polynomial from IEEE-802.3 + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +/* + * This is not the fastest implementation, but it is pretty compact. + * The fastest versions of xz_crc32() on modern CPUs without hardware + * accelerated CRC instruction are 3-5 times as fast as this version, + * but they are bigger and use more memory for the lookup table. + */ + +#include "xz_private.h" + +/* + * STATIC_RW_DATA is used in the pre-boot environment on some architectures. + * See for details. + */ +#ifndef STATIC_RW_DATA +# define STATIC_RW_DATA static +#endif + +STATIC_RW_DATA uint32_t xz_crc32_table[256]; + +XZ_EXTERN void xz_crc32_init(void) +{ + const uint32_t poly = 0xEDB88320; + + uint32_t i; + uint32_t j; + uint32_t r; + + for (i = 0; i < 256; ++i) { + r = i; + for (j = 0; j < 8; ++j) + r = (r >> 1) ^ (poly & ~((r & 1) - 1)); + + xz_crc32_table[i] = r; + } + + return; +} + +XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ + crc = ~crc; + + while (size != 0) { + crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8); + --size; + } + + return ~crc; +} diff --git a/depends/xz-embedded/src/xz_crc64.c b/depends/xz-embedded/src/xz_crc64.c new file mode 100644 index 00000000..ca1caee8 --- /dev/null +++ b/depends/xz-embedded/src/xz_crc64.c @@ -0,0 +1,50 @@ +/* + * CRC64 using the polynomial from ECMA-182 + * + * This file is similar to xz_crc32.c. See the comments there. + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" + +#ifndef STATIC_RW_DATA +# define STATIC_RW_DATA static +#endif + +STATIC_RW_DATA uint64_t xz_crc64_table[256]; + +XZ_EXTERN void xz_crc64_init(void) +{ + const uint64_t poly = 0xC96C5795D7870F42; + + uint32_t i; + uint32_t j; + uint64_t r; + + for (i = 0; i < 256; ++i) { + r = i; + for (j = 0; j < 8; ++j) + r = (r >> 1) ^ (poly & ~((r & 1) - 1)); + + xz_crc64_table[i] = r; + } + + return; +} + +XZ_EXTERN uint64_t xz_crc64(const uint8_t *buf, size_t size, uint64_t crc) +{ + crc = ~crc; + + while (size != 0) { + crc = xz_crc64_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8); + --size; + } + + return ~crc; +} diff --git a/depends/xz-embedded/src/xz_dec_bcj.c b/depends/xz-embedded/src/xz_dec_bcj.c new file mode 100644 index 00000000..a768e6d2 --- /dev/null +++ b/depends/xz-embedded/src/xz_dec_bcj.c @@ -0,0 +1,574 @@ +/* + * Branch/Call/Jump (BCJ) filter decoders + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" + +/* + * The rest of the file is inside this ifdef. It makes things a little more + * convenient when building without support for any BCJ filters. + */ +#ifdef XZ_DEC_BCJ + +struct xz_dec_bcj { + /* Type of the BCJ filter being used */ + enum { + BCJ_X86 = 4, /* x86 or x86-64 */ + BCJ_POWERPC = 5, /* Big endian only */ + BCJ_IA64 = 6, /* Big or little endian */ + BCJ_ARM = 7, /* Little endian only */ + BCJ_ARMTHUMB = 8, /* Little endian only */ + BCJ_SPARC = 9 /* Big or little endian */ + } type; + + /* + * Return value of the next filter in the chain. We need to preserve + * this information across calls, because we must not call the next + * filter anymore once it has returned XZ_STREAM_END. + */ + enum xz_ret ret; + + /* True if we are operating in single-call mode. */ + bool single_call; + + /* + * Absolute position relative to the beginning of the uncompressed + * data (in a single .xz Block). We care only about the lowest 32 + * bits so this doesn't need to be uint64_t even with big files. + */ + uint32_t pos; + + /* x86 filter state */ + uint32_t x86_prev_mask; + + /* Temporary space to hold the variables from struct xz_buf */ + uint8_t *out; + size_t out_pos; + size_t out_size; + + struct { + /* Amount of already filtered data in the beginning of buf */ + size_t filtered; + + /* Total amount of data currently stored in buf */ + size_t size; + + /* + * Buffer to hold a mix of filtered and unfiltered data. This + * needs to be big enough to hold Alignment + 2 * Look-ahead: + * + * Type Alignment Look-ahead + * x86 1 4 + * PowerPC 4 0 + * IA-64 16 0 + * ARM 4 0 + * ARM-Thumb 2 2 + * SPARC 4 0 + */ + uint8_t buf[16]; + } temp; +}; + +#ifdef XZ_DEC_X86 +/* + * This is used to test the most significant byte of a memory address + * in an x86 instruction. + */ +static inline int bcj_x86_test_msbyte(uint8_t b) +{ + return b == 0x00 || b == 0xFF; +} + +static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + static const bool mask_to_allowed_status[8] + = { true, true, true, false, true, false, false, false }; + + static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; + + size_t i; + size_t prev_pos = (size_t)-1; + uint32_t prev_mask = s->x86_prev_mask; + uint32_t src; + uint32_t dest; + uint32_t j; + uint8_t b; + + if (size <= 4) + return 0; + + size -= 4; + for (i = 0; i < size; ++i) { + if ((buf[i] & 0xFE) != 0xE8) + continue; + + prev_pos = i - prev_pos; + if (prev_pos > 3) { + prev_mask = 0; + } else { + prev_mask = (prev_mask << (prev_pos - 1)) & 7; + if (prev_mask != 0) { + b = buf[i + 4 - mask_to_bit_num[prev_mask]]; + if (!mask_to_allowed_status[prev_mask] + || bcj_x86_test_msbyte(b)) { + prev_pos = i; + prev_mask = (prev_mask << 1) | 1; + continue; + } + } + } + + prev_pos = i; + + if (bcj_x86_test_msbyte(buf[i + 4])) { + src = get_unaligned_le32(buf + i + 1); + while (true) { + dest = src - (s->pos + (uint32_t)i + 5); + if (prev_mask == 0) + break; + + j = mask_to_bit_num[prev_mask] * 8; + b = (uint8_t)(dest >> (24 - j)); + if (!bcj_x86_test_msbyte(b)) + break; + + src = dest ^ (((uint32_t)1 << (32 - j)) - 1); + } + + dest &= 0x01FFFFFF; + dest |= (uint32_t)0 - (dest & 0x01000000); + put_unaligned_le32(dest, buf + i + 1); + i += 4; + } else { + prev_mask = (prev_mask << 1) | 1; + } + } + + prev_pos = i - prev_pos; + s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1); + return i; +} +#endif + +#ifdef XZ_DEC_POWERPC +static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + + for (i = 0; i + 4 <= size; i += 4) { + instr = get_unaligned_be32(buf + i); + if ((instr & 0xFC000003) == 0x48000001) { + instr &= 0x03FFFFFC; + instr -= s->pos + (uint32_t)i; + instr &= 0x03FFFFFC; + instr |= 0x48000001; + put_unaligned_be32(instr, buf + i); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_IA64 +static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + static const uint8_t branch_table[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + /* + * The local variables take a little bit stack space, but it's less + * than what LZMA2 decoder takes, so it doesn't make sense to reduce + * stack usage here without doing that for the LZMA2 decoder too. + */ + + /* Loop counters */ + size_t i; + size_t j; + + /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ + uint32_t slot; + + /* Bitwise offset of the instruction indicated by slot */ + uint32_t bit_pos; + + /* bit_pos split into byte and bit parts */ + uint32_t byte_pos; + uint32_t bit_res; + + /* Address part of an instruction */ + uint32_t addr; + + /* Mask used to detect which instructions to convert */ + uint32_t mask; + + /* 41-bit instruction stored somewhere in the lowest 48 bits */ + uint64_t instr; + + /* Instruction normalized with bit_res for easier manipulation */ + uint64_t norm; + + for (i = 0; i + 16 <= size; i += 16) { + mask = branch_table[buf[i] & 0x1F]; + for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { + if (((mask >> slot) & 1) == 0) + continue; + + byte_pos = bit_pos >> 3; + bit_res = bit_pos & 7; + instr = 0; + for (j = 0; j < 6; ++j) + instr |= (uint64_t)(buf[i + j + byte_pos]) + << (8 * j); + + norm = instr >> bit_res; + + if (((norm >> 37) & 0x0F) == 0x05 + && ((norm >> 9) & 0x07) == 0) { + addr = (norm >> 13) & 0x0FFFFF; + addr |= ((uint32_t)(norm >> 36) & 1) << 20; + addr <<= 4; + addr -= s->pos + (uint32_t)i; + addr >>= 4; + + norm &= ~((uint64_t)0x8FFFFF << 13); + norm |= (uint64_t)(addr & 0x0FFFFF) << 13; + norm |= (uint64_t)(addr & 0x100000) + << (36 - 20); + + instr &= (1 << bit_res) - 1; + instr |= norm << bit_res; + + for (j = 0; j < 6; j++) + buf[i + j + byte_pos] + = (uint8_t)(instr >> (8 * j)); + } + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_ARM +static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t addr; + + for (i = 0; i + 4 <= size; i += 4) { + if (buf[i + 3] == 0xEB) { + addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) + | ((uint32_t)buf[i + 2] << 16); + addr <<= 2; + addr -= s->pos + (uint32_t)i + 8; + addr >>= 2; + buf[i] = (uint8_t)addr; + buf[i + 1] = (uint8_t)(addr >> 8); + buf[i + 2] = (uint8_t)(addr >> 16); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_ARMTHUMB +static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t addr; + + for (i = 0; i + 4 <= size; i += 2) { + if ((buf[i + 1] & 0xF8) == 0xF0 + && (buf[i + 3] & 0xF8) == 0xF8) { + addr = (((uint32_t)buf[i + 1] & 0x07) << 19) + | ((uint32_t)buf[i] << 11) + | (((uint32_t)buf[i + 3] & 0x07) << 8) + | (uint32_t)buf[i + 2]; + addr <<= 1; + addr -= s->pos + (uint32_t)i + 4; + addr >>= 1; + buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); + buf[i] = (uint8_t)(addr >> 11); + buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07)); + buf[i + 2] = (uint8_t)addr; + i += 2; + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_SPARC +static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + + for (i = 0; i + 4 <= size; i += 4) { + instr = get_unaligned_be32(buf + i); + if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { + instr <<= 2; + instr -= s->pos + (uint32_t)i; + instr >>= 2; + instr = ((uint32_t)0x40000000 - (instr & 0x400000)) + | 0x40000000 | (instr & 0x3FFFFF); + put_unaligned_be32(instr, buf + i); + } + } + + return i; +} +#endif + +/* + * Apply the selected BCJ filter. Update *pos and s->pos to match the amount + * of data that got filtered. + * + * NOTE: This is implemented as a switch statement to avoid using function + * pointers, which could be problematic in the kernel boot code, which must + * avoid pointers to static data (at least on x86). + */ +static void bcj_apply(struct xz_dec_bcj *s, + uint8_t *buf, size_t *pos, size_t size) +{ + size_t filtered; + + buf += *pos; + size -= *pos; + + switch (s->type) { +#ifdef XZ_DEC_X86 + case BCJ_X86: + filtered = bcj_x86(s, buf, size); + break; +#endif +#ifdef XZ_DEC_POWERPC + case BCJ_POWERPC: + filtered = bcj_powerpc(s, buf, size); + break; +#endif +#ifdef XZ_DEC_IA64 + case BCJ_IA64: + filtered = bcj_ia64(s, buf, size); + break; +#endif +#ifdef XZ_DEC_ARM + case BCJ_ARM: + filtered = bcj_arm(s, buf, size); + break; +#endif +#ifdef XZ_DEC_ARMTHUMB + case BCJ_ARMTHUMB: + filtered = bcj_armthumb(s, buf, size); + break; +#endif +#ifdef XZ_DEC_SPARC + case BCJ_SPARC: + filtered = bcj_sparc(s, buf, size); + break; +#endif + default: + /* Never reached but silence compiler warnings. */ + filtered = 0; + break; + } + + *pos += filtered; + s->pos += filtered; +} + +/* + * Flush pending filtered data from temp to the output buffer. + * Move the remaining mixture of possibly filtered and unfiltered + * data to the beginning of temp. + */ +static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) +{ + size_t copy_size; + + copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); + memcpy(b->out + b->out_pos, s->temp.buf, copy_size); + b->out_pos += copy_size; + + s->temp.filtered -= copy_size; + s->temp.size -= copy_size; + memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size); +} + +/* + * The BCJ filter functions are primitive in sense that they process the + * data in chunks of 1-16 bytes. To hide this issue, this function does + * some buffering. + */ +XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, + struct xz_dec_lzma2 *lzma2, + struct xz_buf *b) +{ + size_t out_start; + + /* + * Flush pending already filtered data to the output buffer. Return + * immediatelly if we couldn't flush everything, or if the next + * filter in the chain had already returned XZ_STREAM_END. + */ + if (s->temp.filtered > 0) { + bcj_flush(s, b); + if (s->temp.filtered > 0) + return XZ_OK; + + if (s->ret == XZ_STREAM_END) + return XZ_STREAM_END; + } + + /* + * If we have more output space than what is currently pending in + * temp, copy the unfiltered data from temp to the output buffer + * and try to fill the output buffer by decoding more data from the + * next filter in the chain. Apply the BCJ filter on the new data + * in the output buffer. If everything cannot be filtered, copy it + * to temp and rewind the output buffer position accordingly. + * + * This needs to be always run when temp.size == 0 to handle a special + * case where the output buffer is full and the next filter has no + * more output coming but hasn't returned XZ_STREAM_END yet. + */ + if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) { + out_start = b->out_pos; + memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); + b->out_pos += s->temp.size; + + s->ret = xz_dec_lzma2_run(lzma2, b); + if (s->ret != XZ_STREAM_END + && (s->ret != XZ_OK || s->single_call)) + return s->ret; + + bcj_apply(s, b->out, &out_start, b->out_pos); + + /* + * As an exception, if the next filter returned XZ_STREAM_END, + * we can do that too, since the last few bytes that remain + * unfiltered are meant to remain unfiltered. + */ + if (s->ret == XZ_STREAM_END) + return XZ_STREAM_END; + + s->temp.size = b->out_pos - out_start; + b->out_pos -= s->temp.size; + memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); + + /* + * If there wasn't enough input to the next filter to fill + * the output buffer with unfiltered data, there's no point + * to try decoding more data to temp. + */ + if (b->out_pos + s->temp.size < b->out_size) + return XZ_OK; + } + + /* + * We have unfiltered data in temp. If the output buffer isn't full + * yet, try to fill the temp buffer by decoding more data from the + * next filter. Apply the BCJ filter on temp. Then we hopefully can + * fill the actual output buffer by copying filtered data from temp. + * A mix of filtered and unfiltered data may be left in temp; it will + * be taken care on the next call to this function. + */ + if (b->out_pos < b->out_size) { + /* Make b->out{,_pos,_size} temporarily point to s->temp. */ + s->out = b->out; + s->out_pos = b->out_pos; + s->out_size = b->out_size; + b->out = s->temp.buf; + b->out_pos = s->temp.size; + b->out_size = sizeof(s->temp.buf); + + s->ret = xz_dec_lzma2_run(lzma2, b); + + s->temp.size = b->out_pos; + b->out = s->out; + b->out_pos = s->out_pos; + b->out_size = s->out_size; + + if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) + return s->ret; + + bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size); + + /* + * If the next filter returned XZ_STREAM_END, we mark that + * everything is filtered, since the last unfiltered bytes + * of the stream are meant to be left as is. + */ + if (s->ret == XZ_STREAM_END) + s->temp.filtered = s->temp.size; + + bcj_flush(s, b); + if (s->temp.filtered > 0) + return XZ_OK; + } + + return s->ret; +} + +XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) +{ + struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s != NULL) + s->single_call = single_call; + + return s; +} + +XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) +{ + switch (id) { +#ifdef XZ_DEC_X86 + case BCJ_X86: +#endif +#ifdef XZ_DEC_POWERPC + case BCJ_POWERPC: +#endif +#ifdef XZ_DEC_IA64 + case BCJ_IA64: +#endif +#ifdef XZ_DEC_ARM + case BCJ_ARM: +#endif +#ifdef XZ_DEC_ARMTHUMB + case BCJ_ARMTHUMB: +#endif +#ifdef XZ_DEC_SPARC + case BCJ_SPARC: +#endif + break; + + default: + /* Unsupported Filter ID */ + return XZ_OPTIONS_ERROR; + } + + s->type = id; + s->ret = XZ_OK; + s->pos = 0; + s->x86_prev_mask = 0; + s->temp.filtered = 0; + s->temp.size = 0; + + return XZ_OK; +} + +#endif diff --git a/depends/xz-embedded/src/xz_dec_lzma2.c b/depends/xz-embedded/src/xz_dec_lzma2.c new file mode 100644 index 00000000..a6cdc969 --- /dev/null +++ b/depends/xz-embedded/src/xz_dec_lzma2.c @@ -0,0 +1,1171 @@ +/* + * LZMA2 decoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" +#include "xz_lzma2.h" + +/* + * Range decoder initialization eats the first five bytes of each LZMA chunk. + */ +#define RC_INIT_BYTES 5 + +/* + * Minimum number of usable input buffer to safely decode one LZMA symbol. + * The worst case is that we decode 22 bits using probabilities and 26 + * direct bits. This may decode at maximum of 20 bytes of input. However, + * lzma_main() does an extra normalization before returning, thus we + * need to put 21 here. + */ +#define LZMA_IN_REQUIRED 21 + +/* + * Dictionary (history buffer) + * + * These are always true: + * start <= pos <= full <= end + * pos <= limit <= end + * + * In multi-call mode, also these are true: + * end == size + * size <= size_max + * allocated <= size + * + * Most of these variables are size_t to support single-call mode, + * in which the dictionary variables address the actual output + * buffer directly. + */ +struct dictionary { + /* Beginning of the history buffer */ + uint8_t *buf; + + /* Old position in buf (before decoding more data) */ + size_t start; + + /* Position in buf */ + size_t pos; + + /* + * How full dictionary is. This is used to detect corrupt input that + * would read beyond the beginning of the uncompressed stream. + */ + size_t full; + + /* Write limit; we don't write to buf[limit] or later bytes. */ + size_t limit; + + /* + * End of the dictionary buffer. In multi-call mode, this is + * the same as the dictionary size. In single-call mode, this + * indicates the size of the output buffer. + */ + size_t end; + + /* + * Size of the dictionary as specified in Block Header. This is used + * together with "full" to detect corrupt input that would make us + * read beyond the beginning of the uncompressed stream. + */ + uint32_t size; + + /* + * Maximum allowed dictionary size in multi-call mode. + * This is ignored in single-call mode. + */ + uint32_t size_max; + + /* + * Amount of memory currently allocated for the dictionary. + * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, + * size_max is always the same as the allocated size.) + */ + uint32_t allocated; + + /* Operation mode */ + enum xz_mode mode; +}; + +/* Range decoder */ +struct rc_dec { + uint32_t range; + uint32_t code; + + /* + * Number of initializing bytes remaining to be read + * by rc_read_init(). + */ + uint32_t init_bytes_left; + + /* + * Buffer from which we read our input. It can be either + * temp.buf or the caller-provided input buffer. + */ + const uint8_t *in; + size_t in_pos; + size_t in_limit; +}; + +/* Probabilities for a length decoder. */ +struct lzma_len_dec { + /* Probability of match length being at least 10 */ + uint16_t choice; + + /* Probability of match length being at least 18 */ + uint16_t choice2; + + /* Probabilities for match lengths 2-9 */ + uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + + /* Probabilities for match lengths 10-17 */ + uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + + /* Probabilities for match lengths 18-273 */ + uint16_t high[LEN_HIGH_SYMBOLS]; +}; + +struct lzma_dec { + /* Distances of latest four matches */ + uint32_t rep0; + uint32_t rep1; + uint32_t rep2; + uint32_t rep3; + + /* Types of the most recently seen LZMA symbols */ + enum lzma_state state; + + /* + * Length of a match. This is updated so that dict_repeat can + * be called again to finish repeating the whole match. + */ + uint32_t len; + + /* + * LZMA properties or related bit masks (number of literal + * context bits, a mask dervied from the number of literal + * position bits, and a mask dervied from the number + * position bits) + */ + uint32_t lc; + uint32_t literal_pos_mask; /* (1 << lp) - 1 */ + uint32_t pos_mask; /* (1 << pb) - 1 */ + + /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ + uint16_t is_match[STATES][POS_STATES_MAX]; + + /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */ + uint16_t is_rep[STATES]; + + /* + * If 0, distance of a repeated match is rep0. + * Otherwise check is_rep1. + */ + uint16_t is_rep0[STATES]; + + /* + * If 0, distance of a repeated match is rep1. + * Otherwise check is_rep2. + */ + uint16_t is_rep1[STATES]; + + /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */ + uint16_t is_rep2[STATES]; + + /* + * If 1, the repeated match has length of one byte. Otherwise + * the length is decoded from rep_len_decoder. + */ + uint16_t is_rep0_long[STATES][POS_STATES_MAX]; + + /* + * Probability tree for the highest two bits of the match + * distance. There is a separate probability tree for match + * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + */ + uint16_t dist_slot[DIST_STATES][DIST_SLOTS]; + + /* + * Probility trees for additional bits for match distance + * when the distance is in the range [4, 127]. + */ + uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END]; + + /* + * Probability tree for the lowest four bits of a match + * distance that is equal to or greater than 128. + */ + uint16_t dist_align[ALIGN_SIZE]; + + /* Length of a normal match */ + struct lzma_len_dec match_len_dec; + + /* Length of a repeated match */ + struct lzma_len_dec rep_len_dec; + + /* Probabilities of literals */ + uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; +}; + +struct lzma2_dec { + /* Position in xz_dec_lzma2_run(). */ + enum lzma2_seq { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA_PREPARE, + SEQ_LZMA_RUN, + SEQ_COPY + } sequence; + + /* Next position after decoding the compressed size of the chunk. */ + enum lzma2_seq next_sequence; + + /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ + uint32_t uncompressed; + + /* + * Compressed size of LZMA chunk or compressed/uncompressed + * size of uncompressed chunk (64 KiB at maximum) + */ + uint32_t compressed; + + /* + * True if dictionary reset is needed. This is false before + * the first chunk (LZMA or uncompressed). + */ + bool need_dict_reset; + + /* + * True if new LZMA properties are needed. This is false + * before the first LZMA chunk. + */ + bool need_props; +}; + +struct xz_dec_lzma2 { + /* + * The order below is important on x86 to reduce code size and + * it shouldn't hurt on other platforms. Everything up to and + * including lzma.pos_mask are in the first 128 bytes on x86-32, + * which allows using smaller instructions to access those + * variables. On x86-64, fewer variables fit into the first 128 + * bytes, but this is still the best order without sacrificing + * the readability by splitting the structures. + */ + struct rc_dec rc; + struct dictionary dict; + struct lzma2_dec lzma2; + struct lzma_dec lzma; + + /* + * Temporary buffer which holds small number of input bytes between + * decoder calls. See lzma2_lzma() for details. + */ + struct { + uint32_t size; + uint8_t buf[3 * LZMA_IN_REQUIRED]; + } temp; +}; + +/************** + * Dictionary * + **************/ + +/* + * Reset the dictionary state. When in single-call mode, set up the beginning + * of the dictionary to point to the actual output buffer. + */ +static void dict_reset(struct dictionary *dict, struct xz_buf *b) +{ + if (DEC_IS_SINGLE(dict->mode)) { + dict->buf = b->out + b->out_pos; + dict->end = b->out_size - b->out_pos; + } + + dict->start = 0; + dict->pos = 0; + dict->limit = 0; + dict->full = 0; +} + +/* Set dictionary write limit */ +static void dict_limit(struct dictionary *dict, size_t out_max) +{ + if (dict->end - dict->pos <= out_max) + dict->limit = dict->end; + else + dict->limit = dict->pos + out_max; +} + +/* Return true if at least one byte can be written into the dictionary. */ +static inline bool dict_has_space(const struct dictionary *dict) +{ + return dict->pos < dict->limit; +} + +/* + * Get a byte from the dictionary at the given distance. The distance is + * assumed to valid, or as a special case, zero when the dictionary is + * still empty. This special case is needed for single-call decoding to + * avoid writing a '\0' to the end of the destination buffer. + */ +static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist) +{ + size_t offset = dict->pos - dist - 1; + + if (dist >= dict->pos) + offset += dict->end; + + return dict->full > 0 ? dict->buf[offset] : 0; +} + +/* + * Put one byte into the dictionary. It is assumed that there is space for it. + */ +static inline void dict_put(struct dictionary *dict, uint8_t byte) +{ + dict->buf[dict->pos++] = byte; + + if (dict->full < dict->pos) + dict->full = dict->pos; +} + +/* + * Repeat given number of bytes from the given distance. If the distance is + * invalid, false is returned. On success, true is returned and *len is + * updated to indicate how many bytes were left to be repeated. + */ +static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist) +{ + size_t back; + uint32_t left; + + if (dist >= dict->full || dist >= dict->size) + return false; + + left = min_t(size_t, dict->limit - dict->pos, *len); + *len -= left; + + back = dict->pos - dist - 1; + if (dist >= dict->pos) + back += dict->end; + + do { + dict->buf[dict->pos++] = dict->buf[back++]; + if (back == dict->end) + back = 0; + } while (--left > 0); + + if (dict->full < dict->pos) + dict->full = dict->pos; + + return true; +} + +/* Copy uncompressed data as is from input to dictionary and output buffers. */ +static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, + uint32_t *left) +{ + size_t copy_size; + + while (*left > 0 && b->in_pos < b->in_size + && b->out_pos < b->out_size) { + copy_size = min(b->in_size - b->in_pos, + b->out_size - b->out_pos); + if (copy_size > dict->end - dict->pos) + copy_size = dict->end - dict->pos; + if (copy_size > *left) + copy_size = *left; + + *left -= copy_size; + + memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size); + dict->pos += copy_size; + + if (dict->full < dict->pos) + dict->full = dict->pos; + + if (DEC_IS_MULTI(dict->mode)) { + if (dict->pos == dict->end) + dict->pos = 0; + + memcpy(b->out + b->out_pos, b->in + b->in_pos, + copy_size); + } + + dict->start = dict->pos; + + b->out_pos += copy_size; + b->in_pos += copy_size; + } +} + +/* + * Flush pending data from dictionary to b->out. It is assumed that there is + * enough space in b->out. This is guaranteed because caller uses dict_limit() + * before decoding data into the dictionary. + */ +static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) +{ + size_t copy_size = dict->pos - dict->start; + + if (DEC_IS_MULTI(dict->mode)) { + if (dict->pos == dict->end) + dict->pos = 0; + + memcpy(b->out + b->out_pos, dict->buf + dict->start, + copy_size); + } + + dict->start = dict->pos; + b->out_pos += copy_size; + return copy_size; +} + +/***************** + * Range decoder * + *****************/ + +/* Reset the range decoder. */ +static void rc_reset(struct rc_dec *rc) +{ + rc->range = (uint32_t)-1; + rc->code = 0; + rc->init_bytes_left = RC_INIT_BYTES; +} + +/* + * Read the first five initial bytes into rc->code if they haven't been + * read already. (Yes, the first byte gets completely ignored.) + */ +static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b) +{ + while (rc->init_bytes_left > 0) { + if (b->in_pos == b->in_size) + return false; + + rc->code = (rc->code << 8) + b->in[b->in_pos++]; + --rc->init_bytes_left; + } + + return true; +} + +/* Return true if there may not be enough input for the next decoding loop. */ +static inline bool rc_limit_exceeded(const struct rc_dec *rc) +{ + return rc->in_pos > rc->in_limit; +} + +/* + * Return true if it is possible (from point of view of range decoder) that + * we have reached the end of the LZMA chunk. + */ +static inline bool rc_is_finished(const struct rc_dec *rc) +{ + return rc->code == 0; +} + +/* Read the next input byte if needed. */ +static __always_inline void rc_normalize(struct rc_dec *rc) +{ + if (rc->range < RC_TOP_VALUE) { + rc->range <<= RC_SHIFT_BITS; + rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++]; + } +} + +/* + * Decode one bit. In some versions, this function has been splitted in three + * functions so that the compiler is supposed to be able to more easily avoid + * an extra branch. In this particular version of the LZMA decoder, this + * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 + * on x86). Using a non-splitted version results in nicer looking code too. + * + * NOTE: This must return an int. Do not make it return a bool or the speed + * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, + * and it generates 10-20 % faster code than GCC 3.x from this file anyway.) + */ +static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob) +{ + uint32_t bound; + int bit; + + rc_normalize(rc); + bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob; + if (rc->code < bound) { + rc->range = bound; + *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS; + bit = 0; + } else { + rc->range -= bound; + rc->code -= bound; + *prob -= *prob >> RC_MOVE_BITS; + bit = 1; + } + + return bit; +} + +/* Decode a bittree starting from the most significant bit. */ +static __always_inline uint32_t rc_bittree(struct rc_dec *rc, + uint16_t *probs, uint32_t limit) +{ + uint32_t symbol = 1; + + do { + if (rc_bit(rc, &probs[symbol])) + symbol = (symbol << 1) + 1; + else + symbol <<= 1; + } while (symbol < limit); + + return symbol; +} + +/* Decode a bittree starting from the least significant bit. */ +static __always_inline void rc_bittree_reverse(struct rc_dec *rc, + uint16_t *probs, + uint32_t *dest, uint32_t limit) +{ + uint32_t symbol = 1; + uint32_t i = 0; + + do { + if (rc_bit(rc, &probs[symbol])) { + symbol = (symbol << 1) + 1; + *dest += 1 << i; + } else { + symbol <<= 1; + } + } while (++i < limit); +} + +/* Decode direct bits (fixed fifty-fifty probability) */ +static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit) +{ + uint32_t mask; + + do { + rc_normalize(rc); + rc->range >>= 1; + rc->code -= rc->range; + mask = (uint32_t)0 - (rc->code >> 31); + rc->code += rc->range & mask; + *dest = (*dest << 1) + (mask + 1); + } while (--limit > 0); +} + +/******** + * LZMA * + ********/ + +/* Get pointer to literal coder probability array. */ +static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s) +{ + uint32_t prev_byte = dict_get(&s->dict, 0); + uint32_t low = prev_byte >> (8 - s->lzma.lc); + uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc; + return s->lzma.literal[low + high]; +} + +/* Decode a literal (one 8-bit byte) */ +static void lzma_literal(struct xz_dec_lzma2 *s) +{ + uint16_t *probs; + uint32_t symbol; + uint32_t match_byte; + uint32_t match_bit; + uint32_t offset; + uint32_t i; + + probs = lzma_literal_probs(s); + + if (lzma_state_is_literal(s->lzma.state)) { + symbol = rc_bittree(&s->rc, probs, 0x100); + } else { + symbol = 1; + match_byte = dict_get(&s->dict, s->lzma.rep0) << 1; + offset = 0x100; + + do { + match_bit = match_byte & offset; + match_byte <<= 1; + i = offset + match_bit + symbol; + + if (rc_bit(&s->rc, &probs[i])) { + symbol = (symbol << 1) + 1; + offset &= match_bit; + } else { + symbol <<= 1; + offset &= ~match_bit; + } + } while (symbol < 0x100); + } + + dict_put(&s->dict, (uint8_t)symbol); + lzma_state_literal(&s->lzma.state); +} + +/* Decode the length of the match into s->lzma.len. */ +static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l, + uint32_t pos_state) +{ + uint16_t *probs; + uint32_t limit; + + if (!rc_bit(&s->rc, &l->choice)) { + probs = l->low[pos_state]; + limit = LEN_LOW_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN; + } else { + if (!rc_bit(&s->rc, &l->choice2)) { + probs = l->mid[pos_state]; + limit = LEN_MID_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; + } else { + probs = l->high; + limit = LEN_HIGH_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS + + LEN_MID_SYMBOLS; + } + } + + s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit; +} + +/* Decode a match. The distance will be stored in s->lzma.rep0. */ +static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state) +{ + uint16_t *probs; + uint32_t dist_slot; + uint32_t limit; + + lzma_state_match(&s->lzma.state); + + s->lzma.rep3 = s->lzma.rep2; + s->lzma.rep2 = s->lzma.rep1; + s->lzma.rep1 = s->lzma.rep0; + + lzma_len(s, &s->lzma.match_len_dec, pos_state); + + probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)]; + dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS; + + if (dist_slot < DIST_MODEL_START) { + s->lzma.rep0 = dist_slot; + } else { + limit = (dist_slot >> 1) - 1; + s->lzma.rep0 = 2 + (dist_slot & 1); + + if (dist_slot < DIST_MODEL_END) { + s->lzma.rep0 <<= limit; + probs = s->lzma.dist_special + s->lzma.rep0 + - dist_slot - 1; + rc_bittree_reverse(&s->rc, probs, + &s->lzma.rep0, limit); + } else { + rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS); + s->lzma.rep0 <<= ALIGN_BITS; + rc_bittree_reverse(&s->rc, s->lzma.dist_align, + &s->lzma.rep0, ALIGN_BITS); + } + } +} + +/* + * Decode a repeated match. The distance is one of the four most recently + * seen matches. The distance will be stored in s->lzma.rep0. + */ +static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state) +{ + uint32_t tmp; + + if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) { + if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[ + s->lzma.state][pos_state])) { + lzma_state_short_rep(&s->lzma.state); + s->lzma.len = 1; + return; + } + } else { + if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) { + tmp = s->lzma.rep1; + } else { + if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) { + tmp = s->lzma.rep2; + } else { + tmp = s->lzma.rep3; + s->lzma.rep3 = s->lzma.rep2; + } + + s->lzma.rep2 = s->lzma.rep1; + } + + s->lzma.rep1 = s->lzma.rep0; + s->lzma.rep0 = tmp; + } + + lzma_state_long_rep(&s->lzma.state); + lzma_len(s, &s->lzma.rep_len_dec, pos_state); +} + +/* LZMA decoder core */ +static bool lzma_main(struct xz_dec_lzma2 *s) +{ + uint32_t pos_state; + + /* + * If the dictionary was reached during the previous call, try to + * finish the possibly pending repeat in the dictionary. + */ + if (dict_has_space(&s->dict) && s->lzma.len > 0) + dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0); + + /* + * Decode more LZMA symbols. One iteration may consume up to + * LZMA_IN_REQUIRED - 1 bytes. + */ + while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) { + pos_state = s->dict.pos & s->lzma.pos_mask; + + if (!rc_bit(&s->rc, &s->lzma.is_match[ + s->lzma.state][pos_state])) { + lzma_literal(s); + } else { + if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state])) + lzma_rep_match(s, pos_state); + else + lzma_match(s, pos_state); + + if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0)) + return false; + } + } + + /* + * Having the range decoder always normalized when we are outside + * this function makes it easier to correctly handle end of the chunk. + */ + rc_normalize(&s->rc); + + return true; +} + +/* + * Reset the LZMA decoder and range decoder state. Dictionary is nore reset + * here, because LZMA state may be reset without resetting the dictionary. + */ +static void lzma_reset(struct xz_dec_lzma2 *s) +{ + uint16_t *probs; + size_t i; + + s->lzma.state = STATE_LIT_LIT; + s->lzma.rep0 = 0; + s->lzma.rep1 = 0; + s->lzma.rep2 = 0; + s->lzma.rep3 = 0; + + /* + * All probabilities are initialized to the same value. This hack + * makes the code smaller by avoiding a separate loop for each + * probability array. + * + * This could be optimized so that only that part of literal + * probabilities that are actually required. In the common case + * we would write 12 KiB less. + */ + probs = s->lzma.is_match[0]; + for (i = 0; i < PROBS_TOTAL; ++i) + probs[i] = RC_BIT_MODEL_TOTAL / 2; + + rc_reset(&s->rc); +} + +/* + * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks + * from the decoded lp and pb values. On success, the LZMA decoder state is + * reset and true is returned. + */ +static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props) +{ + if (props > (4 * 5 + 4) * 9 + 8) + return false; + + s->lzma.pos_mask = 0; + while (props >= 9 * 5) { + props -= 9 * 5; + ++s->lzma.pos_mask; + } + + s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1; + + s->lzma.literal_pos_mask = 0; + while (props >= 9) { + props -= 9; + ++s->lzma.literal_pos_mask; + } + + s->lzma.lc = props; + + if (s->lzma.lc + s->lzma.literal_pos_mask > 4) + return false; + + s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1; + + lzma_reset(s); + + return true; +} + +/********* + * LZMA2 * + *********/ + +/* + * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't + * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This + * wrapper function takes care of making the LZMA decoder's assumption safe. + * + * As long as there is plenty of input left to be decoded in the current LZMA + * chunk, we decode directly from the caller-supplied input buffer until + * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into + * s->temp.buf, which (hopefully) gets filled on the next call to this + * function. We decode a few bytes from the temporary buffer so that we can + * continue decoding from the caller-supplied input buffer again. + */ +static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) +{ + size_t in_avail; + uint32_t tmp; + + in_avail = b->in_size - b->in_pos; + if (s->temp.size > 0 || s->lzma2.compressed == 0) { + tmp = 2 * LZMA_IN_REQUIRED - s->temp.size; + if (tmp > s->lzma2.compressed - s->temp.size) + tmp = s->lzma2.compressed - s->temp.size; + if (tmp > in_avail) + tmp = in_avail; + + memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp); + + if (s->temp.size + tmp == s->lzma2.compressed) { + memzero(s->temp.buf + s->temp.size + tmp, + sizeof(s->temp.buf) + - s->temp.size - tmp); + s->rc.in_limit = s->temp.size + tmp; + } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) { + s->temp.size += tmp; + b->in_pos += tmp; + return true; + } else { + s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED; + } + + s->rc.in = s->temp.buf; + s->rc.in_pos = 0; + + if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp) + return false; + + s->lzma2.compressed -= s->rc.in_pos; + + if (s->rc.in_pos < s->temp.size) { + s->temp.size -= s->rc.in_pos; + memmove(s->temp.buf, s->temp.buf + s->rc.in_pos, + s->temp.size); + return true; + } + + b->in_pos += s->rc.in_pos - s->temp.size; + s->temp.size = 0; + } + + in_avail = b->in_size - b->in_pos; + if (in_avail >= LZMA_IN_REQUIRED) { + s->rc.in = b->in; + s->rc.in_pos = b->in_pos; + + if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED) + s->rc.in_limit = b->in_pos + s->lzma2.compressed; + else + s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED; + + if (!lzma_main(s)) + return false; + + in_avail = s->rc.in_pos - b->in_pos; + if (in_avail > s->lzma2.compressed) + return false; + + s->lzma2.compressed -= in_avail; + b->in_pos = s->rc.in_pos; + } + + in_avail = b->in_size - b->in_pos; + if (in_avail < LZMA_IN_REQUIRED) { + if (in_avail > s->lzma2.compressed) + in_avail = s->lzma2.compressed; + + memcpy(s->temp.buf, b->in + b->in_pos, in_avail); + s->temp.size = in_avail; + b->in_pos += in_avail; + } + + return true; +} + +/* + * Take care of the LZMA2 control layer, and forward the job of actual LZMA + * decoding or copying of uncompressed chunks to other functions. + */ +XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, + struct xz_buf *b) +{ + uint32_t tmp; + + while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) { + switch (s->lzma2.sequence) { + case SEQ_CONTROL: + /* + * LZMA2 control byte + * + * Exact values: + * 0x00 End marker + * 0x01 Dictionary reset followed by + * an uncompressed chunk + * 0x02 Uncompressed chunk (no dictionary reset) + * + * Highest three bits (s->control & 0xE0): + * 0xE0 Dictionary reset, new properties and state + * reset, followed by LZMA compressed chunk + * 0xC0 New properties and state reset, followed + * by LZMA compressed chunk (no dictionary + * reset) + * 0xA0 State reset using old properties, + * followed by LZMA compressed chunk (no + * dictionary reset) + * 0x80 LZMA chunk (no dictionary or state reset) + * + * For LZMA compressed chunks, the lowest five bits + * (s->control & 1F) are the highest bits of the + * uncompressed size (bits 16-20). + * + * A new LZMA2 stream must begin with a dictionary + * reset. The first LZMA chunk must set new + * properties and reset the LZMA state. + * + * Values that don't match anything described above + * are invalid and we return XZ_DATA_ERROR. + */ + tmp = b->in[b->in_pos++]; + + if (tmp == 0x00) + return XZ_STREAM_END; + + if (tmp >= 0xE0 || tmp == 0x01) { + s->lzma2.need_props = true; + s->lzma2.need_dict_reset = false; + dict_reset(&s->dict, b); + } else if (s->lzma2.need_dict_reset) { + return XZ_DATA_ERROR; + } + + if (tmp >= 0x80) { + s->lzma2.uncompressed = (tmp & 0x1F) << 16; + s->lzma2.sequence = SEQ_UNCOMPRESSED_1; + + if (tmp >= 0xC0) { + /* + * When there are new properties, + * state reset is done at + * SEQ_PROPERTIES. + */ + s->lzma2.need_props = false; + s->lzma2.next_sequence + = SEQ_PROPERTIES; + + } else if (s->lzma2.need_props) { + return XZ_DATA_ERROR; + + } else { + s->lzma2.next_sequence + = SEQ_LZMA_PREPARE; + if (tmp >= 0xA0) + lzma_reset(s); + } + } else { + if (tmp > 0x02) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_COMPRESSED_0; + s->lzma2.next_sequence = SEQ_COPY; + } + + break; + + case SEQ_UNCOMPRESSED_1: + s->lzma2.uncompressed + += (uint32_t)b->in[b->in_pos++] << 8; + s->lzma2.sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + s->lzma2.uncompressed + += (uint32_t)b->in[b->in_pos++] + 1; + s->lzma2.sequence = SEQ_COMPRESSED_0; + break; + + case SEQ_COMPRESSED_0: + s->lzma2.compressed + = (uint32_t)b->in[b->in_pos++] << 8; + s->lzma2.sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + s->lzma2.compressed + += (uint32_t)b->in[b->in_pos++] + 1; + s->lzma2.sequence = s->lzma2.next_sequence; + break; + + case SEQ_PROPERTIES: + if (!lzma_props(s, b->in[b->in_pos++])) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_LZMA_PREPARE; + + case SEQ_LZMA_PREPARE: + if (s->lzma2.compressed < RC_INIT_BYTES) + return XZ_DATA_ERROR; + + if (!rc_read_init(&s->rc, b)) + return XZ_OK; + + s->lzma2.compressed -= RC_INIT_BYTES; + s->lzma2.sequence = SEQ_LZMA_RUN; + + case SEQ_LZMA_RUN: + /* + * Set dictionary limit to indicate how much we want + * to be encoded at maximum. Decode new data into the + * dictionary. Flush the new data from dictionary to + * b->out. Check if we finished decoding this chunk. + * In case the dictionary got full but we didn't fill + * the output buffer yet, we may run this loop + * multiple times without changing s->lzma2.sequence. + */ + dict_limit(&s->dict, min_t(size_t, + b->out_size - b->out_pos, + s->lzma2.uncompressed)); + if (!lzma2_lzma(s, b)) + return XZ_DATA_ERROR; + + s->lzma2.uncompressed -= dict_flush(&s->dict, b); + + if (s->lzma2.uncompressed == 0) { + if (s->lzma2.compressed > 0 || s->lzma.len > 0 + || !rc_is_finished(&s->rc)) + return XZ_DATA_ERROR; + + rc_reset(&s->rc); + s->lzma2.sequence = SEQ_CONTROL; + + } else if (b->out_pos == b->out_size + || (b->in_pos == b->in_size + && s->temp.size + < s->lzma2.compressed)) { + return XZ_OK; + } + + break; + + case SEQ_COPY: + dict_uncompressed(&s->dict, b, &s->lzma2.compressed); + if (s->lzma2.compressed > 0) + return XZ_OK; + + s->lzma2.sequence = SEQ_CONTROL; + break; + } + } + + return XZ_OK; +} + +XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, + uint32_t dict_max) +{ + struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->dict.mode = mode; + s->dict.size_max = dict_max; + + if (DEC_IS_PREALLOC(mode)) { + s->dict.buf = vmalloc(dict_max); + if (s->dict.buf == NULL) { + kfree(s); + return NULL; + } + } else if (DEC_IS_DYNALLOC(mode)) { + s->dict.buf = NULL; + s->dict.allocated = 0; + } + + return s; +} + +XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) +{ + /* This limits dictionary size to 3 GiB to keep parsing simpler. */ + if (props > 39) + return XZ_OPTIONS_ERROR; + + s->dict.size = 2 + (props & 1); + s->dict.size <<= (props >> 1) + 11; + + if (DEC_IS_MULTI(s->dict.mode)) { + if (s->dict.size > s->dict.size_max) + return XZ_MEMLIMIT_ERROR; + + s->dict.end = s->dict.size; + + if (DEC_IS_DYNALLOC(s->dict.mode)) { + if (s->dict.allocated < s->dict.size) { + vfree(s->dict.buf); + s->dict.buf = vmalloc(s->dict.size); + if (s->dict.buf == NULL) { + s->dict.allocated = 0; + return XZ_MEM_ERROR; + } + } + } + } + + s->lzma.len = 0; + + s->lzma2.sequence = SEQ_CONTROL; + s->lzma2.need_dict_reset = true; + + s->temp.size = 0; + + return XZ_OK; +} + +XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) +{ + if (DEC_IS_MULTI(s->dict.mode)) + vfree(s->dict.buf); + + kfree(s); +} diff --git a/depends/xz-embedded/src/xz_dec_stream.c b/depends/xz-embedded/src/xz_dec_stream.c new file mode 100644 index 00000000..d6525506 --- /dev/null +++ b/depends/xz-embedded/src/xz_dec_stream.c @@ -0,0 +1,847 @@ +/* + * .xz Stream decoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" +#include "xz_stream.h" + +#ifdef XZ_USE_CRC64 +# define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64) +#else +# define IS_CRC64(check_type) false +#endif + +/* Hash used to validate the Index field */ +struct xz_dec_hash { + vli_type unpadded; + vli_type uncompressed; + uint32_t crc32; +}; + +struct xz_dec { + /* Position in dec_main() */ + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_START, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_UNCOMPRESS, + SEQ_BLOCK_PADDING, + SEQ_BLOCK_CHECK, + SEQ_INDEX, + SEQ_INDEX_PADDING, + SEQ_INDEX_CRC32, + SEQ_STREAM_FOOTER + } sequence; + + /* Position in variable-length integers and Check fields */ + uint32_t pos; + + /* Variable-length integer decoded by dec_vli() */ + vli_type vli; + + /* Saved in_pos and out_pos */ + size_t in_start; + size_t out_start; + +#ifdef XZ_USE_CRC64 + /* CRC32 or CRC64 value in Block or CRC32 value in Index */ + uint64_t crc; +#else + /* CRC32 value in Block or Index */ + uint32_t crc; +#endif + + /* Type of the integrity check calculated from uncompressed data */ + enum xz_check check_type; + + /* Operation mode */ + enum xz_mode mode; + + /* + * True if the next call to xz_dec_run() is allowed to return + * XZ_BUF_ERROR. + */ + bool allow_buf_error; + + /* Information stored in Block Header */ + struct { + /* + * Value stored in the Compressed Size field, or + * VLI_UNKNOWN if Compressed Size is not present. + */ + vli_type compressed; + + /* + * Value stored in the Uncompressed Size field, or + * VLI_UNKNOWN if Uncompressed Size is not present. + */ + vli_type uncompressed; + + /* Size of the Block Header field */ + uint32_t size; + } block_header; + + /* Information collected when decoding Blocks */ + struct { + /* Observed compressed size of the current Block */ + vli_type compressed; + + /* Observed uncompressed size of the current Block */ + vli_type uncompressed; + + /* Number of Blocks decoded so far */ + vli_type count; + + /* + * Hash calculated from the Block sizes. This is used to + * validate the Index field. + */ + struct xz_dec_hash hash; + } block; + + /* Variables needed when verifying the Index field */ + struct { + /* Position in dec_index() */ + enum { + SEQ_INDEX_COUNT, + SEQ_INDEX_UNPADDED, + SEQ_INDEX_UNCOMPRESSED + } sequence; + + /* Size of the Index in bytes */ + vli_type size; + + /* Number of Records (matches block.count in valid files) */ + vli_type count; + + /* + * Hash calculated from the Records (matches block.hash in + * valid files). + */ + struct xz_dec_hash hash; + } index; + + /* + * Temporary buffer needed to hold Stream Header, Block Header, + * and Stream Footer. The Block Header is the biggest (1 KiB) + * so we reserve space according to that. buf[] has to be aligned + * to a multiple of four bytes; the size_t variables before it + * should guarantee this. + */ + struct { + size_t pos; + size_t size; + uint8_t buf[1024]; + } temp; + + struct xz_dec_lzma2 *lzma2; + +#ifdef XZ_DEC_BCJ + struct xz_dec_bcj *bcj; + bool bcj_active; +#endif +}; + +#ifdef XZ_DEC_ANY_CHECK +/* Sizes of the Check field with different Check IDs */ +static const uint8_t check_sizes[16] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 +}; +#endif + +/* + * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller + * must have set s->temp.pos to indicate how much data we are supposed + * to copy into s->temp.buf. Return true once s->temp.pos has reached + * s->temp.size. + */ +static bool fill_temp(struct xz_dec *s, struct xz_buf *b) +{ + size_t copy_size = min_t(size_t, + b->in_size - b->in_pos, s->temp.size - s->temp.pos); + + memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); + b->in_pos += copy_size; + s->temp.pos += copy_size; + + if (s->temp.pos == s->temp.size) { + s->temp.pos = 0; + return true; + } + + return false; +} + +/* Decode a variable-length integer (little-endian base-128 encoding) */ +static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + uint8_t byte; + + if (s->pos == 0) + s->vli = 0; + + while (*in_pos < in_size) { + byte = in[*in_pos]; + ++*in_pos; + + s->vli |= (vli_type)(byte & 0x7F) << s->pos; + + if ((byte & 0x80) == 0) { + /* Don't allow non-minimal encodings. */ + if (byte == 0 && s->pos != 0) + return XZ_DATA_ERROR; + + s->pos = 0; + return XZ_STREAM_END; + } + + s->pos += 7; + if (s->pos == 7 * VLI_BYTES_MAX) + return XZ_DATA_ERROR; + } + + return XZ_OK; +} + +/* + * Decode the Compressed Data field from a Block. Update and validate + * the observed compressed and uncompressed sizes of the Block so that + * they don't exceed the values possibly stored in the Block Header + * (validation assumes that no integer overflow occurs, since vli_type + * is normally uint64_t). Update the CRC32 or CRC64 value if presence of + * the CRC32 or CRC64 field was indicated in Stream Header. + * + * Once the decoding is finished, validate that the observed sizes match + * the sizes possibly stored in the Block Header. Update the hash and + * Block count, which are later used to validate the Index field. + */ +static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + s->in_start = b->in_pos; + s->out_start = b->out_pos; + +#ifdef XZ_DEC_BCJ + if (s->bcj_active) + ret = xz_dec_bcj_run(s->bcj, s->lzma2, b); + else +#endif + ret = xz_dec_lzma2_run(s->lzma2, b); + + s->block.compressed += b->in_pos - s->in_start; + s->block.uncompressed += b->out_pos - s->out_start; + + /* + * There is no need to separately check for VLI_UNKNOWN, since + * the observed sizes are always smaller than VLI_UNKNOWN. + */ + if (s->block.compressed > s->block_header.compressed + || s->block.uncompressed + > s->block_header.uncompressed) + return XZ_DATA_ERROR; + + if (s->check_type == XZ_CHECK_CRC32) + s->crc = xz_crc32(b->out + s->out_start, + b->out_pos - s->out_start, s->crc); +#ifdef XZ_USE_CRC64 + else if (s->check_type == XZ_CHECK_CRC64) + s->crc = xz_crc64(b->out + s->out_start, + b->out_pos - s->out_start, s->crc); +#endif + + if (ret == XZ_STREAM_END) { + if (s->block_header.compressed != VLI_UNKNOWN + && s->block_header.compressed + != s->block.compressed) + return XZ_DATA_ERROR; + + if (s->block_header.uncompressed != VLI_UNKNOWN + && s->block_header.uncompressed + != s->block.uncompressed) + return XZ_DATA_ERROR; + + s->block.hash.unpadded += s->block_header.size + + s->block.compressed; + +#ifdef XZ_DEC_ANY_CHECK + s->block.hash.unpadded += check_sizes[s->check_type]; +#else + if (s->check_type == XZ_CHECK_CRC32) + s->block.hash.unpadded += 4; + else if (IS_CRC64(s->check_type)) + s->block.hash.unpadded += 8; +#endif + + s->block.hash.uncompressed += s->block.uncompressed; + s->block.hash.crc32 = xz_crc32( + (const uint8_t *)&s->block.hash, + sizeof(s->block.hash), s->block.hash.crc32); + + ++s->block.count; + } + + return ret; +} + +/* Update the Index size and the CRC32 value. */ +static void index_update(struct xz_dec *s, const struct xz_buf *b) +{ + size_t in_used = b->in_pos - s->in_start; + s->index.size += in_used; + s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc); +} + +/* + * Decode the Number of Records, Unpadded Size, and Uncompressed Size + * fields from the Index field. That is, Index Padding and CRC32 are not + * decoded by this function. + * + * This can return XZ_OK (more input needed), XZ_STREAM_END (everything + * successfully decoded), or XZ_DATA_ERROR (input is corrupt). + */ +static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + do { + ret = dec_vli(s, b->in, &b->in_pos, b->in_size); + if (ret != XZ_STREAM_END) { + index_update(s, b); + return ret; + } + + switch (s->index.sequence) { + case SEQ_INDEX_COUNT: + s->index.count = s->vli; + + /* + * Validate that the Number of Records field + * indicates the same number of Records as + * there were Blocks in the Stream. + */ + if (s->index.count != s->block.count) + return XZ_DATA_ERROR; + + s->index.sequence = SEQ_INDEX_UNPADDED; + break; + + case SEQ_INDEX_UNPADDED: + s->index.hash.unpadded += s->vli; + s->index.sequence = SEQ_INDEX_UNCOMPRESSED; + break; + + case SEQ_INDEX_UNCOMPRESSED: + s->index.hash.uncompressed += s->vli; + s->index.hash.crc32 = xz_crc32( + (const uint8_t *)&s->index.hash, + sizeof(s->index.hash), + s->index.hash.crc32); + --s->index.count; + s->index.sequence = SEQ_INDEX_UNPADDED; + break; + } + } while (s->index.count > 0); + + return XZ_STREAM_END; +} + +/* + * Validate that the next four or eight input bytes match the value + * of s->crc. s->pos must be zero when starting to validate the first byte. + * The "bits" argument allows using the same code for both CRC32 and CRC64. + */ +static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b, + uint32_t bits) +{ + do { + if (b->in_pos == b->in_size) + return XZ_OK; + + if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++]) + return XZ_DATA_ERROR; + + s->pos += 8; + + } while (s->pos < bits); + + s->crc = 0; + s->pos = 0; + + return XZ_STREAM_END; +} + +#ifdef XZ_DEC_ANY_CHECK +/* + * Skip over the Check field when the Check ID is not supported. + * Returns true once the whole Check field has been skipped over. + */ +static bool check_skip(struct xz_dec *s, struct xz_buf *b) +{ + while (s->pos < check_sizes[s->check_type]) { + if (b->in_pos == b->in_size) + return false; + + ++b->in_pos; + ++s->pos; + } + + s->pos = 0; + + return true; +} +#endif + +/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ +static enum xz_ret dec_stream_header(struct xz_dec *s) +{ + if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) + return XZ_FORMAT_ERROR; + + if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) + != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) + return XZ_DATA_ERROR; + + if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) + return XZ_OPTIONS_ERROR; + + /* + * Of integrity checks, we support none (Check ID = 0), + * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4). + * However, if XZ_DEC_ANY_CHECK is defined, we will accept other + * check types too, but then the check won't be verified and + * a warning (XZ_UNSUPPORTED_CHECK) will be given. + */ + s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; + +#ifdef XZ_DEC_ANY_CHECK + if (s->check_type > XZ_CHECK_MAX) + return XZ_OPTIONS_ERROR; + + if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type)) + return XZ_UNSUPPORTED_CHECK; +#else + if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type)) + return XZ_OPTIONS_ERROR; +#endif + + return XZ_OK; +} + +/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ +static enum xz_ret dec_stream_footer(struct xz_dec *s) +{ + if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) + return XZ_DATA_ERROR; + + if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) + return XZ_DATA_ERROR; + + /* + * Validate Backward Size. Note that we never added the size of the + * Index CRC32 field to s->index.size, thus we use s->index.size / 4 + * instead of s->index.size / 4 - 1. + */ + if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) + return XZ_DATA_ERROR; + + if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) + return XZ_DATA_ERROR; + + /* + * Use XZ_STREAM_END instead of XZ_OK to be more convenient + * for the caller. + */ + return XZ_STREAM_END; +} + +/* Decode the Block Header and initialize the filter chain. */ +static enum xz_ret dec_block_header(struct xz_dec *s) +{ + enum xz_ret ret; + + /* + * Validate the CRC32. We know that the temp buffer is at least + * eight bytes so this is safe. + */ + s->temp.size -= 4; + if (xz_crc32(s->temp.buf, s->temp.size, 0) + != get_le32(s->temp.buf + s->temp.size)) + return XZ_DATA_ERROR; + + s->temp.pos = 2; + + /* + * Catch unsupported Block Flags. We support only one or two filters + * in the chain, so we catch that with the same test. + */ +#ifdef XZ_DEC_BCJ + if (s->temp.buf[1] & 0x3E) +#else + if (s->temp.buf[1] & 0x3F) +#endif + return XZ_OPTIONS_ERROR; + + /* Compressed Size */ + if (s->temp.buf[1] & 0x40) { + if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) + != XZ_STREAM_END) + return XZ_DATA_ERROR; + + s->block_header.compressed = s->vli; + } else { + s->block_header.compressed = VLI_UNKNOWN; + } + + /* Uncompressed Size */ + if (s->temp.buf[1] & 0x80) { + if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) + != XZ_STREAM_END) + return XZ_DATA_ERROR; + + s->block_header.uncompressed = s->vli; + } else { + s->block_header.uncompressed = VLI_UNKNOWN; + } + +#ifdef XZ_DEC_BCJ + /* If there are two filters, the first one must be a BCJ filter. */ + s->bcj_active = s->temp.buf[1] & 0x01; + if (s->bcj_active) { + if (s->temp.size - s->temp.pos < 2) + return XZ_OPTIONS_ERROR; + + ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]); + if (ret != XZ_OK) + return ret; + + /* + * We don't support custom start offset, + * so Size of Properties must be zero. + */ + if (s->temp.buf[s->temp.pos++] != 0x00) + return XZ_OPTIONS_ERROR; + } +#endif + + /* Valid Filter Flags always take at least two bytes. */ + if (s->temp.size - s->temp.pos < 2) + return XZ_DATA_ERROR; + + /* Filter ID = LZMA2 */ + if (s->temp.buf[s->temp.pos++] != 0x21) + return XZ_OPTIONS_ERROR; + + /* Size of Properties = 1-byte Filter Properties */ + if (s->temp.buf[s->temp.pos++] != 0x01) + return XZ_OPTIONS_ERROR; + + /* Filter Properties contains LZMA2 dictionary size. */ + if (s->temp.size - s->temp.pos < 1) + return XZ_DATA_ERROR; + + ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]); + if (ret != XZ_OK) + return ret; + + /* The rest must be Header Padding. */ + while (s->temp.pos < s->temp.size) + if (s->temp.buf[s->temp.pos++] != 0x00) + return XZ_OPTIONS_ERROR; + + s->temp.pos = 0; + s->block.compressed = 0; + s->block.uncompressed = 0; + + return XZ_OK; +} + +static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + /* + * Store the start position for the case when we are in the middle + * of the Index field. + */ + s->in_start = b->in_pos; + + while (true) { + switch (s->sequence) { + case SEQ_STREAM_HEADER: + /* + * Stream Header is copied to s->temp, and then + * decoded from there. This way if the caller + * gives us only little input at a time, we can + * still keep the Stream Header decoding code + * simple. Similar approach is used in many places + * in this file. + */ + if (!fill_temp(s, b)) + return XZ_OK; + + /* + * If dec_stream_header() returns + * XZ_UNSUPPORTED_CHECK, it is still possible + * to continue decoding if working in multi-call + * mode. Thus, update s->sequence before calling + * dec_stream_header(). + */ + s->sequence = SEQ_BLOCK_START; + + ret = dec_stream_header(s); + if (ret != XZ_OK) + return ret; + + case SEQ_BLOCK_START: + /* We need one byte of input to continue. */ + if (b->in_pos == b->in_size) + return XZ_OK; + + /* See if this is the beginning of the Index field. */ + if (b->in[b->in_pos] == 0) { + s->in_start = b->in_pos++; + s->sequence = SEQ_INDEX; + break; + } + + /* + * Calculate the size of the Block Header and + * prepare to decode it. + */ + s->block_header.size + = ((uint32_t)b->in[b->in_pos] + 1) * 4; + + s->temp.size = s->block_header.size; + s->temp.pos = 0; + s->sequence = SEQ_BLOCK_HEADER; + + case SEQ_BLOCK_HEADER: + if (!fill_temp(s, b)) + return XZ_OK; + + ret = dec_block_header(s); + if (ret != XZ_OK) + return ret; + + s->sequence = SEQ_BLOCK_UNCOMPRESS; + + case SEQ_BLOCK_UNCOMPRESS: + ret = dec_block(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->sequence = SEQ_BLOCK_PADDING; + + case SEQ_BLOCK_PADDING: + /* + * Size of Compressed Data + Block Padding + * must be a multiple of four. We don't need + * s->block.compressed for anything else + * anymore, so we use it here to test the size + * of the Block Padding field. + */ + while (s->block.compressed & 3) { + if (b->in_pos == b->in_size) + return XZ_OK; + + if (b->in[b->in_pos++] != 0) + return XZ_DATA_ERROR; + + ++s->block.compressed; + } + + s->sequence = SEQ_BLOCK_CHECK; + + case SEQ_BLOCK_CHECK: + if (s->check_type == XZ_CHECK_CRC32) { + ret = crc_validate(s, b, 32); + if (ret != XZ_STREAM_END) + return ret; + } + else if (IS_CRC64(s->check_type)) { + ret = crc_validate(s, b, 64); + if (ret != XZ_STREAM_END) + return ret; + } +#ifdef XZ_DEC_ANY_CHECK + else if (!check_skip(s, b)) { + return XZ_OK; + } +#endif + + s->sequence = SEQ_BLOCK_START; + break; + + case SEQ_INDEX: + ret = dec_index(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->sequence = SEQ_INDEX_PADDING; + + case SEQ_INDEX_PADDING: + while ((s->index.size + (b->in_pos - s->in_start)) + & 3) { + if (b->in_pos == b->in_size) { + index_update(s, b); + return XZ_OK; + } + + if (b->in[b->in_pos++] != 0) + return XZ_DATA_ERROR; + } + + /* Finish the CRC32 value and Index size. */ + index_update(s, b); + + /* Compare the hashes to validate the Index field. */ + if (!memeq(&s->block.hash, &s->index.hash, + sizeof(s->block.hash))) + return XZ_DATA_ERROR; + + s->sequence = SEQ_INDEX_CRC32; + + case SEQ_INDEX_CRC32: + ret = crc_validate(s, b, 32); + if (ret != XZ_STREAM_END) + return ret; + + s->temp.size = STREAM_HEADER_SIZE; + s->sequence = SEQ_STREAM_FOOTER; + + case SEQ_STREAM_FOOTER: + if (!fill_temp(s, b)) + return XZ_OK; + + return dec_stream_footer(s); + } + } + + /* Never reached */ +} + +/* + * xz_dec_run() is a wrapper for dec_main() to handle some special cases in + * multi-call and single-call decoding. + * + * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we + * are not going to make any progress anymore. This is to prevent the caller + * from calling us infinitely when the input file is truncated or otherwise + * corrupt. Since zlib-style API allows that the caller fills the input buffer + * only when the decoder doesn't produce any new output, we have to be careful + * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only + * after the second consecutive call to xz_dec_run() that makes no progress. + * + * In single-call mode, if we couldn't decode everything and no error + * occurred, either the input is truncated or the output buffer is too small. + * Since we know that the last input byte never produces any output, we know + * that if all the input was consumed and decoding wasn't finished, the file + * must be corrupt. Otherwise the output buffer has to be too small or the + * file is corrupt in a way that decoding it produces too big output. + * + * If single-call decoding fails, we reset b->in_pos and b->out_pos back to + * their original values. This is because with some filter chains there won't + * be any valid uncompressed data in the output buffer unless the decoding + * actually succeeds (that's the price to pay of using the output buffer as + * the workspace). + */ +XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) +{ + size_t in_start; + size_t out_start; + enum xz_ret ret; + + if (DEC_IS_SINGLE(s->mode)) + xz_dec_reset(s); + + in_start = b->in_pos; + out_start = b->out_pos; + ret = dec_main(s, b); + + if (DEC_IS_SINGLE(s->mode)) { + if (ret == XZ_OK) + ret = b->in_pos == b->in_size + ? XZ_DATA_ERROR : XZ_BUF_ERROR; + + if (ret != XZ_STREAM_END) { + b->in_pos = in_start; + b->out_pos = out_start; + } + + } else if (ret == XZ_OK && in_start == b->in_pos + && out_start == b->out_pos) { + if (s->allow_buf_error) + ret = XZ_BUF_ERROR; + + s->allow_buf_error = true; + } else { + s->allow_buf_error = false; + } + + return ret; +} + +XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) +{ + struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->mode = mode; + +#ifdef XZ_DEC_BCJ + s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); + if (s->bcj == NULL) + goto error_bcj; +#endif + + s->lzma2 = xz_dec_lzma2_create(mode, dict_max); + if (s->lzma2 == NULL) + goto error_lzma2; + + xz_dec_reset(s); + return s; + +error_lzma2: +#ifdef XZ_DEC_BCJ + xz_dec_bcj_end(s->bcj); +error_bcj: +#endif + kfree(s); + return NULL; +} + +XZ_EXTERN void xz_dec_reset(struct xz_dec *s) +{ + s->sequence = SEQ_STREAM_HEADER; + s->allow_buf_error = false; + s->pos = 0; + s->crc = 0; + memzero(&s->block, sizeof(s->block)); + memzero(&s->index, sizeof(s->index)); + s->temp.pos = 0; + s->temp.size = STREAM_HEADER_SIZE; +} + +XZ_EXTERN void xz_dec_end(struct xz_dec *s) +{ + if (s != NULL) { + xz_dec_lzma2_end(s->lzma2); +#ifdef XZ_DEC_BCJ + xz_dec_bcj_end(s->bcj); +#endif + kfree(s); + } +} diff --git a/depends/xz-embedded/src/xz_lzma2.h b/depends/xz-embedded/src/xz_lzma2.h new file mode 100644 index 00000000..071d67be --- /dev/null +++ b/depends/xz-embedded/src/xz_lzma2.h @@ -0,0 +1,204 @@ +/* + * LZMA2 definitions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_LZMA2_H +#define XZ_LZMA2_H + +/* Range coder constants */ +#define RC_SHIFT_BITS 8 +#define RC_TOP_BITS 24 +#define RC_TOP_VALUE (1 << RC_TOP_BITS) +#define RC_BIT_MODEL_TOTAL_BITS 11 +#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS) +#define RC_MOVE_BITS 5 + +/* + * Maximum number of position states. A position state is the lowest pb + * number of bits of the current uncompressed offset. In some places there + * are different sets of probabilities for different position states. + */ +#define POS_STATES_MAX (1 << 4) + +/* + * This enum is used to track which LZMA symbols have occurred most recently + * and in which order. This information is used to predict the next symbol. + * + * Symbols: + * - Literal: One 8-bit byte + * - Match: Repeat a chunk of data at some distance + * - Long repeat: Multi-byte match at a recently seen distance + * - Short repeat: One-byte repeat at a recently seen distance + * + * The symbol names are in from STATE_oldest_older_previous. REP means + * either short or long repeated match, and NONLIT means any non-literal. + */ +enum lzma_state { + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_LIT_MATCH, + STATE_LIT_LONGREP, + STATE_LIT_SHORTREP, + STATE_NONLIT_MATCH, + STATE_NONLIT_REP +}; + +/* Total number of states */ +#define STATES 12 + +/* The lowest 7 states indicate that the previous state was a literal. */ +#define LIT_STATES 7 + +/* Indicate that the latest symbol was a literal. */ +static inline void lzma_state_literal(enum lzma_state *state) +{ + if (*state <= STATE_SHORTREP_LIT_LIT) + *state = STATE_LIT_LIT; + else if (*state <= STATE_LIT_SHORTREP) + *state -= 3; + else + *state -= 6; +} + +/* Indicate that the latest symbol was a match. */ +static inline void lzma_state_match(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH; +} + +/* Indicate that the latest state was a long repeated match. */ +static inline void lzma_state_long_rep(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP; +} + +/* Indicate that the latest symbol was a short match. */ +static inline void lzma_state_short_rep(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP; +} + +/* Test if the previous symbol was a literal. */ +static inline bool lzma_state_is_literal(enum lzma_state state) +{ + return state < LIT_STATES; +} + +/* Each literal coder is divided in three sections: + * - 0x001-0x0FF: Without match byte + * - 0x101-0x1FF: With match byte; match bit is 0 + * - 0x201-0x2FF: With match byte; match bit is 1 + * + * Match byte is used when the previous LZMA symbol was something else than + * a literal (that is, it was some kind of match). + */ +#define LITERAL_CODER_SIZE 0x300 + +/* Maximum number of literal coders */ +#define LITERAL_CODERS_MAX (1 << 4) + +/* Minimum length of a match is two bytes. */ +#define MATCH_LEN_MIN 2 + +/* Match length is encoded with 4, 5, or 10 bits. + * + * Length Bits + * 2-9 4 = Choice=0 + 3 bits + * 10-17 5 = Choice=1 + Choice2=0 + 3 bits + * 18-273 10 = Choice=1 + Choice2=1 + 8 bits + */ +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +/* + * Maximum length of a match is 273 which is a result of the encoding + * described above. + */ +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + +/* + * Different sets of probabilities are used for match distances that have + * very short match length: Lengths of 2, 3, and 4 bytes have a separate + * set of probabilities for each length. The matches with longer length + * use a shared set of probabilities. + */ +#define DIST_STATES 4 + +/* + * Get the index of the appropriate probability array for decoding + * the distance slot. + */ +static inline uint32_t lzma_get_dist_state(uint32_t len) +{ + return len < DIST_STATES + MATCH_LEN_MIN + ? len - MATCH_LEN_MIN : DIST_STATES - 1; +} + +/* + * The highest two bits of a 32-bit match distance are encoded using six bits. + * This six-bit value is called a distance slot. This way encoding a 32-bit + * value takes 6-36 bits, larger values taking more bits. + */ +#define DIST_SLOT_BITS 6 +#define DIST_SLOTS (1 << DIST_SLOT_BITS) + +/* Match distances up to 127 are fully encoded using probabilities. Since + * the highest two bits (distance slot) are always encoded using six bits, + * the distances 0-3 don't need any additional bits to encode, since the + * distance slot itself is the same as the actual distance. DIST_MODEL_START + * indicates the first distance slot where at least one additional bit is + * needed. + */ +#define DIST_MODEL_START 4 + +/* + * Match distances greater than 127 are encoded in three pieces: + * - distance slot: the highest two bits + * - direct bits: 2-26 bits below the highest two bits + * - alignment bits: four lowest bits + * + * Direct bits don't use any probabilities. + * + * The distance slot value of 14 is for distances 128-191. + */ +#define DIST_MODEL_END 14 + +/* Distance slots that indicate a distance <= 127. */ +#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +/* + * For match distances greater than 127, only the highest two bits and the + * lowest four bits (alignment) is encoded using probabilities. + */ +#define ALIGN_BITS 4 +#define ALIGN_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_SIZE - 1) + +/* Total number of all probability variables */ +#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE) + +/* + * LZMA remembers the four most recent match distances. Reusing these + * distances tends to take less space than re-encoding the actual + * distance value. + */ +#define REPS 4 + +#endif diff --git a/depends/xz-embedded/src/xz_private.h b/depends/xz-embedded/src/xz_private.h new file mode 100644 index 00000000..482b90f3 --- /dev/null +++ b/depends/xz-embedded/src/xz_private.h @@ -0,0 +1,156 @@ +/* + * Private includes and definitions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_PRIVATE_H +#define XZ_PRIVATE_H + +#ifdef __KERNEL__ +# include +# include +# include + /* XZ_PREBOOT may be defined only via decompress_unxz.c. */ +# ifndef XZ_PREBOOT +# include +# include +# include +# ifdef CONFIG_XZ_DEC_X86 +# define XZ_DEC_X86 +# endif +# ifdef CONFIG_XZ_DEC_POWERPC +# define XZ_DEC_POWERPC +# endif +# ifdef CONFIG_XZ_DEC_IA64 +# define XZ_DEC_IA64 +# endif +# ifdef CONFIG_XZ_DEC_ARM +# define XZ_DEC_ARM +# endif +# ifdef CONFIG_XZ_DEC_ARMTHUMB +# define XZ_DEC_ARMTHUMB +# endif +# ifdef CONFIG_XZ_DEC_SPARC +# define XZ_DEC_SPARC +# endif +# define memeq(a, b, size) (memcmp(a, b, size) == 0) +# define memzero(buf, size) memset(buf, 0, size) +# endif +# define get_le32(p) le32_to_cpup((const uint32_t *)(p)) +#else + /* + * For userspace builds, use a separate header to define the required + * macros and functions. This makes it easier to adapt the code into + * different environments and avoids clutter in the Linux kernel tree. + */ +# include "xz_config.h" +#endif + +/* If no specific decoding mode is requested, enable support for all modes. */ +#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \ + && !defined(XZ_DEC_DYNALLOC) +# define XZ_DEC_SINGLE +# define XZ_DEC_PREALLOC +# define XZ_DEC_DYNALLOC +#endif + +/* + * The DEC_IS_foo(mode) macros are used in "if" statements. If only some + * of the supported modes are enabled, these macros will evaluate to true or + * false at compile time and thus allow the compiler to omit unneeded code. + */ +#ifdef XZ_DEC_SINGLE +# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE) +#else +# define DEC_IS_SINGLE(mode) (false) +#endif + +#ifdef XZ_DEC_PREALLOC +# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC) +#else +# define DEC_IS_PREALLOC(mode) (false) +#endif + +#ifdef XZ_DEC_DYNALLOC +# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC) +#else +# define DEC_IS_DYNALLOC(mode) (false) +#endif + +#if !defined(XZ_DEC_SINGLE) +# define DEC_IS_MULTI(mode) (true) +#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC) +# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE) +#else +# define DEC_IS_MULTI(mode) (false) +#endif + +/* + * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ. + * XZ_DEC_BCJ is used to enable generic support for BCJ decoders. + */ +#ifndef XZ_DEC_BCJ +# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ + || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \ + || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ + || defined(XZ_DEC_SPARC) +# define XZ_DEC_BCJ +# endif +#endif + +/* + * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used + * before calling xz_dec_lzma2_run(). + */ +XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode, + uint32_t dict_max); + +/* + * Decode the LZMA2 properties (one byte) and reset the decoder. Return + * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not + * big enough, and XZ_OPTIONS_ERROR if props indicates something that this + * decoder doesn't support. + */ +XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, + uint8_t props); + +/* Decode raw LZMA2 stream from b->in to b->out. */ +XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, + struct xz_buf *b); + +/* Free the memory allocated for the LZMA2 decoder. */ +XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s); + +#ifdef XZ_DEC_BCJ +/* + * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before + * calling xz_dec_bcj_run(). + */ +XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call); + +/* + * Decode the Filter ID of a BCJ filter. This implementation doesn't + * support custom start offsets, so no decoding of Filter Properties + * is needed. Returns XZ_OK if the given Filter ID is supported. + * Otherwise XZ_OPTIONS_ERROR is returned. + */ +XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id); + +/* + * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is + * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run() + * must be called directly. + */ +XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, + struct xz_dec_lzma2 *lzma2, + struct xz_buf *b); + +/* Free the memory allocated for the BCJ filters. */ +#define xz_dec_bcj_end(s) kfree(s) +#endif + +#endif diff --git a/depends/xz-embedded/src/xz_stream.h b/depends/xz-embedded/src/xz_stream.h new file mode 100644 index 00000000..66cb5a70 --- /dev/null +++ b/depends/xz-embedded/src/xz_stream.h @@ -0,0 +1,62 @@ +/* + * Definitions for handling the .xz file format + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_STREAM_H +#define XZ_STREAM_H + +#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32 +# include +# undef crc32 +# define xz_crc32(buf, size, crc) \ + (~crc32_le(~(uint32_t)(crc), buf, size)) +#endif + +/* + * See the .xz file format specification at + * http://tukaani.org/xz/xz-file-format.txt + * to understand the container format. + */ + +#define STREAM_HEADER_SIZE 12 + +#define HEADER_MAGIC "\3757zXZ" +#define HEADER_MAGIC_SIZE 6 + +#define FOOTER_MAGIC "YZ" +#define FOOTER_MAGIC_SIZE 2 + +/* + * Variable-length integer can hold a 63-bit unsigned integer or a special + * value indicating that the value is unknown. + * + * Experimental: vli_type can be defined to uint32_t to save a few bytes + * in code size (no effect on speed). Doing so limits the uncompressed and + * compressed size of the file to less than 256 MiB and may also weaken + * error detection slightly. + */ +typedef uint64_t vli_type; + +#define VLI_MAX ((vli_type)-1 / 2) +#define VLI_UNKNOWN ((vli_type)-1) + +/* Maximum encoded size of a VLI */ +#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7) + +/* Integrity Check types */ +enum xz_check { + XZ_CHECK_NONE = 0, + XZ_CHECK_CRC32 = 1, + XZ_CHECK_CRC64 = 4, + XZ_CHECK_SHA256 = 10 +}; + +/* Maximum possible Check ID */ +#define XZ_CHECK_MAX 15 + +#endif diff --git a/depends/xz-embedded/xzminidec.c b/depends/xz-embedded/xzminidec.c new file mode 100644 index 00000000..ba074131 --- /dev/null +++ b/depends/xz-embedded/xzminidec.c @@ -0,0 +1,135 @@ +/* + * Simple XZ decoder command line tool + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +/* + * This is really limited: Not all filters from .xz format are supported, + * only CRC32 is supported as the integrity check, and decoding of + * concatenated .xz streams is not supported. Thus, you may want to look + * at xzdec from XZ Utils if a few KiB bigger tool is not a problem. + */ + +#include +#include +#include +#include "xz.h" + +static uint8_t in[BUFSIZ]; +static uint8_t out[BUFSIZ]; + +int main(int argc, char **argv) +{ + struct xz_buf b; + struct xz_dec *s; + enum xz_ret ret; + const char *msg; + + if (argc >= 2 && strcmp(argv[1], "--help") == 0) { + fputs("Uncompress a .xz file from stdin to stdout.\n" + "Arguments other than `--help' are ignored.\n", + stdout); + return 0; + } + + xz_crc32_init(); +#ifdef XZ_USE_CRC64 + xz_crc64_init(); +#endif + + /* + * Support up to 64 MiB dictionary. The actually needed memory + * is allocated once the headers have been parsed. + */ + s = xz_dec_init(XZ_DYNALLOC, 1 << 26); + if (s == NULL) { + msg = "Memory allocation failed\n"; + goto error; + } + + b.in = in; + b.in_pos = 0; + b.in_size = 0; + b.out = out; + b.out_pos = 0; + b.out_size = BUFSIZ; + + while (true) { + if (b.in_pos == b.in_size) { + b.in_size = fread(in, 1, sizeof(in), stdin); + b.in_pos = 0; + } + + ret = xz_dec_run(s, &b); + + if (b.out_pos == sizeof(out)) { + if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) { + msg = "Write error\n"; + goto error; + } + + b.out_pos = 0; + } + + if (ret == XZ_OK) + continue; + +#ifdef XZ_DEC_ANY_CHECK + if (ret == XZ_UNSUPPORTED_CHECK) { + fputs(argv[0], stderr); + fputs(": ", stderr); + fputs("Unsupported check; not verifying " + "file integrity\n", stderr); + continue; + } +#endif + + if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos + || fclose(stdout)) { + msg = "Write error\n"; + goto error; + } + + switch (ret) { + case XZ_STREAM_END: + xz_dec_end(s); + return 0; + + case XZ_MEM_ERROR: + msg = "Memory allocation failed\n"; + goto error; + + case XZ_MEMLIMIT_ERROR: + msg = "Memory usage limit reached\n"; + goto error; + + case XZ_FORMAT_ERROR: + msg = "Not a .xz file\n"; + goto error; + + case XZ_OPTIONS_ERROR: + msg = "Unsupported options in the .xz headers\n"; + goto error; + + case XZ_DATA_ERROR: + case XZ_BUF_ERROR: + msg = "File is corrupt\n"; + goto error; + + default: + msg = "Bug!\n"; + goto error; + } + } + +error: + xz_dec_end(s); + fputs(argv[0], stderr); + fputs(": ", stderr); + fputs(msg, stderr); + return 1; +} -- cgit From 604162acdf5283a9759c1b3ce9e90887a6599ce7 Mon Sep 17 00:00:00 2001 From: Petr Mrázek Date: Sun, 29 Sep 2013 21:11:30 +0200 Subject: Turn pack200 into an actual library --- depends/pack200/CMakeLists.txt | 10 +- depends/pack200/anti200.cpp | 28 ++ depends/pack200/include/unpack200.h | 38 +- depends/pack200/src/bands.cpp | 36 +- depends/pack200/src/bands.h | 9 +- depends/pack200/src/bytes.cpp | 4 +- depends/pack200/src/bytes.h | 2 +- depends/pack200/src/coding.cpp | 125 ++++--- depends/pack200/src/coding.h | 29 +- depends/pack200/src/constants.h | 2 +- depends/pack200/src/defines.h | 75 +--- depends/pack200/src/main.cpp | 489 -------------------------- depends/pack200/src/unpack.cpp | 670 ++++++++++-------------------------- depends/pack200/src/unpack.h | 114 ++---- depends/pack200/src/unpack200.cpp | 172 +++++++++ depends/pack200/src/utils.cpp | 28 +- depends/pack200/src/utils.h | 9 +- depends/pack200/src/zip.cpp | 56 +-- depends/pack200/src/zip.h | 28 +- depends/xz-embedded/CMakeLists.txt | 11 +- 20 files changed, 583 insertions(+), 1352 deletions(-) create mode 100644 depends/pack200/anti200.cpp delete mode 100644 depends/pack200/src/main.cpp create mode 100644 depends/pack200/src/unpack200.cpp (limited to 'depends/xz-embedded') diff --git a/depends/pack200/CMakeLists.txt b/depends/pack200/CMakeLists.txt index 79c78f80..657e303c 100644 --- a/depends/pack200/CMakeLists.txt +++ b/depends/pack200/CMakeLists.txt @@ -19,6 +19,7 @@ ELSE(UNIX) ENDIF(UNIX) SET(PACK200_SRC +include/unpack200.h src/bands.cpp src/bands.h src/bytes.cpp @@ -27,7 +28,7 @@ src/coding.cpp src/coding.h src/constants.h src/defines.h -src/main.cpp +src/unpack200.cpp src/unpack.cpp src/unpack.h src/utils.cpp @@ -36,7 +37,9 @@ src/zip.cpp src/zip.h ) -add_executable(unpack200 ${PACK200_SRC}) +include_directories(include) + +add_library(unpack200 STATIC ${PACK200_SRC}) IF(UNIX) target_link_libraries(unpack200 ${ZLIB_LIBRARIES}) @@ -44,3 +47,6 @@ ELSE() # zlib is part of Qt on windows. use it. QT5_USE_MODULES(unpack200 Core) ENDIF() + +add_executable(anti200 anti200.cpp) +target_link_libraries(anti200 unpack200) diff --git a/depends/pack200/anti200.cpp b/depends/pack200/anti200.cpp new file mode 100644 index 00000000..3dfdb5dc --- /dev/null +++ b/depends/pack200/anti200.cpp @@ -0,0 +1,28 @@ +/* + * This is trivial. Do what thou wilt with it. Public domain. + */ + +#include +#include +#include "unpack200.h" + +int main(int argc, char **argv) +{ + if (argc == 3) + { + try + { + unpack_200(argv[1], argv[2]); + } + catch (std::runtime_error &e) + { + std::cerr << "Bad things happened: " << e.what() << std::endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; + } + else + std::cerr << "Simple pack200 unpacker!" << std::endl << "Run like this:" << std::endl + << " " << argv[0] << " input.jar.lzma output.jar" << std::endl; + return EXIT_FAILURE; +} diff --git a/depends/pack200/include/unpack200.h b/depends/pack200/include/unpack200.h index 8d1c8b69..bcee8009 100644 --- a/depends/pack200/include/unpack200.h +++ b/depends/pack200/include/unpack200.h @@ -1 +1,37 @@ - +/* + * Copyright (c) 2001, 2008, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#pragma once +#include + +/** + * @brief Unpack a PACK200 file + * + * @param input_path Path to the input file in PACK200 format. System native string encoding. + * @param output_path Path to the output file in PACK200 format. System native string encoding. + * @return void + * @throw std::runtime_error for any error encountered + */ +void unpack_200(std::string input_path, std::string output_path); diff --git a/depends/pack200/src/bands.cpp b/depends/pack200/src/bands.cpp index 6b4e8971..1c10b35b 100644 --- a/depends/pack200/src/bands.cpp +++ b/depends/pack200/src/bands.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include "defines.h" #include "bytes.h" @@ -44,18 +45,8 @@ #include "constants.h" #include "unpack.h" -inline void band::abort(const char *msg) -{ - u->abort(msg); -} -inline bool band::aborting() -{ - return u->aborting(); -} - void band::readData(int expectedLength) { - CHECK; assert(expectedLength >= 0); assert(vs[0].cmk == cmk_ERROR); if (expectedLength != 0) @@ -82,7 +73,7 @@ void band::readData(int expectedLength) // Make a conservatively generous estimate of band size in bytes. // Assume B == 5 everywhere. // Assume awkward pop with all {U} values (2*5 per value) - jlong generous = (jlong)length * (B_MAX * 3 + 1) + C_SLOP; + int64_t generous = (int64_t)length * (B_MAX * 3 + 1) + C_SLOP; u->ensure_input(generous); } @@ -102,7 +93,6 @@ void band::readData(int expectedLength) assert(!valc->isMalloc); } xvs.init(u->rp, u->rplimit, valc); - CHECK; int X = xvs.getInt(); if (valc->S() != 0) { @@ -133,7 +123,6 @@ void band::readData(int expectedLength) byte XB_byte = (byte)XB; byte *XB_ptr = &XB_byte; cm.init(u->rp, u->rplimit, XB_ptr, 0, defc, length, nullptr); - CHECK; } else { @@ -162,7 +151,6 @@ void band::setIndexByTag(byte tag) entry *band::getRefCommon(cpindex *ix_, bool nullOKwithCaller) { - CHECK_0; assert(ix_->ixTag == ixTag || (ixTag == CONSTANT_Literal && ix_->ixTag >= CONSTANT_Integer && ix_->ixTag <= CONSTANT_String)); @@ -171,27 +159,26 @@ entry *band::getRefCommon(cpindex *ix_, bool nullOKwithCaller) // But nullOKwithCaller means caller is willing to tolerate a nullptr. entry *ref = ix_->get(n); if (ref == nullptr && !(nullOKwithCaller && n == -1)) - abort(n == -1 ? "nullptr ref" : "bad ref"); + unpack_abort(n == -1 ? "nullptr ref" : "bad ref"); return ref; } -jlong band::getLong(band &lo_band, bool have_hi) +int64_t band::getLong(band &lo_band, bool have_hi) { band &hi_band = (*this); assert(lo_band.bn == hi_band.bn + 1); - uint lo = lo_band.getInt(); + uint32_t lo = lo_band.getInt(); if (!have_hi) { assert(hi_band.length == 0); return makeLong(0, lo); } - uint hi = hi_band.getInt(); + uint32_t hi = hi_band.getInt(); return makeLong(hi, lo); } int band::getIntTotal() { - CHECK_0; if (length == 0) return 0; if (total_memo > 0) @@ -201,8 +188,7 @@ int band::getIntTotal() // and that the partial sums never overflow (wrap negative) if (total < 0) { - abort("overflow detected"); - return 0; + unpack_abort("overflow detected"); } for (int k = length - 1; k > 0; k--) { @@ -210,8 +196,7 @@ int band::getIntTotal() total += vs[0].getInt(); if (total < prev_total) { - abort("overflow detected"); - return 0; + unpack_abort("overflow detected"); } } rewind(); @@ -221,7 +206,6 @@ int band::getIntTotal() int band::getIntCount(int tag) { - CHECK_0; if (length == 0) return 0; if (tag >= HIST0_MIN && tag <= HIST0_MAX) @@ -230,7 +214,6 @@ int band::getIntCount(int tag) { // Lazily calculate an approximate histogram. hist0 = U_NEW(int, (HIST0_MAX - HIST0_MIN) + 1); - CHECK_0; for (int k = length; k > 0; k--) { int x = vs[0].getInt(); @@ -404,7 +387,6 @@ const band_init all_band_inits[] = BAND_INIT(file_modtime, DELTA5_spec, 0), BAND_INIT(file_options, UNSIGNED5_spec, 0), // BAND_INIT(file_bits, BYTE1_spec, 0), {0, 0}}; -#define NUM_BAND_INITS (sizeof(all_band_inits) / sizeof(all_band_inits[0])) band *band::makeBands(unpacker *u) { @@ -434,7 +416,7 @@ void band::initIndexes(unpacker *u) for (int i = 0; i < BAND_LIMIT; i++) { band *scan = &tmp_all_bands[i]; - uint tag = scan->ixTag; // Cf. #define INDEX(tag) above + uint32_t tag = scan->ixTag; // Cf. #define INDEX(tag) above if (tag != 0 && tag != CONSTANT_Literal && (tag & SUBINDEX_BIT) == 0) { scan->setIndex(u->cp.getIndex(tag)); diff --git a/depends/pack200/src/bands.h b/depends/pack200/src/bands.h index 3f944481..a56cd7d5 100644 --- a/depends/pack200/src/bands.h +++ b/depends/pack200/src/bands.h @@ -150,11 +150,11 @@ struct band return getRefCommon(ix2, true); } entry *getRefCommon(cpindex *ix, bool nullOK); - jlong getLong(band &lo_band, bool have_hi); + int64_t getLong(band &lo_band, bool have_hi); - static jlong makeLong(uint hi, uint lo) + static int64_t makeLong(uint32_t hi, uint32_t lo) { - return ((julong)hi << 32) + (((julong)lo << 32) >> 32); + return ((uint64_t)hi << 32) + (((uint64_t)lo << 32) >> 32); } int getIntTotal(); @@ -162,9 +162,6 @@ struct band static band *makeBands(unpacker *u); static void initIndexes(unpacker *u); - - void abort(const char *msg = nullptr); //{ u->abort(msg); } - bool aborting(); //{ return u->aborting(); } }; extern band all_bands[]; diff --git a/depends/pack200/src/bytes.cpp b/depends/pack200/src/bytes.cpp index b82a987a..d3808afa 100644 --- a/depends/pack200/src/bytes.cpp +++ b/depends/pack200/src/bytes.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include "defines.h" #include "bytes.h" #include "utils.h" @@ -114,7 +115,7 @@ int bytes::compareTo(bytes &other) void bytes::saveFrom(const void *ptr_, size_t len_) { malloc(len_); - // Save as much as possible. (Helps unpacker::abort.) + // Save as much as possible. if (len_ > len) { assert(ptr == dummy); // error recovery @@ -161,7 +162,6 @@ byte *fillbytes::grow(size_t s) allocated = b.len; if (allocated != maxlen) { - assert(unpack_aborting()); b.len = nlen - s; // back up return dummy; // scribble during error recov. } diff --git a/depends/pack200/src/bytes.h b/depends/pack200/src/bytes.h index 3926f9f2..2e4a9daf 100644 --- a/depends/pack200/src/bytes.h +++ b/depends/pack200/src/bytes.h @@ -161,7 +161,7 @@ struct fillbytes b.len = 0; } int8_t *grow(size_t s); // grow so that limit() += s - int getByte(uint i) + int getByte(uint32_t i) { return *loc(i) & 0xFF; } diff --git a/depends/pack200/src/coding.cpp b/depends/pack200/src/coding.cpp index 32977e05..226ba458 100644 --- a/depends/pack200/src/coding.cpp +++ b/depends/pack200/src/coding.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include "defines.h" #include "bytes.h" @@ -53,12 +54,12 @@ extern coding basic_codings[]; #define IS_NEG_CODE(S, codeVal) ((((int)(codeVal) + 1) & ((1 << S) - 1)) == 0) -#define DECODE_SIGN_S1(ux) (((uint)(ux) >> 1) ^ -((int)(ux) & 1)) +#define DECODE_SIGN_S1(ux) (((uint32_t)(ux) >> 1) ^ -((int)(ux) & 1)) -static int decode_sign(int S, uint ux) +static int decode_sign(int S, uint32_t ux) { // == Coding.decodeSign32 assert(S > 0); - uint sigbits = (ux >> S); + uint32_t sigbits = (ux >> S); if (IS_NEG_CODE(S, ux)) return (int)(~sigbits); else @@ -90,9 +91,9 @@ coding *coding::init() return nullptr; // no 5-byte fixed-size coding // first compute the range of the coding, in 64 bits - jlong range = 0; + int64_t range = 0; { - jlong H_i = 1; + int64_t H_i = 1; for (int i = 0; i < B; i++) { range += H_i; @@ -106,7 +107,7 @@ coding *coding::init() int this_umax; // now, compute min and max - if (range >= ((jlong)1 << 32)) + if (range >= ((int64_t)1 << 32)) { this_umax = INT_MAX_VALUE; this->umin = INT_MIN_VALUE; @@ -121,13 +122,13 @@ coding *coding::init() if (S != 0 && range != 0) { int Smask = (1 << S) - 1; - jlong maxPosCode = range - 1; - jlong maxNegCode = range - 1; + int64_t maxPosCode = range - 1; + int64_t maxNegCode = range - 1; while (IS_NEG_CODE(S, maxPosCode)) --maxPosCode; while (!IS_NEG_CODE(S, maxNegCode)) --maxNegCode; - int maxPos = decode_sign(S, (uint)maxPosCode); + int maxPos = decode_sign(S, (uint32_t)maxPosCode); if (maxPos < 0) this->max = INT_MAX_VALUE; // 32-bit wraparound else @@ -135,7 +136,7 @@ coding *coding::init() if (maxNegCode < 0) this->min = 0; // No negative codings at all. else - this->min = decode_sign(S, (uint)maxNegCode); + this->min = decode_sign(S, (uint32_t)maxNegCode); } } @@ -163,7 +164,8 @@ coding *coding::findBySpec(int spec) break; } coding *ptr = NEW(coding, 1); - CHECK_NULL_0(ptr); + if (!ptr) + return nullptr; coding *c = ptr->initFrom(spec); if (c == nullptr) { @@ -207,25 +209,25 @@ void coding_method::reset(value_stream *state) } } -uint coding::parse(byte *&rp, int B, int H) +uint32_t coding::parse(byte *&rp, int B, int H) { int L = 256 - H; byte *ptr = rp; // hand peel the i==0 part of the loop: - uint b_i = *ptr++ & 0xFF; - if (B == 1 || b_i < (uint)L) + uint32_t b_i = *ptr++ & 0xFF; + if (B == 1 || b_i < (uint32_t)L) { rp = ptr; return b_i; } - uint sum = b_i; - uint H_i = H; + uint32_t sum = b_i; + uint32_t H_i = H; assert(B <= B_MAX); for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired b_i = *ptr++ & 0xFF; sum += b_i * H_i; - if (i == B || b_i < (uint)L) + if (i == B || b_i < (uint32_t)L) { rp = ptr; return sum; @@ -236,26 +238,26 @@ uint coding::parse(byte *&rp, int B, int H) return 0; } -uint coding::parse_lgH(byte *&rp, int B, int H, int lgH) +uint32_t coding::parse_lgH(byte *&rp, int B, int H, int lgH) { assert(H == (1 << lgH)); int L = 256 - (1 << lgH); byte *ptr = rp; // hand peel the i==0 part of the loop: - uint b_i = *ptr++ & 0xFF; - if (B == 1 || b_i < (uint)L) + uint32_t b_i = *ptr++ & 0xFF; + if (B == 1 || b_i < (uint32_t)L) { rp = ptr; return b_i; } - uint sum = b_i; - uint lg_H_i = lgH; + uint32_t sum = b_i; + uint32_t lg_H_i = lgH; assert(B <= B_MAX); for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired b_i = *ptr++ & 0xFF; sum += b_i << lg_H_i; - if (i == B || b_i < (uint)L) + if (i == B || b_i < (uint32_t)L) { rp = ptr; return sum; @@ -272,7 +274,7 @@ void coding::parseMultiple(byte *&rp, int N, byte *limit, int B, int H) { if (N < 0) { - abort("bad value count"); + unpack_abort("bad value count"); return; } byte *ptr = rp; @@ -281,7 +283,7 @@ void coding::parseMultiple(byte *&rp, int N, byte *limit, int B, int H) size_t len = (size_t)N * B; if (len / B != (size_t)N || ptr + len > limit) { - abort(ERB); + unpack_abort(ERB); return; } rp = ptr + len; @@ -312,7 +314,7 @@ void coding::parseMultiple(byte *&rp, int N, byte *limit, int B, int H) // do an error check here if (ptr > limit) { - abort(ERB); + unpack_abort(ERB); return; } } @@ -401,12 +403,12 @@ void value_stream::setCoding(coding *defc) } } -static int getPopValue(value_stream *self, uint uval) +static int getPopValue(value_stream *self, uint32_t uval) { if (uval > 0) { // note that the initial parse performed a range check - assert(uval <= (uint)self->cm->fVlength); + assert(uval <= (uint32_t)self->cm->fVlength); return self->cm->fValues[uval - 1]; } else @@ -422,7 +424,7 @@ int coding::sumInUnsignedRange(int x, int y) int range = (int)(umax + 1); assert(range > 0); x += y; - if (x != (int)((jlong)(x - y) + (jlong)y)) + if (x != (int)((int64_t)(x - y) + (int64_t)y)) { // 32-bit overflow interferes with range reduction. // Back off from the overflow by adding a multiple of range: @@ -461,9 +463,9 @@ int coding::sumInUnsignedRange(int x, int y) return x; } -static int getDeltaValue(value_stream *self, uint uval, bool isSubrange) +static int getDeltaValue(value_stream *self, uint32_t uval, bool isSubrange) { - assert((uint)(self->c.isSubrange) == (uint)isSubrange); + assert((uint32_t)(self->c.isSubrange) == (uint32_t)isSubrange); assert(self->c.isSubrange | self->c.isFullRange); if (isSubrange) return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval); @@ -499,7 +501,7 @@ int value_stream::getInt() } CODING_PRIVATE(c.spec); - uint uval; + uint32_t uval; enum { B5 = 5, @@ -546,19 +548,19 @@ int value_stream::getInt() assert(D == 1); uval = coding::parse(rp, B, H); if (S != 0) - uval = (uint)decode_sign(S, uval); + uval = (uint32_t)decode_sign(S, uval); return getDeltaValue(this, uval, (bool)c.isSubrange); case cmk_BHS1D1full: assert(S == 1 && D == 1 && c.isFullRange); uval = coding::parse(rp, B, H); - uval = (uint)DECODE_SIGN_S1(uval); + uval = (uint32_t)DECODE_SIGN_S1(uval); return getDeltaValue(this, uval, false); case cmk_BHS1D1sub: assert(S == 1 && D == 1 && c.isSubrange); uval = coding::parse(rp, B, H); - uval = (uint)DECODE_SIGN_S1(uval); + uval = (uint32_t)DECODE_SIGN_S1(uval); return getDeltaValue(this, uval, true); case cmk_DELTA5: @@ -583,7 +585,7 @@ int value_stream::getInt() uval = coding::parse(rp, B, H); if (S != 0) { - uval = (uint)decode_sign(S, uval); + uval = (uint32_t)decode_sign(S, uval); } if (D != 0) { @@ -592,7 +594,7 @@ int value_stream::getInt() sum = c.sumInUnsignedRange(sum, (int)uval); else sum += (int)uval; - uval = (uint)sum; + uval = (uint32_t)sum; } return getPopValue(this, uval); @@ -616,8 +618,8 @@ int value_stream::getInt() static int moreCentral(int x, int y) { // used to find end of Pop.{F} // Suggested implementation from the Pack200 specification: - uint kx = (x >> 31) ^ (x << 1); - uint ky = (y >> 31) ^ (y << 1); + uint32_t kx = (x >> 31) ^ (x << 1); + uint32_t ky = (y >> 31) ^ (y << 1); return (kx < ky ? x : y); } // static maybe_inline @@ -680,7 +682,7 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m to_free = foundc; // findBySpec may dynamically allocate if (foundc == nullptr) { - abort("illegal arb. coding"); + unpack_abort("illegal arbitrary coding"); return; } // and fall through @@ -699,13 +701,11 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m int N2 = (N >= 0) ? N - K : N; if (N == 0 || (N2 <= 0 && N2 != N)) { - abort("illegal run encoding"); - return; + unpack_abort("illegal run encoding"); } if ((mode & DISABLE_RUN) != 0) { - abort("illegal nested run encoding"); - return; + unpack_abort("illegal nested run encoding"); } // & Enc{ ACode } if ADef=0 (ABDef != 1) @@ -719,11 +719,11 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m { this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink); } - CHECK; // & Enc{ BCode } if BDef=0 (ABDef != 2) coding_method *tail = U_NEW(coding_method, 1); - CHECK_NULL(tail); + if (!tail) + return; tail->u = u; // The 'run' codings may be nested indirectly via 'pop' codings. @@ -764,13 +764,11 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m int TH = (256 - TL); if (N <= 0) { - abort("illegal pop encoding"); - return; + unpack_abort("illegal pop encoding"); } if ((mode & DISABLE_POP) != 0) { - abort("illegal nested pop encoding"); - return; + unpack_abort("illegal nested pop encoding"); } // No indirect nesting of 'pop', but 'run' is OK. @@ -796,7 +794,6 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m fValues = (u->saveTo(fvbuf, fValueSink.b), (int *)fvbuf.ptr); fVlength = fValueSink.length(); // i.e., the parameter K fValueSink.free(); - CHECK; // Skip the first {F} run in all subsequent passes. // The next call to this->init(...) will set vs0.rp to point after the {F}. @@ -812,12 +809,12 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m break; // found it tcode->free(); tcode = coding::findBySpec(B, TH); - CHECK_NULL(tcode); + if (!tcode) + return; } if (!(fVlength <= tcode->umax)) { - abort("pop.L value too small"); - return; + unpack_abort("pop.L value too small"); } this->init(band_rp, band_limit, NO_META, disPop, tcode, N, nullptr); tcode->free(); @@ -826,7 +823,6 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m { this->init(band_rp, band_limit, meta_rp, disPop, defc, N, nullptr); } - CHECK; // Count the number of zero tokens right now. // Also verify that they are in bounds. @@ -834,13 +830,12 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m value_stream vs = vs0; for (int i = 0; i < N; i++) { - uint val = vs.getInt(); + uint32_t val = vs.getInt(); if (val == 0) UN += 1; - if (!(val <= (uint)fVlength)) + if (!(val <= (uint32_t)fVlength)) { - abort("pop token out of range"); - return; + unpack_abort("pop token out of range"); } } vs.done(); @@ -849,7 +844,8 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m if (UN != 0) { uValues = U_NEW(coding_method, 1); - CHECK_NULL(uValues); + if (uValues == nullptr) + return; uValues->u = u; if (UDef != 0) { @@ -867,7 +863,7 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m int uop = (*meta_rp++ & 0xFF); if (uop > _meta_canon_max) // %%% Spec. requires the more strict (uop != _meta_default). - abort("bad meta-coding for empty pop/U"); + unpack_abort("bad meta-coding for empty pop/U"); } } @@ -901,8 +897,7 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m } else { - abort("bad meta-coding"); - return; + unpack_abort("bad meta-coding"); } // Common code here skips a series of values with one coding. @@ -926,7 +921,7 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m coding &c = vs0.c; CODING_PRIVATE(c.spec); // assert sane N - assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N); + assert((uint32_t)N < INT_MAX_VALUE || N == POP_FAVORED_N); // Look at the values, or at least skip over them quickly. if (valueSink == nullptr) @@ -970,14 +965,12 @@ void coding_method::init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int m if (valueSink->length() > 0 && (val == last || val == min)) //|| val == min2 break; valueSink->add(val); - CHECK; last = val; min = moreCentral(min, last); // min2 = moreCentral2(min2, last, min); } band_rp = vs.rp; } - CHECK; // Get an accurate upper limit now. vs0.rplimit = band_rp; diff --git a/depends/pack200/src/coding.h b/depends/pack200/src/coding.h index 5f017b9e..f9bd6ca2 100644 --- a/depends/pack200/src/coding.h +++ b/depends/pack200/src/coding.h @@ -84,11 +84,11 @@ struct coding static coding *findBySpec(int B, int H, int S = 0, int D = 0); static coding *findByIndex(int irregularCodingIndex); - static uint parse(byte *&rp, int B, int H); - static uint parse_lgH(byte *&rp, int B, int H, int lgH); + static uint32_t parse(byte *&rp, int B, int H); + static uint32_t parse_lgH(byte *&rp, int B, int H, int lgH); static void parseMultiple(byte *&rp, int N, byte *limit, int B, int H); - uint parse(byte *&rp) + uint32_t parse(byte *&rp) { return parse(rp, CODING_B(spec), CODING_H(spec)); } @@ -116,12 +116,6 @@ struct coding } void free(); // free self if isMalloc - - // error handling - static void abort(const char *msg = nullptr) - { - unpack_abort(msg); - } }; enum coding_method_kind @@ -224,10 +218,6 @@ struct value_stream return this + 1; } bool hasHelper(); - - // error handling - // inline void abort(const char* msg); - // inline void aborting(); }; struct coding_method @@ -254,17 +244,4 @@ struct coding_method // The value sink is used to collect output values, when desired. void init(byte *&band_rp, byte *band_limit, byte *&meta_rp, int mode, coding *defc, int N, intlist *valueSink); - - // error handling - void abort(const char *msg) - { - unpack_abort(msg, u); - } - bool aborting() - { - return unpack_aborting(u); - } }; - -// inline void value_stream::abort(const char* msg) { cm->abort(msg); } -// inline void value_stream::aborting() { cm->aborting(); } diff --git a/depends/pack200/src/constants.h b/depends/pack200/src/constants.h index aeb3335d..2cc14b7d 100644 --- a/depends/pack200/src/constants.h +++ b/depends/pack200/src/constants.h @@ -51,7 +51,7 @@ // magic number for gzip streams (for processing pack200-gzip data) #define GZIP_MAGIC 0x1F8B0800 -#define GZIP_MAGIC_MASK 0xFFFFFF00 // last byte is variable "flg" field +#define GZIP_MAGIC_MASK 0xFFFFFF00 // last \bchar\b is variable "flg" field enum { diff --git a/depends/pack200/src/defines.h b/depends/pack200/src/defines.h index 63abae0a..cfe5fc28 100644 --- a/depends/pack200/src/defines.h +++ b/depends/pack200/src/defines.h @@ -32,39 +32,22 @@ #include #endif -#ifndef FULL -#define FULL 1 /* Adds <500 bytes to the zipped final product. */ -#endif - -#if FULL // define this if you want debugging and/or compile-time attributes -#define IF_FULL(x) x -#else -#define IF_FULL(x) /*x*/ -#endif - // Error messages that we have -#define ERROR_ENOMEM "Native allocation failed" +#define ERROR_ENOMEM "Memory allocation failed" #define ERROR_FORMAT "Corrupted pack file" #define ERROR_RESOURCE "Cannot extract resource file" #define ERROR_OVERFLOW "Internal buffer overflow" #define ERROR_INTERNAL "Internal error" -#define LOGFILE_STDOUT "-" -#define LOGFILE_STDERR "" - #define lengthof(array) (sizeof(array) / sizeof(array[0])) #define NEW(T, n) (T *) must_malloc((int)(scale_size(n, sizeof(T)))) #define U_NEW(T, n) (T *) u->alloc(scale_size(n, sizeof(T))) #define T_NEW(T, n) (T *) u->temp_alloc(scale_size(n, sizeof(T))) -// bytes and byte arrays - -typedef unsigned int uint; +typedef signed char byte; #ifdef _MSC_VER -typedef LONGLONG jlong; -typedef DWORDLONG julong; #define MKDIR(dir) mkdir(dir) #define getpid() _getpid() #define PATH_MAX MAX_PATH @@ -73,64 +56,10 @@ typedef DWORDLONG julong; #define tempname _tempname #define sleep Sleep #else -typedef signed char byte; -#ifdef _LP64 -typedef long jlong; -typedef long unsigned julong; -#else -typedef long long jlong; -typedef long long unsigned julong; -#endif #define MKDIR(dir) mkdir(dir, 0777); #endif /* Must cast to void *, then size_t, then int. */ #define ptrlowbits(x) ((int)(size_t)(void *)(x)) -/* Back and forth from jlong to pointer */ -#define ptr2jlong(x) ((jlong)(size_t)(void *)(x)) -#define jlong2ptr(x) ((void *)(size_t)(x)) - -// Keys used by Java: -#define UNPACK_DEFLATE_HINT "unpack.deflate.hint" - -#define COM_PREFIX "com.sun.java.util.jar.pack." -#define UNPACK_MODIFICATION_TIME COM_PREFIX "unpack.modification.time" -#define DEBUG_VERBOSE COM_PREFIX "verbose" - -#define ZIP_ARCHIVE_MARKER_COMMENT "PACK200" - -// The following are not known to the Java classes: -#define UNPACK_REMOVE_PACKFILE COM_PREFIX "unpack.remove.packfile" - -// Called from unpacker layers -#define _CHECK_DO(t, x) \ - { \ - if (t) \ - { \ - x; \ - } \ - } - -#define CHECK _CHECK_DO(aborting(), return) -#define CHECK_(y) _CHECK_DO(aborting(), return y) -#define CHECK_0 _CHECK_DO(aborting(), return 0) - -#define CHECK_NULL(p) _CHECK_DO((p) == nullptr, return) -#define CHECK_NULL_(y, p) _CHECK_DO((p) == nullptr, return y) -#define CHECK_NULL_0(p) _CHECK_DO((p) == nullptr, return 0) - -#define CHECK_COUNT(t) \ - if (t < 0) \ - { \ - abort("bad value count"); \ - } \ - CHECK - -#define STR_TRUE "true" -#define STR_FALSE "false" - -#define STR_TF(x) ((x) ? STR_TRUE : STR_FALSE) -#define BOOL_TF(x) (((x) != nullptr &&strcmp((x), STR_TRUE) == 0) ? true : false) - #define DEFAULT_ARCHIVE_MODTIME 1060000000 // Aug 04, 2003 5:26 PM PDT diff --git a/depends/pack200/src/main.cpp b/depends/pack200/src/main.cpp deleted file mode 100644 index ad46a2a2..00000000 --- a/depends/pack200/src/main.cpp +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "defines.h" -#include "bytes.h" -#include "utils.h" -#include "coding.h" -#include "bands.h" - -#include "constants.h" - -#include "zip.h" - -#include "unpack.h" - -int main(int argc, char **argv) -{ - return unpacker::run(argc, argv); -} - -unpacker *unpacker::non_mt_current = nullptr; -unpacker *unpacker::current() -{ - return non_mt_current; -} -static void set_current_unpacker(unpacker *u) -{ - unpacker::non_mt_current = u; -} - -// Callback for fetching data, Unix style. -static jlong read_input_via_stdio(unpacker *u, void *buf, jlong minlen, jlong maxlen) -{ - assert(minlen <= maxlen); // don't talk nonsense - jlong numread = 0; - char *bufptr = (char *)buf; - while (numread < minlen) - { - // read available input, up to buf.length or maxlen - int readlen = (1 << 16); - if (readlen > (maxlen - numread)) - readlen = (int)(maxlen - numread); - int nr = 0; - if (u->infileptr != nullptr) - { - nr = (int)fread(bufptr, 1, readlen, u->infileptr); - } - else - { -#ifndef WIN32 - // we prefer unbuffered inputs - nr = (int)read(u->infileno, bufptr, readlen); -#else - nr = (int)fread(bufptr, 1, readlen, stdin); -#endif - } - if (nr <= 0) - { - if (errno != EINTR) - break; - nr = 0; - } - numread += nr; - bufptr += nr; - assert(numread <= maxlen); - } - // fprintf(u->errstrm, "readInputFn(%d,%d) => %d\n", - // (int)minlen, (int)maxlen, (int)numread); - return numread; -} - -enum -{ - EOF_MAGIC = 0, - BAD_MAGIC = -1 -}; -static int read_magic(unpacker *u, char peek[], int peeklen) -{ - assert(peeklen == 4); // magic numbers are always 4 bytes - jlong nr = (u->read_input_fn)(u, peek, peeklen, peeklen); - if (nr != peeklen) - { - return (nr == 0) ? EOF_MAGIC : BAD_MAGIC; - } - int magic = 0; - for (int i = 0; i < peeklen; i++) - { - magic <<= 8; - magic += peek[i] & 0xFF; - } - return magic; -} - -static void setup_gzin(unpacker *u) -{ - gunzip *gzin = NEW(gunzip, 1); - gzin->init(u); -} - -static const char *nbasename(const char *progname) -{ - const char *slash = strrchr(progname, '/'); - if (slash != nullptr) - progname = ++slash; - return progname; -} - -static const char *usage_lines[] = { - "Usage: %s [-opt... | --option=value]... x.pack[.gz] y.jar\n", "\n", "Unpacking Options\n", - " -H{h}, --deflate-hint={h} override transmitted deflate hint: true, false, or keep " - "(default)\n", - " -r, --remove-pack-file remove input file after unpacking\n", - " -v, --verbose increase program verbosity\n", - " -q, --quiet set verbosity to lowest level\n", - " -l{F}, --log-file={F} output to the given log file, or '-' for standard output " - "(default)\n", - " -?, -h, --help print this message\n", - " -J{X} Java VM argument (ignored)\n", nullptr}; - -static void usage(unpacker *u, const char *progname, bool full = false) -{ - // WinMain does not set argv[0] to the progrname - progname = (progname != nullptr) ? nbasename(progname) : "unpack200"; - for (int i = 0; usage_lines[i] != nullptr; i++) - { - fprintf(stderr, usage_lines[i], progname); - if (!full) - { - fprintf(stderr, "(For more information, run %s --help .)\n", progname); - break; - } - } -} - -// argument parsing -static char **init_args(int argc, char **argv, int &envargc) -{ - const char *env = getenv("UNPACK200_FLAGS"); - ptrlist envargs; - envargs.init(); - if (env != nullptr) - { - char *buf = (char *)strdup(env); - const char *delim = "\n\t "; - for (char *p = strtok(buf, delim); p != nullptr; p = strtok(nullptr, delim)) - { - envargs.add(p); - } - } - // allocate extra margin at both head and tail - char **argp = NEW(char *, envargs.length() + argc + 1); - char **argp0 = argp; - int i; - for (i = 0; i < envargs.length(); i++) - { - *argp++ = (char *)envargs.get(i); - } - for (i = 1; i < argc; i++) - { - // note: skip argv[0] (program name) - *argp++ = (char *)strdup(argv[i]); // make a scratch copy - } - *argp = nullptr; // sentinel - envargc = envargs.length(); // report this count to next_arg - envargs.free(); - return argp0; -} - -static int strpcmp(const char *str, const char *pfx) -{ - return strncmp(str, pfx, strlen(pfx)); -} - -static const char flag_opts[] = "vqrVh?"; -static const char string_opts[] = "HlJ"; - -static int next_arg(char **&argp) -{ - char *arg = *argp; - if (arg == nullptr || arg[0] != '-') - { // end of option list - return 0; - } - // printf("opt: %s\n", arg); - char ach = arg[1]; - if (ach == '\0') - { - // ++argp; // do not pop this arg - return 0; // bare "-" is stdin/stdout - } - else if (arg[1] == '-') - { // --foo option - static const char *keys[] = {"Hdeflate-hint=", "vverbose", "qquiet", - "rremove-pack-file", "llog-file=", "Vversion", - "hhelp", nullptr}; - if (arg[2] == '\0') - { // end of option list - ++argp; // pop the "--" - return 0; - } - for (int i = 0; keys[i] != nullptr; i++) - { - const char *key = keys[i]; - char kch = *key++; - if (strchr(key, '=') == nullptr) - { - if (!strcmp(arg + 2, key)) - { - ++argp; // pop option arg - return kch; - } - } - else - { - if (!strpcmp(arg + 2, key)) - { - *argp += 2 + strlen(key); // remove "--"+key from arg - return kch; - } - } - } - } - else if (strchr(flag_opts, ach) != nullptr) - { // plain option - if (arg[2] == '\0') - { - ++argp; - } - else - { - // in-place edit of "-vxyz" to "-xyz" - arg += 1; // skip original '-' - arg[0] = '-'; - *argp = arg; - } - // printf(" key => %c\n", ach); - return ach; - } - else if (strchr(string_opts, ach) != nullptr) - { // argument-bearing option - if (arg[2] == '\0') - { - if (argp[1] == nullptr) - return -1; // no next arg - ++argp; // leave the argument in place - } - else - { - // in-place edit of "-Hxyz" to "xyz" - arg += 2; // skip original '-H' - *argp = arg; - } - // printf(" key => %c\n", ach); - return ach; - } - return -1; // bad argument -} - -static const char sccsver[] = "1.30, 07/05/05"; - -// Usage: unpackage input.pack output.jar -int unpacker::run(int argc, char **argv) -{ - unpacker u; - u.init(read_input_via_stdio); - set_current_unpacker(&u); - - jar jarout; - jarout.init(&u); - - int envargc = 0; - char **argbuf = init_args(argc, argv, envargc); - char **arg0 = argbuf + envargc; - char **argp = argbuf; - - int verbose = 0; - char *logfile = nullptr; - - for (;;) - { - const char *arg = (*argp == nullptr) ? "" : u.saveStr(*argp); - bool isenvarg = (argp < arg0); - int ach = next_arg(argp); - bool hasoptarg = (ach != 0 && strchr(string_opts, ach) != nullptr); - if (ach == 0 && argp >= arg0) - break; - if (isenvarg && argp == arg0 && hasoptarg) - ach = 0; // don't pull from cmdline - switch (ach) - { - case 'H': - u.set_option(UNPACK_DEFLATE_HINT, *argp++); - break; - case 'v': - ++verbose; - break; - case 'q': - verbose = 0; - break; - case 'r': - u.set_option(UNPACK_REMOVE_PACKFILE, "1"); - break; - case 'l': - logfile = *argp++; - break; - case 'J': - argp += 1; - break; // skip ignored -Jxxx parameter - - case 'h': - case '?': - usage(&u, argv[0], true); - exit(1); - - default: - const char *inenv = isenvarg ? " in ${UNPACK200_FLAGS}" : ""; - if (hasoptarg) - fprintf(stderr, "Missing option string%s: %s\n", inenv, arg); - else - fprintf(stderr, "Unrecognized argument%s: %s\n", inenv, arg); - usage(&u, argv[0]); - exit(2); - } - } - - if (verbose != 0) - { - u.set_option(DEBUG_VERBOSE, u.saveIntStr(verbose)); - } - - const char *source_file = *argp++; - const char *destination_file = *argp++; - - if (source_file == nullptr || destination_file == nullptr || *argp != nullptr) - { - usage(&u, argv[0]); - exit(2); - } - - if (verbose != 0) - { - fprintf(stderr, "Unpacking from %s to %s\n", source_file, destination_file); - } - bool &remove_source = u.remove_packfile; - - if (strcmp(source_file, "-") == 0) - { - remove_source = false; - u.infileno = fileno(stdin); - } - else - { - u.infileptr = fopen(source_file, "rb"); - if (u.infileptr == nullptr) - { - fprintf(stderr, "Error: Could not open input file: %s\n", source_file); - exit(3); // Called only from the native standalone unpacker - } - } - - if (strcmp(destination_file, "-") == 0) - { - jarout.jarfp = stdout; - } - else - { - jarout.openJarFile(destination_file); - assert(jarout.jarfp != nullptr); - } - - if (verbose != 0) - u.dump_options(); - - char peek[4]; - int magic; - - // check for GZIP input - magic = read_magic(&u, peek, (int)sizeof(peek)); - if ((magic & GZIP_MAGIC_MASK) == GZIP_MAGIC) - { - // Oops; must slap an input filter on this data. - setup_gzin(&u); - u.gzin->start(magic); - if (!u.aborting()) - { - u.start(); - } - } - else - { - u.start(peek, sizeof(peek)); - } - - // Note: The checks to u.aborting() are necessary to gracefully - // terminate processing when the first segment throws an error. - - for (;;) - { - if (u.aborting()) - break; - - // Each trip through this loop unpacks one segment - // and then resets the unpacker. - for (unpacker::file *filep; (filep = u.get_next_file()) != nullptr;) - { - if (u.aborting()) - break; - u.write_file_to_jar(filep); - } - if (u.aborting()) - break; - - // Peek ahead for more data. - magic = read_magic(&u, peek, (int)sizeof(peek)); - if (magic != (int)JAVA_PACKAGE_MAGIC) - { - if (magic != EOF_MAGIC) - u.abort("garbage after end of pack archive"); - break; // all done - } - - // Release all storage from parsing the old segment. - u.reset(); - - // Restart, beginning with the peek-ahead. - u.start(peek, sizeof(peek)); - } - - int status = 0; - if (u.aborting()) - { - fprintf(stderr, "Error: %s\n", u.get_abort_message()); - status = 1; - } - - if (u.infileptr != nullptr) - { - fclose(u.infileptr); - u.infileptr = nullptr; - } - - if (!u.aborting() && remove_source) - remove(source_file); - - if (verbose != 0) - { - fprintf(stderr, "unpacker completed with status=%d\n", status); - } - - u.finish(); - - u.free(); // tidy up malloc blocks - set_current_unpacker(nullptr); // clean up global pointer - - return status; -} diff --git a/depends/pack200/src/unpack.cpp b/depends/pack200/src/unpack.cpp index a562d442..8a66d42a 100644 --- a/depends/pack200/src/unpack.cpp +++ b/depends/pack200/src/unpack.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include "defines.h" #include "bytes.h" @@ -79,23 +80,15 @@ enum REQUESTED_LDC = -1 }; -#define NO_INORD ((uint) - 1) +#define NO_INORD ((uint32_t) - 1) struct entry { byte tag; - -#if 0 - byte bits; - enum { - //EB_EXTRA = 1, - EB_SUPER = 2 - }; -#endif unsigned short nrefs; // pack w/ tag int outputIndex; - uint inord; // &cp.entries[cp.tag_base[this->tag]+this->inord] == this + uint32_t inord; // &cp.entries[cp.tag_base[this->tag]+this->inord] == this entry **refs; @@ -104,10 +97,10 @@ struct entry { bytes b; int i; - jlong l; + int64_t l; } value; - void requestOutputIndex(cpool &cp, int req = REQUESTED); + void requestOutputIndex(constant_pool &cp, int req = REQUESTED); int getOutputIndex() { assert(outputIndex > NOT_REQUESTED); @@ -116,7 +109,7 @@ struct entry entry *ref(int refnum) { - assert((uint)refnum < nrefs); + assert((uint32_t)refnum < nrefs); return refs[refnum]; } @@ -178,11 +171,15 @@ struct entry bool tagMatches(byte tag2) { - return (tag2 == tag) || (tag2 == CONSTANT_Utf8 && tag == CONSTANT_Signature); + return (tag2 == tag) || (tag2 == CONSTANT_Utf8 && tag == CONSTANT_Signature) || + (tag2 == CONSTANT_Literal && tag >= CONSTANT_Integer && tag <= CONSTANT_String && + tag != CONSTANT_Class) || + (tag2 == CONSTANT_Member && tag >= CONSTANT_Fieldref && + tag <= CONSTANT_InterfaceMethodref); } }; -entry *cpindex::get(uint i) +entry *cpindex::get(uint32_t i) { if (i >= len) return nullptr; @@ -250,16 +247,16 @@ int entry::typeSize() } } -inline cpindex *cpool::getFieldIndex(entry *classRef) +inline cpindex *constant_pool::getFieldIndex(entry *classRef) { assert(classRef->tagMatches(CONSTANT_Class)); - assert((uint)classRef->inord < (uint)tag_count[CONSTANT_Class]); + assert((uint32_t)classRef->inord < (uint32_t)tag_count[CONSTANT_Class]); return &member_indexes[classRef->inord * 2 + 0]; } -inline cpindex *cpool::getMethodIndex(entry *classRef) +inline cpindex *constant_pool::getMethodIndex(entry *classRef) { assert(classRef->tagMatches(CONSTANT_Class)); - assert((uint)classRef->inord < (uint)tag_count[CONSTANT_Class]); + assert((uint32_t)classRef->inord < (uint32_t)tag_count[CONSTANT_Class]); return &member_indexes[classRef->inord * 2 + 1]; } @@ -277,7 +274,6 @@ struct inner_class void unpacker::free() { int i; - assert(infileptr == nullptr); // caller resp. if (jarout != nullptr) jarout->reset(); if (gzin != nullptr) @@ -287,7 +283,9 @@ void unpacker::free() } if (free_input) input.free(); - // free everybody ever allocated with U_NEW or (recently) with T_NEW + /* + * free everybody ever allocated with U_NEW or (recently) with T_NEW + */ assert(smallbuf.base() == nullptr || mallocs.contains(smallbuf.base())); assert(tsmallbuf.base() == nullptr || tmallocs.contains(tsmallbuf.base())); mallocs.freeAll(); @@ -318,10 +316,10 @@ void unpacker::free() // Will eagerly read ahead by larger chunks, if possible. // Returns false if (rplimit-rp) is not at least 'more', // unless rplimit hits input.limit(). -bool unpacker::ensure_input(jlong more) +bool unpacker::ensure_input(int64_t more) { - julong want = more - input_remaining(); - if ((jlong)want <= 0) + uint64_t want = more - input_remaining(); + if ((int64_t)want <= 0) return true; // it's already in the buffer if (rplimit == input.limit()) return true; // not expecting any more @@ -333,23 +331,22 @@ bool unpacker::ensure_input(jlong more) rplimit = input.limit(); return true; } - CHECK_0; - julong remaining = (input.limit() - rplimit); // how much left to read? + uint64_t remaining = (input.limit() - rplimit); // how much left to read? byte *rpgoal = (want >= remaining) ? input.limit() : rplimit + (size_t)want; enum { CHUNK_SIZE = (1 << 14) }; - julong fetch = want; + uint64_t fetch = want; if (fetch < CHUNK_SIZE) fetch = CHUNK_SIZE; if (fetch > remaining * 3 / 4) fetch = remaining; // Try to fetch at least "more" bytes. - while ((jlong)fetch > 0) + while ((int64_t)fetch > 0) { - jlong nr = (*read_input_fn)(this, rplimit, fetch, remaining); + int64_t nr = (*read_input_fn)(this, rplimit, fetch, remaining); if (nr <= 0) { return (rplimit >= rpgoal); @@ -358,7 +355,7 @@ bool unpacker::ensure_input(jlong more) rplimit += nr; fetch -= nr; bytes_read += nr; - assert(remaining == (julong)(input.limit() - rplimit)); + assert(remaining == (uint64_t)(input.limit() - rplimit)); } return true; } @@ -434,10 +431,10 @@ void unpacker::putu4_at(byte *wp, int n) wp[3] = (n) >> 0; } -void unpacker::putu8_at(byte *wp, jlong n) +void unpacker::putu8_at(byte *wp, int64_t n) { - putu4_at(wp + 0, (int)((julong)n >> 32)); - putu4_at(wp + 4, (int)((julong)n >> 0)); + putu4_at(wp + 0, (int)((uint64_t)n >> 32)); + putu4_at(wp + 4, (int)((uint64_t)n >> 0)); } void unpacker::putu2(int n) @@ -450,7 +447,7 @@ void unpacker::putu4(int n) putu4_at(put_space(4), n); } -void unpacker::putu8(jlong n) +void unpacker::putu8(int64_t n) { putu8_at(put_space(8), n); } @@ -521,11 +518,6 @@ void *unpacker::alloc_heap(size_t size, bool smallOK, bool temp) void unpacker::saveTo(bytes &b, byte *ptr, size_t len) { b.ptr = U_NEW(byte, add_size(len, 1)); - if (aborting()) - { - b.len = 0; - return; - } b.len = len; b.copyFrom(ptr, len); } @@ -591,8 +583,7 @@ void unpacker::read_file_header() // Therefore, the caller must use only a bare minimum of read-ahead. if (inbytes.len > FIRST_READ) { - abort("too much read-ahead"); - return; + unpack_abort("too much read-ahead"); } input.set(initbuf, sizeof(initbuf)); input.b.clear(); @@ -605,7 +596,7 @@ void unpacker::read_file_header() // but is certain not to overflow past the archive_header. input.b.len = FIRST_READ; if (!ensure_input(FIRST_READ)) - abort("EOF reading archive magic number"); + unpack_abort("EOF reading archive magic number"); if (rp[0] == 'P' && rp[1] == 'K') { @@ -621,7 +612,6 @@ void unpacker::read_file_header() { // Get some breathing room. input.set(U_NEW(byte, (size_t)CHUNK + C_SLOP), (size_t)CHUNK); - CHECK; } rp = rplimit = input.base(); if (!ensure_input(1)) @@ -658,9 +648,8 @@ void unpacker::read_file_header() magic, majver, minver, JAVA_PACKAGE_MAGIC, JAVA5_PACKAGE_MAJOR_VERSION, JAVA5_PACKAGE_MINOR_VERSION, JAVA_PACKAGE_MAGIC, JAVA6_PACKAGE_MAJOR_VERSION, JAVA6_PACKAGE_MINOR_VERSION); - abort(message); + unpack_abort(message); } - CHECK; archive_options = hdr.getInt(); hdrVals += 1; @@ -672,15 +661,15 @@ void unpacker::read_file_header() if ((archive_options & ~OPTION_LIMIT) != 0) { fprintf(stderr, "Warning: Illegal archive options 0x%x\n", archive_options); - abort("illegal archive options"); + unpack_abort("illegal archive options"); return; } if ((archive_options & AO_HAVE_FILE_HEADERS) != 0) { - uint hi = hdr.getInt(); - uint lo = hdr.getInt(); - julong x = band::makeLong(hi, lo); + uint32_t hi = hdr.getInt(); + uint32_t lo = hdr.getInt(); + uint64_t x = band::makeLong(hi, lo); archive_size = (size_t)x; if (archive_size != x) { @@ -701,12 +690,11 @@ void unpacker::read_file_header() int header_size_1 = (int)(rplimit - rp); // buffered unused initial fragment int header_size = header_size_0 + header_size_1; unsized_bytes_read = header_size_0; - CHECK; if (foreign_buf) { if (archive_size > (size_t)header_size_1) { - abort("EOF reading fixed input buffer"); + unpack_abort("EOF reading fixed input buffer"); return; } } @@ -714,17 +702,16 @@ void unpacker::read_file_header() { if (archive_size < ARCHIVE_SIZE_MIN) { - abort("impossible archive size"); // bad input data + unpack_abort("impossible archive size"); // bad input data return; } if (archive_size < header_size_1) { - abort("too much read-ahead"); // somehow we pre-fetched too much? + unpack_abort("too much read-ahead"); // somehow we pre-fetched too much? return; } input.set(U_NEW(byte, add_size(header_size_0, archive_size, C_SLOP)), (size_t)header_size_0 + archive_size); - CHECK; assert(input.limit()[0] == 0); // Move all the bytes we read initially into the real buffer. input.b.copyFrom(initbuf, header_size); @@ -736,19 +723,16 @@ void unpacker::read_file_header() // It's more complicated and painful. // A zero archive_size means that we must read until EOF. input.init(CHUNK * 2); - CHECK; input.b.len = input.allocated; rp = rplimit = input.base(); // Set up input buffer as if we already read the header: input.b.copyFrom(initbuf, header_size); - CHECK; rplimit += header_size; while (ensure_input(input.limit() - rp)) { size_t dataSoFar = input_remaining(); size_t nextSize = add_size(dataSoFar, CHUNK); input.ensureSize(nextSize); - CHECK; input.b.len = input.allocated; rp = rplimit = input.base(); rplimit += dataSoFar; @@ -756,7 +740,6 @@ void unpacker::read_file_header() size_t dataSize = (rplimit - input.base()); input.b.len = dataSize; input.grow(C_SLOP); - CHECK; free_input = true; // free it later input.b.len = dataSize; assert(input.limit()[0] == 0); @@ -765,25 +748,21 @@ void unpacker::read_file_header() rp += header_size_0; // already scanned these bytes... } live_input = true; // mark as "do not reuse" - if (aborting()) - { - abort("cannot allocate large input buffer for package file"); - return; - } // read the rest of the header fields ensure_input((AH_LENGTH - AH_LENGTH_0) * B_MAX); - CHECK; hdr.rp = rp; hdr.rplimit = rplimit; if ((archive_options & AO_HAVE_FILE_HEADERS) != 0) { archive_next_count = hdr.getInt(); - CHECK_COUNT(archive_next_count); + if (archive_next_count < 0) + unpack_abort("bad archive_next_count"); archive_modtime = hdr.getInt(); file_count = hdr.getInt(); - CHECK_COUNT(file_count); + if (file_count < 0) + unpack_abort("bad file_count"); hdrVals += 3; } else @@ -794,9 +773,11 @@ void unpacker::read_file_header() if ((archive_options & AO_HAVE_SPECIAL_FORMATS) != 0) { band_headers_size = hdr.getInt(); - CHECK_COUNT(band_headers_size); + if (band_headers_size < 0) + unpack_abort("bad band_headers_size"); attr_definition_count = hdr.getInt(); - CHECK_COUNT(attr_definition_count); + if (attr_definition_count < 0) + unpack_abort("bad attr_definition_count"); hdrVals += 2; } else @@ -821,16 +802,22 @@ void unpacker::read_file_header() } } cp_counts[k] = hdr.getInt(); - CHECK_COUNT(cp_counts[k]); + if (cp_counts[k] < 0) + unpack_abort("bad cp_counts"); hdrVals += 1; } ic_count = hdr.getInt(); - CHECK_COUNT(ic_count); + if (ic_count < 0) + unpack_abort("bad ic_count"); + default_class_minver = hdr.getInt(); default_class_majver = hdr.getInt(); + class_count = hdr.getInt(); - CHECK_COUNT(class_count); + if (class_count < 0) + unpack_abort("bad class_count"); + hdrVals += 4; // done with archive_header @@ -839,11 +826,10 @@ void unpacker::read_file_header() rp = hdr.rp; if (rp > rplimit) - abort("EOF reading archive header"); + unpack_abort("EOF reading archive header"); // Now size the CP. cp.init(this, cp_counts); - CHECK; default_file_modtime = archive_modtime; if (default_file_modtime == 0 && !(archive_options & AO_HAVE_FILE_MODTIME)) @@ -856,13 +842,13 @@ void unpacker::read_file_header() ensure_input(band_headers_size); if (input_remaining() < (size_t)band_headers_size) { - abort("EOF reading band headers"); + unpack_abort("EOF reading band headers"); return; } bytes band_headers; // The "1+" allows an initial byte to be pushed on the front. band_headers.set(1 + U_NEW(byte, 1 + band_headers_size + C_SLOP), band_headers_size); - CHECK; + // Start scanning band headers here: band_headers.copyFrom(rp, band_headers.len); rp += band_headers.len; @@ -891,7 +877,7 @@ void unpacker::finish() } // Cf. PackageReader.readConstantPoolCounts -void cpool::init(unpacker *u_, int counts[NUM_COUNTS]) +void constant_pool::init(unpacker *u_, int counts[NUM_COUNTS]) { this->u = u_; @@ -915,8 +901,7 @@ void cpool::init(unpacker *u_, int counts[NUM_COUNTS]) }; if (len >= (1 << 29) || len < 0 || next_entry >= CP_SIZE_LIMIT + IMPLICIT_ENTRY_COUNT) { - abort("archive too large: constant pool limit exceeded"); - return; + unpack_abort("archive too large: constant pool limit exceeded"); } } @@ -937,7 +922,6 @@ void cpool::init(unpacker *u_, int counts[NUM_COUNTS]) // the entries are renumbered for classfile output. entries = U_NEW(entry, maxentries); - CHECK; first_extra_entry = &entries[nentries]; @@ -951,8 +935,8 @@ void cpool::init(unpacker *u_, int counts[NUM_COUNTS]) } // Initialize hashTab to a generous power-of-two size. - uint pow2 = 1; - uint target = maxentries + maxentries / 2; // 60% full + uint32_t pow2 = 1; + uint32_t target = maxentries + maxentries / 2; // 60% full while (pow2 < target) pow2 <<= 1; hashTab = U_NEW(entry *, hashTabLength = pow2); @@ -1056,7 +1040,6 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) cp_Utf8_suffix.readData(len - SUFFIX_SKIP_1); bytes *allsuffixes = T_NEW(bytes, len); - CHECK; int nbigsuf = 0; fillbytes charbuf; // buffer to allocate small strings @@ -1069,8 +1052,7 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) int suffix = (i < SUFFIX_SKIP_1) ? 0 : cp_Utf8_suffix.getInt(); if (suffix < 0) { - abort("bad utf8 suffix"); - return; + unpack_abort("bad utf8 suffix"); } if (suffix == 0 && i >= SUFFIX_SKIP_1) { @@ -1079,7 +1061,7 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) continue; } bytes &chars = allsuffixes[i]; - uint size3 = suffix * 3; // max Utf8 length + uint32_t size3 = suffix * 3; // max Utf8 length bool isMalloc = (suffix > SMALL); if (isMalloc) { @@ -1095,7 +1077,7 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) } chars.set(charbuf.grow(size3 + 1), size3); } - CHECK; + byte *chp = chars.ptr; for (int j = 0; j < suffix; j++) { @@ -1106,7 +1088,6 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) if (isMalloc) { chars.realloc(chp - chars.ptr); - CHECK; tmallocs.add(chars.ptr); // free it later } else @@ -1131,8 +1112,7 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) int prefix = (i < PREFIX_SKIP_2) ? 0 : cp_Utf8_prefix.getInt(); if (prefix < 0 || prefix + suffix < 0) { - abort("bad utf8 prefix"); - return; + unpack_abort("bad utf8 prefix"); } bytes &chars = allsuffixes[i]; if (suffix == 0 && i >= SUFFIX_SKIP_1) @@ -1161,7 +1141,7 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) if (chars.ptr != nullptr) continue; // already input int suffix = (int)chars.len; // pick up the hack - uint size3 = suffix * 3; + uint32_t size3 = suffix * 3; if (suffix == 0) continue; // done with empty string chars.malloc(size3); @@ -1174,7 +1154,6 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) chp = store_Utf8_char(chp, ch); } chars.realloc(chp - chars.ptr); - CHECK; tmallocs.add(chars.ptr); // free it later // cp_Utf8_big_chars.done(); cp_Utf8_big_chars = saved_band; // reset the band for the next string @@ -1185,9 +1164,8 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) // Finally, sew together all the prefixes and suffixes. bytes bigbuf; bigbuf.malloc(maxlen * 3 + 1); // max Utf8 length, plus slop for nullptr - CHECK; - int prevlen = 0; // previous string length (in chars) - tmallocs.add(bigbuf.ptr); // free after this block + int prevlen = 0; // previous string length (in chars) + tmallocs.add(bigbuf.ptr); // free after this block cp_Utf8_prefix.rewind(); for (i = 0; i < len; i++) { @@ -1199,7 +1177,7 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) // make sure the prefix value is not corrupted, though: if (prefix > prevlen) { - abort("utf8 prefix overflow"); + unpack_abort("utf8 prefix overflow"); return; } fillp = skip_Utf8_chars(bigbuf.ptr, prefix); @@ -1211,7 +1189,6 @@ void unpacker::read_Utf8_values(entry *cpMap, int len) bytes &value = cpMap[i].value.b; value.set(U_NEW(byte, add_size(length, 1)), length); value.copyFrom(bigbuf.ptr, length); - CHECK; // Index all Utf8 strings entry *&htref = cp.hashTabRef(CONSTANT_Utf8, value); if (htref == nullptr) @@ -1256,14 +1233,12 @@ void unpacker::read_single_refs(band &cp_band, byte refTag, entry *cpMap, int le assert(refTag == CONSTANT_Utf8); cp_band.setIndexByTag(refTag); cp_band.readData(len); - CHECK; int indexTag = (cp_band.bn == e_cp_Class) ? CONSTANT_Class : 0; for (int i = 0; i < len; i++) { entry &e = cpMap[i]; e.refs = U_NEW(entry *, e.nrefs = 1); entry *utf = cp_band.getRef(); - CHECK; e.refs[0] = utf; e.value.b = utf->value.b; // copy value of Utf8 string to self if (indexTag != 0) @@ -1290,14 +1265,12 @@ void unpacker::read_double_refs(band &cp_band, byte ref1Tag, byte ref2Tag, entry cp_band2.setIndexByTag(ref2Tag); cp_band1.readData(len); cp_band2.readData(len); - CHECK; for (int i = 0; i < len; i++) { entry &e = cpMap[i]; e.refs = U_NEW(entry *, e.nrefs = 2); e.refs[0] = cp_band1.getRef(); e.refs[1] = cp_band2.getRef(); - CHECK; } // cp_band1.done(); // cp_band2.done(); @@ -1308,14 +1281,12 @@ void unpacker::read_signature_values(entry *cpMap, int len) { cp_Signature_form.setIndexByTag(CONSTANT_Utf8); cp_Signature_form.readData(len); - CHECK; int ncTotal = 0; int i; for (i = 0; i < len; i++) { entry &e = cpMap[i]; entry &form = *cp_Signature_form.getRef(); - CHECK; int nc = 0; for (const char *ncp = form.utf8String(); *ncp; ncp++) @@ -1326,7 +1297,6 @@ void unpacker::read_signature_values(entry *cpMap, int len) ncTotal += nc; e.refs = U_NEW(entry *, cpMap[i].nrefs = 1 + nc); - CHECK; e.refs[0] = &form; } // cp_Signature_form.done(); @@ -1338,7 +1308,6 @@ void unpacker::read_signature_values(entry *cpMap, int len) for (int j = 1; j < e.nrefs; j++) { e.refs[j] = cp_Signature_classes.getRef(); - CHECK; } } // cp_Signature_classes.done(); @@ -1410,19 +1379,16 @@ void unpacker::read_cp() assert(false); break; } - CHECK; } cp.expandSignatures(); - CHECK; cp.initMemberIndexes(); - CHECK; #define SNAME(n, s) #s "\0" const char *symNames = (ALL_ATTR_DO(SNAME) ""); #undef SNAME - for (int sn = 0; sn < cpool::s_LIMIT; sn++) + for (int sn = 0; sn < constant_pool::s_LIMIT; sn++) { assert(symNames[0] >= '0' && symNames[0] <= 'Z'); // sanity bytes name; @@ -1469,7 +1435,6 @@ unpacker::attr_definitions::defineLayout(int idx, entry *nameEntry, const char * { const char *name = nameEntry->value.b.strval(); layout_definition *lo = defineLayout(idx, name, layout); - CHECK_0; lo->nameEntry = nameEntry; return lo; } @@ -1482,10 +1447,10 @@ unpacker::layout_definition *unpacker::attr_definitions::defineLayout(int idx, c { // Fixed attr. if (idx >= (int)flag_limit) - abort("attribute index too large"); + unpack_abort("attribute index too large"); if (isRedefined(idx)) - abort("redefined attribute index"); - redef |= ((julong)1 << idx); + unpack_abort("redefined attribute index"); + redef |= ((uint64_t)1 << idx); } else { @@ -1493,7 +1458,6 @@ unpacker::layout_definition *unpacker::attr_definitions::defineLayout(int idx, c overflow_count.add(0); // make a new counter } layout_definition *lo = U_NEW(layout_definition, 1); - CHECK_0; lo->idx = idx; lo->name = name; lo->layout = layout; @@ -1501,7 +1465,6 @@ unpacker::layout_definition *unpacker::attr_definitions::defineLayout(int idx, c { layouts.add(nullptr); } - CHECK_0; layouts.get(idx) = lo; return lo; } @@ -1522,13 +1485,11 @@ band **unpacker::attr_definitions::buildBands(unpacker::layout_definition *lo) bands_made = 0x10000; // base number for bands made const char *lp = lo->layout; lp = parseLayout(lp, lo->elems, -1); - CHECK_0; if (lp[0] != '\0' || band_stack.length() > 0) { - abort("garbage at end of layout"); + unpack_abort("garbage at end of layout"); } band_stack.popTo(0); - CHECK_0; // Fix up callables to point at their callees. band **bands = lo->elems; @@ -1540,7 +1501,7 @@ band **unpacker::attr_definitions::buildBands(unpacker::layout_definition *lo) { if (bands[num_callables]->le_kind != EK_CBLE) { - abort("garbage mixed with callables"); + unpack_abort("garbage mixed with callables"); break; } num_callables += 1; @@ -1554,7 +1515,7 @@ band **unpacker::attr_definitions::buildBands(unpacker::layout_definition *lo) int call_num = call.le_len; if (call_num < 0 || call_num >= num_callables) { - abort("bad call in layout"); + unpack_abort("bad call in layout"); break; } band &cble = *bands[call_num]; @@ -1636,7 +1597,6 @@ const char *unpacker::attr_definitions::parseIntLayout(const char *lp, band *&re { const char *lp0 = lp; band *b = U_NEW(band, 1); - CHECK_(lp); char le = *lp++; int spec = UNSIGNED5_spec; if (le == 'S' && can_be_signed) @@ -1667,7 +1627,7 @@ const char *unpacker::attr_definitions::parseIntLayout(const char *lp, band *&re le_len = 0; break; default: - abort("bad layout element"); + unpack_abort("bad layout element"); } b->le_len = le_len; band_stack.add(b); @@ -1704,15 +1664,13 @@ const char *unpacker::attr_definitions::parseNumeral(const char *lp, int &res) } if (lp == dp) { - abort("missing numeral in layout"); - return ""; + unpack_abort("missing numeral in layout"); } lp = dp; if (con < 0 && !(sgn && con == -con)) { // (Portability note: Misses the error if int is not 32 bits.) - abort("numeral overflow"); - return ""; + unpack_abort("numeral overflow"); } if (sgn) con = -con; @@ -1732,7 +1690,6 @@ band **unpacker::attr_definitions::popBody(int bs_base) { int nb = bs_limit - bs_base; band **res = U_NEW(band *, add_size(nb, 1)); - CHECK_(no_bands); for (int i = 0; i < nb; i++) { band *b = (band *)band_stack.get(bs_base + i); @@ -1789,12 +1746,11 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, b->le_bci = EK_BCO; b->defc = coding::findBySpec(BRANCH5_spec); break; - case 'N': // replication: 'N' uint '[' elem ... ']' + case 'N': // replication: 'N' uint32_t '[' elem ... ']' lp = parseIntLayout(lp, b, EK_REPL); assert(*lp == '['); ++lp; lp = parseLayout(lp, b->le_body, curCble); - CHECK_(lp); break; case 'T': // union: 'T' any_int union_case* '(' ')' '[' body ']' lp = parseIntLayout(lp, b, EK_UN, can_be_signed); @@ -1803,13 +1759,12 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, for (;;) { // for each case band &k_case = *U_NEW(band, 1); - CHECK_(lp); band_stack.add(&k_case); k_case.le_kind = EK_CASE; k_case.bn = bands_made++; if (*lp++ != '(') { - abort("bad union case"); + unpack_abort("bad union case"); return ""; } if (*lp++ != ')') @@ -1827,19 +1782,19 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, // new in version 160, allow (1-5) for (1,2,3,4,5) if (u->majver < JAVA6_PACKAGE_MAJOR_VERSION) { - abort("bad range in union case label (old archive format)"); + unpack_abort( + "bad range in union case label (old archive format)"); return ""; } int caselimit = caseval; lp++; lp = parseNumeral(lp, caselimit); if (caseval >= caselimit || - (uint)(caselimit - caseval) > 0x10000) + (uint32_t)(caselimit - caseval) > 0x10000) { // Note: 0x10000 is arbitrary implementation restriction. // We can remove it later if it's important to. - abort("bad range in union case label"); - return ""; + unpack_abort("bad range in union case label"); } for (;;) { @@ -1855,13 +1810,11 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, } if (*lp++ != ')') { - abort("bad case label"); - return ""; + unpack_abort("bad case label"); } // save away the case labels int ntags = band_stack.length() - case_base; int *tags = U_NEW(int, add_size(ntags, 1)); - CHECK_(lp); k_case.le_casetags = tags; *tags++ = ntags; for (int i = 0; i < ntags; i++) @@ -1869,13 +1822,11 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, *tags++ = ptrlowbits(band_stack.get(case_base + i)); } band_stack.popTo(case_base); - CHECK_(lp); } // Got le_casetags. Now grab the body. assert(*lp == '['); ++lp; lp = parseLayout(lp, k_case.le_body, curCble); - CHECK_(lp); if (k_case.le_casetags == nullptr) break; // done } @@ -1885,7 +1836,6 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, case '(': // call: '(' -?NN* ')' { band &call = *U_NEW(band, 1); - CHECK_(lp); band_stack.add(&call); call.le_kind = EK_CALL; call.bn = bands_made++; @@ -1896,11 +1846,9 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, call_num += curCble; // numeral is self-relative offset call.le_len = call_num; // use le_len as scratch calls_to_link.add(&call); - CHECK_(lp); if (*lp++ != ')') { - abort("bad call label"); - return ""; + unpack_abort("bad call label"); } } break; @@ -1964,7 +1912,7 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, } if (ixTag == CONSTANT_None) { - abort("bad reference layout"); + unpack_abort("bad reference layout"); break; } bool nullOK = false; @@ -1983,12 +1931,11 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, // [callable1][callable2]... if (!top_level) { - abort("bad nested callable"); + unpack_abort("bad nested callable"); break; } curCble += 1; band &cble = *U_NEW(band, 1); - CHECK_(lp); band_stack.add(&cble); cble.le_kind = EK_CBLE; cble.bn = bands_made++; @@ -2005,10 +1952,8 @@ const char *unpacker::attr_definitions::parseLayout(const char *lp, band **&res, done = true; break; default: - abort("bad layout"); - break; + unpack_abort("bad layout"); } - CHECK_(lp); } // Return the accumulated bands: @@ -2095,10 +2040,8 @@ void unpacker::read_attr_defs() attr_definition_name.readData(attr_definition_count); attr_definition_layout.readData(attr_definition_count); - CHECK; - // Initialize correct predef bits, to distinguish predefs from new defs. -#define ORBIT(n, s) | ((julong)1 << n) +#define ORBIT(n, s) | ((uint64_t)1 << n) attr_defs[ATTR_CONTEXT_CLASS].predef = (0 X_ATTR_DO(ORBIT) CLASS_ATTR_DO(ORBIT)); attr_defs[ATTR_CONTEXT_FIELD].predef = (0 X_ATTR_DO(ORBIT) FIELD_ATTR_DO(ORBIT)); attr_defs[ATTR_CONTEXT_METHOD].predef = (0 X_ATTR_DO(ORBIT) METHOD_ATTR_DO(ORBIT)); @@ -2120,7 +2063,6 @@ void unpacker::read_attr_defs() int idx = ADH_BYTE_INDEX(header); entry *name = attr_definition_name.getRef(); entry *layout = attr_definition_layout.getRef(); - CHECK; attr_defs[attrc].defineLayout(idx, name, layout->value.b.strval()); } } @@ -2160,7 +2102,7 @@ static int lastIndexOf(int chmin, int chmax, bytes &x, int pos) return -1; } -inner_class *cpool::getIC(entry *inner) +inner_class *constant_pool::getIC(entry *inner) { if (inner == nullptr) return nullptr; @@ -2172,7 +2114,7 @@ inner_class *cpool::getIC(entry *inner) return ic; } -inner_class *cpool::getFirstChildIC(entry *outer) +inner_class *constant_pool::getFirstChildIC(entry *outer) { if (outer == nullptr) return nullptr; @@ -2184,7 +2126,7 @@ inner_class *cpool::getFirstChildIC(entry *outer) return ic; } -inner_class *cpool::getNextChildIC(inner_class *child) +inner_class *constant_pool::getNextChildIC(inner_class *child) { inner_class *ic = child->next_sibling; assert(ic == nullptr || ic->outer == child->outer); @@ -2202,7 +2144,6 @@ void unpacker::read_ics() ics = U_NEW(inner_class, ic_count); ic_this_class.readData(ic_count); ic_flags.readData(ic_count); - CHECK; // Scan flags to get count of long-form bands. int long_forms = 0; for (i = 0; i < ic_count; i++) @@ -2215,12 +2156,11 @@ void unpacker::read_ics() } flags &= ~ACC_IC_LONG_FORM; entry *inner = ic_this_class.getRef(); - CHECK; - uint inord = inner->inord; - assert(inord < (uint)cp.tag_count[CONSTANT_Class]); + uint32_t inord = inner->inord; + assert(inord < (uint32_t)cp.tag_count[CONSTANT_Class]); if (ic_index[inord] != nullptr) { - abort("identical inner class"); + unpack_abort("identical inner class"); break; } ic_index[inord] = &ics[i]; @@ -2228,7 +2168,6 @@ void unpacker::read_ics() ics[i].flags = flags; assert(cp.getIC(inner) == &ics[i]); } - CHECK; // ic_this_class.done(); // ic_flags.done(); ic_outer_class.readData(long_forms); @@ -2256,8 +2195,7 @@ void unpacker::read_ics() dollar2 = lastIndexOf(DOLLAR_MIN, DOLLAR_MAX, n, nlen); if (dollar2 < 0) { - abort(); - return; + unpack_abort(); } assert(dollar2 >= pkglen); if (isDigitString(n, dollar2 + 1, nlen)) @@ -2296,10 +2234,10 @@ void unpacker::read_ics() // update child/sibling list if (ics[i].outer != nullptr) { - uint outord = ics[i].outer->inord; + uint32_t outord = ics[i].outer->inord; if (outord != NO_INORD) { - assert(outord < (uint)cp.tag_count[CONSTANT_Class]); + assert(outord < (uint32_t)cp.tag_count[CONSTANT_Class]); ics[i].next_sibling = ic_child_index[outord]; ic_child_index[outord] = &ics[i]; } @@ -2316,8 +2254,6 @@ void unpacker::read_classes() class_interface_count.readData(class_count); class_interface.readData(class_interface_count.getIntTotal()); - CHECK; - #if 0 int i; // Make a little mark on super-classes. @@ -2332,27 +2268,18 @@ void unpacker::read_classes() class_field_count.readData(class_count); class_method_count.readData(class_count); - CHECK; - int field_count = class_field_count.getIntTotal(); int method_count = class_method_count.getIntTotal(); field_descr.readData(field_count); read_attrs(ATTR_CONTEXT_FIELD, field_count); - CHECK; - method_descr.readData(method_count); read_attrs(ATTR_CONTEXT_METHOD, method_count); - - CHECK; - read_attrs(ATTR_CONTEXT_CLASS, class_count); - CHECK; - read_code_headers(); } -int unpacker::attr_definitions::predefCount(uint idx) +int unpacker::attr_definitions::predefCount(uint32_t idx) { return isPredefined(idx) ? flag_count[idx] : 0; } @@ -2364,27 +2291,23 @@ void unpacker::read_attrs(int attrc, int obj_count) int i, idx, count; - CHECK; - bool haveLongFlags = ad.haveLongFlags(); band &xxx_flags_hi = ad.xxx_flags_hi(); if (haveLongFlags) xxx_flags_hi.readData(obj_count); - CHECK; band &xxx_flags_lo = ad.xxx_flags_lo(); xxx_flags_lo.readData(obj_count); - CHECK; // pre-scan flags, counting occurrences of each index bit - julong indexMask = ad.flagIndexMask(); // which flag bits are index bits? + uint64_t indexMask = ad.flagIndexMask(); // which flag bits are index bits? for (i = 0; i < obj_count; i++) { - julong indexBits = xxx_flags_hi.getLong(xxx_flags_lo, haveLongFlags); + uint64_t indexBits = xxx_flags_hi.getLong(xxx_flags_lo, haveLongFlags); if ((indexBits & ~indexMask) > (ushort) - 1) { - abort("undefined attribute flag bit"); + unpack_abort("undefined attribute flag bit"); return; } indexBits &= indexMask; // ignore classfile flag bits @@ -2400,19 +2323,17 @@ void unpacker::read_attrs(int attrc, int obj_count) band &xxx_attr_count = ad.xxx_attr_count(); // There is one count element for each 1<<16 bit set in flags: xxx_attr_count.readData(ad.predefCount(X_ATTR_OVERFLOW)); - CHECK; band &xxx_attr_indexes = ad.xxx_attr_indexes(); int overflowIndexCount = xxx_attr_count.getIntTotal(); xxx_attr_indexes.readData(overflowIndexCount); - CHECK; // pre-scan attr indexes, counting occurrences of each value for (i = 0; i < overflowIndexCount; i++) { idx = xxx_attr_indexes.getInt(); if (!ad.isIndex(idx)) { - abort("attribute index out of bounds"); + unpack_abort("attribute index out of bounds"); return; } ad.getCount(idx) += 1; @@ -2428,7 +2349,6 @@ void unpacker::read_attrs(int attrc, int obj_count) { // Build the bands lazily, only when they are used. band **bands = ad.buildBands(lo); - CHECK; if (lo->hasCallables()) { for (i = 0; bands[i] != nullptr; i++) @@ -2443,7 +2363,6 @@ void unpacker::read_attrs(int attrc, int obj_count) } } ad.xxx_attr_calls().readData(backwardCounts); - CHECK; // Read built-in bands. // Mostly, these are hand-coded equivalents to readBandData(). @@ -2453,53 +2372,44 @@ void unpacker::read_attrs(int attrc, int obj_count) count = ad.predefCount(CLASS_ATTR_SourceFile); class_SourceFile_RUN.readData(count); - CHECK; count = ad.predefCount(CLASS_ATTR_EnclosingMethod); class_EnclosingMethod_RC.readData(count); class_EnclosingMethod_RDN.readData(count); - CHECK; count = ad.predefCount(X_ATTR_Signature); class_Signature_RS.readData(count); - CHECK; ad.readBandData(X_ATTR_RuntimeVisibleAnnotations); ad.readBandData(X_ATTR_RuntimeInvisibleAnnotations); count = ad.predefCount(CLASS_ATTR_InnerClasses); class_InnerClasses_N.readData(count); - CHECK; count = class_InnerClasses_N.getIntTotal(); class_InnerClasses_RC.readData(count); class_InnerClasses_F.readData(count); - CHECK; + // Drop remaining columns wherever flags are zero: count -= class_InnerClasses_F.getIntCount(0); class_InnerClasses_outer_RCN.readData(count); class_InnerClasses_name_RUN.readData(count); - CHECK; count = ad.predefCount(CLASS_ATTR_ClassFile_version); class_ClassFile_version_minor_H.readData(count); class_ClassFile_version_major_H.readData(count); - CHECK; break; case ATTR_CONTEXT_FIELD: count = ad.predefCount(FIELD_ATTR_ConstantValue); field_ConstantValue_KQ.readData(count); - CHECK; count = ad.predefCount(X_ATTR_Signature); field_Signature_RS.readData(count); - CHECK; ad.readBandData(X_ATTR_RuntimeVisibleAnnotations); ad.readBandData(X_ATTR_RuntimeInvisibleAnnotations); - CHECK; break; case ATTR_CONTEXT_METHOD: @@ -2511,18 +2421,15 @@ void unpacker::read_attrs(int attrc, int obj_count) method_Exceptions_N.readData(count); count = method_Exceptions_N.getIntTotal(); method_Exceptions_RC.readData(count); - CHECK; count = ad.predefCount(X_ATTR_Signature); method_Signature_RS.readData(count); - CHECK; ad.readBandData(X_ATTR_RuntimeVisibleAnnotations); ad.readBandData(X_ATTR_RuntimeInvisibleAnnotations); ad.readBandData(METHOD_ATTR_RuntimeVisibleParameterAnnotations); ad.readBandData(METHOD_ATTR_RuntimeInvisibleParameterAnnotations); ad.readBandData(METHOD_ATTR_AnnotationDefault); - CHECK; break; case ATTR_CONTEXT_CODE: @@ -2531,14 +2438,12 @@ void unpacker::read_attrs(int attrc, int obj_count) // disable this feature in old archives! if (count != 0 && majver < JAVA6_PACKAGE_MAJOR_VERSION) { - abort("undefined StackMapTable attribute (old archive format)"); + unpack_abort("undefined StackMapTable attribute (old archive format)"); return; } code_StackMapTable_N.readData(count); - CHECK; count = code_StackMapTable_N.getIntTotal(); code_StackMapTable_frame_T.readData(count); - CHECK; // the rest of it depends in a complicated way on frame tags { int fat_frame_count = 0; @@ -2583,23 +2488,18 @@ void unpacker::read_attrs(int attrc, int obj_count) // deal completely with fat frames: offset_count += fat_frame_count; code_StackMapTable_local_N.readData(fat_frame_count); - CHECK; type_count += code_StackMapTable_local_N.getIntTotal(); code_StackMapTable_stack_N.readData(fat_frame_count); type_count += code_StackMapTable_stack_N.getIntTotal(); - CHECK; // read the rest: code_StackMapTable_offset.readData(offset_count); code_StackMapTable_T.readData(type_count); - CHECK; // (7) [RCH] count = code_StackMapTable_T.getIntCount(7); code_StackMapTable_RC.readData(count); - CHECK; // (8) [PH] count = code_StackMapTable_T.getIntCount(8); code_StackMapTable_P.readData(count); - CHECK; } count = ad.predefCount(CODE_ATTR_LineNumberTable); @@ -2644,7 +2544,7 @@ void unpacker::read_attrs(int attrc, int obj_count) void unpacker::attr_definitions::readBandData(int idx) { int j; - uint count = getCount(idx); + uint32_t count = getCount(idx); if (count == 0) return; layout_definition *lo = getLayout(idx); @@ -2674,12 +2574,12 @@ void unpacker::attr_definitions::readBandData(int idx) } } // Now consult whichever callables have non-zero entry counts. - readBandData(bands, (uint) - 1); + readBandData(bands, (uint32_t) - 1); } } // Recursive helper to the previous function: -void unpacker::attr_definitions::readBandData(band **body, uint count) +void unpacker::attr_definitions::readBandData(band **body, uint32_t count) { int j, k; for (j = 0; body[j] != nullptr; j++) @@ -2778,7 +2678,7 @@ void unpacker::putlayout(band **body) int prevBCI = -1; if (body == NULL) { - abort("putlayout: unexpected NULL for body"); + unpack_abort("putlayout: unexpected NULL for body"); return; } for (i = 0; body[i] != nullptr; i++) @@ -2959,7 +2859,6 @@ void unpacker::get_code_header(int &max_stack, int &max_na_locals, int &handler_ void unpacker::read_code_headers() { code_headers.readData(code_count); - CHECK; int totalHandlerCount = 0; int totalFlagsCount = 0; for (int i = 0; i < code_count; i++) @@ -2983,7 +2882,6 @@ void unpacker::read_code_headers() code_max_na_locals.readData(); code_handler_count.readData(); totalHandlerCount += code_handler_count.getIntTotal(); - CHECK; // Read handler specifications. // Cf. PackageReader.readCodeHandlers. @@ -2991,13 +2889,11 @@ void unpacker::read_code_headers() code_handler_end_PO.readData(totalHandlerCount); code_handler_catch_PO.readData(totalHandlerCount); code_handler_class_RCN.readData(totalHandlerCount); - CHECK; read_attrs(ATTR_CONTEXT_CODE, totalFlagsCount); - CHECK; } -static inline bool is_in_range(uint n, uint min, uint max) +static inline bool is_in_range(uint32_t n, uint32_t min, uint32_t max) { return n - min <= max - min; // unsigned arithmetic! } @@ -3095,7 +2991,6 @@ unpacker::read_bcs() // read from bc_codes and bc_case_count fillbytes all_switch_ops; all_switch_ops.init(); - CHECK; // Read directly from rp/rplimit. // Do this later: bc_codes.readData(...) @@ -3121,8 +3016,7 @@ unpacker::read_bcs() } if (opptr == oplimit) { - abort(); - break; + unpack_abort(); } int bc = *opptr++ & 0xFF; bool isWide = false; @@ -3130,8 +3024,7 @@ unpacker::read_bcs() { if (opptr == oplimit) { - abort(); - break; + unpack_abort(); } bc = *opptr++ & 0xFF; isWide = true; @@ -3208,8 +3101,6 @@ unpacker::read_bcs() doneScanningMethod: { } - if (aborting()) - break; } // Go through the formality, so we can use it in a regular fashion later: @@ -3245,7 +3136,6 @@ void unpacker::read_bands() byte *rp0 = rp; read_file_header(); - CHECK; if (cp.nentries == 0) { @@ -3257,32 +3147,27 @@ void unpacker::read_bands() check_options(); read_cp(); - CHECK; read_attr_defs(); - CHECK; read_ics(); - CHECK; read_classes(); - CHECK; read_bcs(); - CHECK; read_files(); } /// CP routines -entry *&cpool::hashTabRef(byte tag, bytes &b) +entry *&constant_pool::hashTabRef(byte tag, bytes &b) { - uint hash = tag + (int)b.len; + uint32_t hash = tag + (int)b.len; for (int i = 0; i < (int)b.len; i++) { hash = hash * 31 + (0xFF & b.ptr[i]); } entry **ht = hashTab; int hlen = hashTabLength; - assert((hlen & (hlen - 1)) == 0); // must be power of 2 - uint hash1 = hash & (hlen - 1); // == hash % hlen - uint hash2 = 0; // lazily computed (requires mod op.) + assert((hlen & (hlen - 1)) == 0); // must be power of 2 + uint32_t hash1 = hash & (hlen - 1); // == hash % hlen + uint32_t hash2 = 0; // lazily computed (requires mod op.) int probes = 0; while (ht[hash1] != nullptr) { @@ -3293,9 +3178,9 @@ entry *&cpool::hashTabRef(byte tag, bytes &b) // Note: hash2 must be relatively prime to hlen, hence the "|1". hash2 = (((hash % 499) & (hlen - 1)) | 1); hash1 += hash2; - if (hash1 >= (uint)hlen) + if (hash1 >= (uint32_t)hlen) hash1 -= hlen; - assert(hash1 < (uint)hlen); + assert(hash1 < (uint32_t)hlen); assert(++probes < hlen); } return ht[hash1]; @@ -3310,7 +3195,7 @@ static void insert_extra(entry *e, ptrlist &extras) // Note: We will sort the list (by string-name) later. } -entry *cpool::ensureUtf8(bytes &b) +entry *constant_pool::ensureUtf8(bytes &b) { entry *&ix = hashTabRef(CONSTANT_Utf8, b); if (ix != nullptr) @@ -3318,7 +3203,7 @@ entry *cpool::ensureUtf8(bytes &b) // Make one. if (nentries == maxentries) { - abort("cp utf8 overflow"); + unpack_abort("cp utf8 overflow"); return &entries[tag_base[CONSTANT_Utf8]]; // return something } entry &e = entries[nentries++]; @@ -3329,7 +3214,7 @@ entry *cpool::ensureUtf8(bytes &b) return ix = &e; } -entry *cpool::ensureClass(bytes &b) +entry *constant_pool::ensureClass(bytes &b) { entry *&ix = hashTabRef(CONSTANT_Class, b); if (ix != nullptr) @@ -3337,7 +3222,7 @@ entry *cpool::ensureClass(bytes &b) // Make one. if (nentries == maxentries) { - abort("cp class overflow"); + unpack_abort("cp class overflow"); return &entries[tag_base[CONSTANT_Class]]; // return something } entry &e = entries[nentries++]; @@ -3353,7 +3238,7 @@ entry *cpool::ensureClass(bytes &b) return &e; } -void cpool::expandSignatures() +void constant_pool::expandSignatures() { int i; int nsigs = 0; @@ -3362,7 +3247,6 @@ void cpool::expandSignatures() int sig_limit = tag_count[CONSTANT_Signature] + first_sig; fillbytes buf; buf.init(1 << 10); - CHECK; for (i = first_sig; i < sig_limit; i++) { entry &e = entries[i]; @@ -3418,7 +3302,7 @@ void cpool::expandSignatures() } } -void cpool::initMemberIndexes() +void constant_pool::initMemberIndexes() { // This function does NOT refer to any class schema. // It is totally internal to the cpool. @@ -3489,7 +3373,7 @@ void cpool::initMemberIndexes() u->free_temps(); } -void entry::requestOutputIndex(cpool &cp, int req) +void entry::requestOutputIndex(constant_pool &cp, int req) { assert(outputIndex <= NOT_REQUESTED); // must not have assigned indexes yet if (tag == CONSTANT_Signature) @@ -3514,7 +3398,7 @@ void entry::requestOutputIndex(cpool &cp, int req) } } -void cpool::resetOutputIndexes() +void constant_pool::resetOutputIndexes() { int i; int noes = outputEntries.length(); @@ -3571,7 +3455,7 @@ extern "C" int outputEntry_cmp(const void *e1p, const void *e2p) return compare_Utf8_chars(e1.value.b, e2.value.b); } -void cpool::computeOutputIndexes() +void constant_pool::computeOutputIndexes() { int i; @@ -3596,114 +3480,9 @@ void cpool::computeOutputIndexes() } // Unpacker Start - -const char str_tf[] = "true\0false"; -#undef STR_TRUE -#undef STR_FALSE -#define STR_TRUE (&str_tf[0]) -#define STR_FALSE (&str_tf[5]) - -const char *unpacker::get_option(const char *prop) -{ - if (prop == nullptr) - return nullptr; - if (strcmp(prop, UNPACK_DEFLATE_HINT) == 0) - { - return deflate_hint_or_zero == 0 ? nullptr : STR_TF(deflate_hint_or_zero > 0); -#ifdef HAVE_STRIP - } - else if (strcmp(prop, UNPACK_STRIP_COMPILE) == 0) - { - return STR_TF(strip_compile); - } - else if (strcmp(prop, UNPACK_STRIP_DEBUG) == 0) - { - return STR_TF(strip_debug); - } - else if (strcmp(prop, UNPACK_STRIP_JCOV) == 0) - { - return STR_TF(strip_jcov); -#endif /*HAVE_STRIP*/ - } - else if (strcmp(prop, UNPACK_REMOVE_PACKFILE) == 0) - { - return STR_TF(remove_packfile); - } - else if (strcmp(prop, DEBUG_VERBOSE) == 0) - { - return saveIntStr(verbose); - } - else if (strcmp(prop, UNPACK_MODIFICATION_TIME) == 0) - { - return (modification_time_or_zero == 0) ? nullptr - : saveIntStr(modification_time_or_zero); - } - else - { - return NULL; // unknown option ignore - } -} - -bool unpacker::set_option(const char *prop, const char *value) -{ - if (prop == NULL) - return false; - if (strcmp(prop, UNPACK_DEFLATE_HINT) == 0) - { - deflate_hint_or_zero = - ((value == nullptr || strcmp(value, "keep") == 0) ? 0 : BOOL_TF(value) ? +1 : -1); -#ifdef HAVE_STRIP - } - else if (strcmp(prop, UNPACK_STRIP_COMPILE) == 0) - { - strip_compile = STR_TF(value); - } - else if (strcmp(prop, UNPACK_STRIP_DEBUG) == 0) - { - strip_debug = STR_TF(value); - } - else if (strcmp(prop, UNPACK_STRIP_JCOV) == 0) - { - strip_jcov = STR_TF(value); -#endif /*HAVE_STRIP*/ - } - else if (strcmp(prop, UNPACK_REMOVE_PACKFILE) == 0) - { - remove_packfile = STR_TF(value); - } - else if (strcmp(prop, DEBUG_VERBOSE) == 0) - { - verbose = (value == nullptr) ? 0 : atoi(value); - } - else if (strcmp(prop, UNPACK_MODIFICATION_TIME) == 0) - { - if (value == nullptr || (strcmp(value, "keep") == 0)) - { - modification_time_or_zero = 0; - } - else if (strcmp(value, "now") == 0) - { - time_t now; - time(&now); - modification_time_or_zero = (int)now; - } - else - { - modification_time_or_zero = atoi(value); - if (modification_time_or_zero == 0) - modification_time_or_zero = 1; // make non-zero - } - } - else - { - return false; // unknown option ignore - } - return true; -} - // Deallocate all internal storage and reset to a clean state. // Do not disturb any input or output connections, including -// infileptr, infileno, inbytes, read_input_fn, jarout, or errstrm. +// infileptr, inbytes, read_input_fn, jarout, or errstrm. // Do not reset any unpack options. void unpacker::reset() { @@ -3737,26 +3516,19 @@ void unpacker::reset() this->free(); this->init(read_input_fn); -// restore selected interface state: -#define SAVE(x) this->x = save_u.x - SAVE(infileptr); // buffered - SAVE(infileno); // unbuffered - SAVE(inbytes); // direct - SAVE(jarout); - SAVE(gzin); - SAVE(verbose); // verbose level, 0 means no output - SAVE(strip_compile); - SAVE(strip_debug); - SAVE(strip_jcov); - SAVE(remove_packfile); - SAVE(deflate_hint_or_zero); // ==0 means not set, otherwise -1 or 1 - SAVE(modification_time_or_zero); - SAVE(bytes_read_before_reset); - SAVE(bytes_written_before_reset); - SAVE(files_written_before_reset); - SAVE(classes_written_before_reset); - SAVE(segments_read_before_reset); -#undef SAVE + // restore selected interface state: + infileptr = save_u.infileptr; + inbytes = save_u.inbytes; + jarout = save_u.jarout; + gzin = save_u.gzin; + verbose = save_u.verbose; + deflate_hint_or_zero = save_u.deflate_hint_or_zero; + modification_time_or_zero = save_u.modification_time_or_zero; + bytes_read_before_reset = save_u.bytes_read_before_reset; + bytes_written_before_reset = save_u.bytes_written_before_reset; + files_written_before_reset = save_u.files_written_before_reset; + classes_written_before_reset = save_u.classes_written_before_reset; + segments_read_before_reset = save_u.segments_read_before_reset; // Note: If we use strip_names, watch out: They get nuked here. } @@ -3774,32 +3546,6 @@ void unpacker::init(read_input_fn_t input_fn) attr_defs[i].u = u; // set up outer ptr } -const char *unpacker::get_abort_message() -{ - return abort_message; -} - -void unpacker::dump_options() -{ - static const char *opts[] = { - UNPACK_DEFLATE_HINT, -#ifdef HAVE_STRIP - UNPACK_STRIP_COMPILE, UNPACK_STRIP_DEBUG, UNPACK_STRIP_JCOV, -#endif /*HAVE_STRIP*/ - UNPACK_REMOVE_PACKFILE, DEBUG_VERBOSE, UNPACK_MODIFICATION_TIME, nullptr}; - for (int i = 0; opts[i] != nullptr; i++) - { - const char *str = get_option(opts[i]); - if (str == nullptr) - { - if (verbose == 0) - continue; - str = "(not set)"; - } - fprintf(stderr, "%s=%s\n", opts[i], str); - } -} - // Usage: unpack a byte buffer // packptr is a reference to byte buffer containing a // packed file and len is the length of the buffer. @@ -3833,7 +3579,6 @@ void unpacker::check_options() // Turn off per-file modtime by force. archive_options &= ~AO_HAVE_FILE_MODTIME; } - // %%% strip_compile, etc... } // classfile writing @@ -3854,7 +3599,7 @@ void unpacker::reset_cur_classfile() requested_ics.empty(); } -cpindex *cpool::getKQIndex() +cpindex *constant_pool::getKQIndex() { char ch = '?'; if (u->cur_descr != nullptr) @@ -3887,21 +3632,21 @@ cpindex *cpool::getKQIndex() tag = CONSTANT_Integer; break; default: - abort("bad KQ reference"); + unpack_abort("bad KQ reference"); break; } return getIndex(tag); } -uint unpacker::to_bci(uint bii) +uint32_t unpacker::to_bci(uint32_t bii) { - uint len = bcimap.length(); - uint *map = (uint *)bcimap.base(); + uint32_t len = bcimap.length(); + uint32_t *map = (uint32_t *)bcimap.base(); assert(len > 0); // must be initialized before using to_bci if (bii < len) return map[bii]; // Else it's a fractional or out-of-range BCI. - uint key = bii - len; + uint32_t key = bii - len; for (int i = len;; i--) { if (map[i - 1] - (i - 1) <= key) @@ -4060,7 +3805,6 @@ unpacker::write_bc_ops() --wp; // not really part of the code int size = bc_escrefsize.getInt(); entry *ref = bc_escref.getRefN(); - CHECK; switch (size) { case 1: @@ -4114,7 +3858,7 @@ unpacker::write_bc_ops() if (ref == nullptr) break; // oops, bad input assert(ref->tag == CONSTANT_Methodref); - if (ref->memberDescr()->descrName() == cp.sym[cpool::s_lt_init_gt]) + if (ref->memberDescr()->descrName() == cp.sym[constant_pool::s_lt_init_gt]) { if (which_init++ == coding) break; @@ -4139,7 +3883,6 @@ unpacker::write_bc_ops() putu1_fast(origBC); } entry *ref = bc_which->getRef(); - CHECK; putref(ref); continue; } @@ -4162,7 +3905,6 @@ unpacker::write_bc_ops() if (bc_which != nullptr) { entry *ref = bc_which->getRefCommon(bc_which->ix, bc_which->nullOK); - CHECK; if (ref == nullptr && bc_which == &bc_classref) { // Shorthand for class self-references. @@ -4285,7 +4027,6 @@ unpacker::write_code() handler_count = code_handler_count.getInt(); int siglen = cur_descr->descrType()->typeSize(); - CHECK; if ((cur_descr_flags & ACC_STATIC) == 0) siglen++; max_locals += siglen; @@ -4296,7 +4037,6 @@ unpacker::write_code() // Write the bytecodes themselves. write_bc_ops(); - CHECK; byte *bcbasewp = wp_at(bcbase); putu4_at(bcbasewp, (int)(wp - (bcbasewp + 4))); // size of code attr @@ -4311,10 +4051,9 @@ unpacker::write_code() bii += code_handler_catch_PO.getInt(); putu2(to_bci(bii)); putref(code_handler_class_RCN.getRefN()); - CHECK; } - julong indexBits = cflags; + uint64_t indexBits = cflags; if (cflags < 0) { bool haveLongFlags = attr_defs[ATTR_CONTEXT_CODE].haveLongFlags(); @@ -4323,9 +4062,8 @@ unpacker::write_code() write_attrs(ATTR_CONTEXT_CODE, indexBits); } -int unpacker::write_attrs(int attrc, julong indexBits) +int unpacker::write_attrs(int attrc, uint64_t indexBits) { - CHECK_0; if (indexBits == 0) { // Quick short-circuit. @@ -4338,9 +4076,9 @@ int unpacker::write_attrs(int attrc, julong indexBits) int i, j, j2, idx, count; int oiCount = 0; - if (ad.isPredefined(X_ATTR_OVERFLOW) && (indexBits & ((julong)1 << X_ATTR_OVERFLOW)) != 0) + if (ad.isPredefined(X_ATTR_OVERFLOW) && (indexBits & ((uint64_t)1 << X_ATTR_OVERFLOW)) != 0) { - indexBits -= ((julong)1 << X_ATTR_OVERFLOW); + indexBits -= ((uint64_t)1 << X_ATTR_OVERFLOW); oiCount = ad.xxx_attr_count().getInt(); } @@ -4371,7 +4109,6 @@ int unpacker::write_attrs(int attrc, julong indexBits) entry *aname = nullptr; entry *ref; // scratch size_t abase = put_empty(2 + 4); - CHECK_0; if (idx < (int)ad.flag_limit && ad.isPredefined(idx)) { // Switch on the attrc and idx simultaneously. @@ -4396,15 +4133,14 @@ int unpacker::write_attrs(int attrc, julong indexBits) case ADH_BYTE(ATTR_CONTEXT_CLASS, CLASS_ATTR_InnerClasses) : // note the existence of this attr, but save for later if (cur_class_has_local_ics) - abort("too many InnerClasses attrs"); + unpack_abort("too many InnerClasses attrs"); cur_class_has_local_ics = true; wp = wp_at(abase); continue; case ADH_BYTE(ATTR_CONTEXT_CLASS, CLASS_ATTR_SourceFile) : - aname = cp.sym[cpool::s_SourceFile]; + aname = cp.sym[constant_pool::s_SourceFile]; ref = class_SourceFile_RUN.getRefN(); - CHECK_0; if (ref == nullptr) { bytes &n = cur_class->ref(0)->value.b; @@ -4431,23 +4167,23 @@ int unpacker::write_attrs(int attrc, julong indexBits) break; case ADH_BYTE(ATTR_CONTEXT_CLASS, CLASS_ATTR_EnclosingMethod) : - aname = cp.sym[cpool::s_EnclosingMethod]; + aname = cp.sym[constant_pool::s_EnclosingMethod]; putref(class_EnclosingMethod_RC.getRefN()); putref(class_EnclosingMethod_RDN.getRefN()); break; case ADH_BYTE(ATTR_CONTEXT_FIELD, FIELD_ATTR_ConstantValue) : - aname = cp.sym[cpool::s_ConstantValue]; + aname = cp.sym[constant_pool::s_ConstantValue]; putref(field_ConstantValue_KQ.getRefUsing(cp.getKQIndex())); break; case ADH_BYTE(ATTR_CONTEXT_METHOD, METHOD_ATTR_Code) : - aname = cp.sym[cpool::s_Code]; + aname = cp.sym[constant_pool::s_Code]; write_code(); break; case ADH_BYTE(ATTR_CONTEXT_METHOD, METHOD_ATTR_Exceptions) : - aname = cp.sym[cpool::s_Exceptions]; + aname = cp.sym[constant_pool::s_Exceptions]; putu2(count = method_Exceptions_N.getInt()); for (j = 0; j < count; j++) { @@ -4456,7 +4192,7 @@ int unpacker::write_attrs(int attrc, julong indexBits) break; case ADH_BYTE(ATTR_CONTEXT_CODE, CODE_ATTR_StackMapTable) : - aname = cp.sym[cpool::s_StackMapTable]; + aname = cp.sym[constant_pool::s_StackMapTable]; // (keep this code aligned with its brother in unpacker::read_attrs) putu2(count = code_StackMapTable_N.getInt()); for (j = 0; j < count; j++) @@ -4504,7 +4240,7 @@ int unpacker::write_attrs(int attrc, julong indexBits) break; case ADH_BYTE(ATTR_CONTEXT_CODE, CODE_ATTR_LineNumberTable) : - aname = cp.sym[cpool::s_LineNumberTable]; + aname = cp.sym[constant_pool::s_LineNumberTable]; putu2(count = code_LineNumberTable_N.getInt()); for (j = 0; j < count; j++) { @@ -4514,7 +4250,7 @@ int unpacker::write_attrs(int attrc, julong indexBits) break; case ADH_BYTE(ATTR_CONTEXT_CODE, CODE_ATTR_LocalVariableTable) : - aname = cp.sym[cpool::s_LocalVariableTable]; + aname = cp.sym[constant_pool::s_LocalVariableTable]; putu2(count = code_LocalVariableTable_N.getInt()); for (j = 0; j < count; j++) { @@ -4530,7 +4266,7 @@ int unpacker::write_attrs(int attrc, julong indexBits) break; case ADH_BYTE(ATTR_CONTEXT_CODE, CODE_ATTR_LocalVariableTypeTable) : - aname = cp.sym[cpool::s_LocalVariableTypeTable]; + aname = cp.sym[constant_pool::s_LocalVariableTypeTable]; putu2(count = code_LocalVariableTypeTable_N.getInt()); for (j = 0; j < count; j++) { @@ -4546,24 +4282,24 @@ int unpacker::write_attrs(int attrc, julong indexBits) break; case ADH_BYTE(ATTR_CONTEXT_CLASS, X_ATTR_Signature) : - aname = cp.sym[cpool::s_Signature]; + aname = cp.sym[constant_pool::s_Signature]; putref(class_Signature_RS.getRefN()); break; case ADH_BYTE(ATTR_CONTEXT_FIELD, X_ATTR_Signature) : - aname = cp.sym[cpool::s_Signature]; + aname = cp.sym[constant_pool::s_Signature]; putref(field_Signature_RS.getRefN()); break; case ADH_BYTE(ATTR_CONTEXT_METHOD, X_ATTR_Signature) : - aname = cp.sym[cpool::s_Signature]; + aname = cp.sym[constant_pool::s_Signature]; putref(method_Signature_RS.getRefN()); break; case ADH_BYTE(ATTR_CONTEXT_CLASS, X_ATTR_Deprecated) : case ADH_BYTE(ATTR_CONTEXT_FIELD, X_ATTR_Deprecated) : case ADH_BYTE(ATTR_CONTEXT_METHOD, X_ATTR_Deprecated) : - aname = cp.sym[cpool::s_Deprecated]; + aname = cp.sym[constant_pool::s_Deprecated]; // no data break; } @@ -4575,7 +4311,7 @@ int unpacker::write_attrs(int attrc, julong indexBits) layout_definition *lo = ad.getLayout(idx); if (lo == nullptr) { - abort("bad layout index"); + unpack_abort("bad layout index"); break; } assert((int)lo->idx == idx); @@ -4600,8 +4336,7 @@ int unpacker::write_attrs(int attrc, julong indexBits) } if (aname == nullptr) - abort("bad attribute index"); - CHECK_0; + unpack_abort("bad attribute index"); byte *wp1 = wp; wp = wp_at(abase); @@ -4628,7 +4363,6 @@ int unpacker::write_attrs(int attrc, julong indexBits) void unpacker::write_members(int num, int attrc) { - CHECK; attr_definitions &ad = attr_defs[attrc]; band &member_flags_hi = ad.xxx_flags_hi(); band &member_flags_lo = ad.xxx_flags_lo(); @@ -4636,18 +4370,16 @@ void unpacker::write_members(int num, int attrc) bool haveLongFlags = ad.haveLongFlags(); putu2(num); - julong indexMask = attr_defs[attrc].flagIndexMask(); + uint64_t indexMask = attr_defs[attrc].flagIndexMask(); for (int i = 0; i < num; i++) { - julong mflags = member_flags_hi.getLong(member_flags_lo, haveLongFlags); + uint64_t mflags = member_flags_hi.getLong(member_flags_lo, haveLongFlags); entry *mdescr = member_descr.getRef(); cur_descr = mdescr; putu2(cur_descr_flags = (ushort)(mflags & ~indexMask)); - CHECK; putref(mdescr->descrName()); putref(mdescr->descrType()); write_attrs(attrc, (mflags & indexMask)); - CHECK; } cur_descr = nullptr; } @@ -4669,14 +4401,12 @@ void unpacker::write_classfile_tail() attr_definitions &ad = attr_defs[ATTR_CONTEXT_CLASS]; bool haveLongFlags = ad.haveLongFlags(); - julong kflags = class_flags_hi.getLong(class_flags_lo, haveLongFlags); - julong indexMask = ad.flagIndexMask(); + uint64_t kflags = class_flags_hi.getLong(class_flags_lo, haveLongFlags); + uint64_t indexMask = ad.flagIndexMask(); cur_class = class_this.getRef(); cur_super = class_super.getRef(); - CHECK; - if (cur_super == cur_class) cur_super = nullptr; // special representation for java/lang/Object @@ -4693,7 +4423,6 @@ void unpacker::write_classfile_tail() write_members(class_field_count.getInt(), ATTR_CONTEXT_FIELD); write_members(class_method_count.getInt(), ATTR_CONTEXT_METHOD); - CHECK; cur_class_has_local_ics = false; // may be set true by write_attrs @@ -4760,7 +4489,6 @@ void unpacker::write_classfile_tail() { inner_class &extra_ic = extra_ics[i]; extra_ic.inner = class_InnerClasses_RC.getRef(); - CHECK; // Find the corresponding equivalent global IC: inner_class *global_ic = cp.getIC(extra_ic.inner); int flags = class_InnerClasses_F.getInt(); @@ -4769,7 +4497,7 @@ void unpacker::write_classfile_tail() // The extra IC is simply a copy of a global IC. if (global_ic == nullptr) { - abort("bad reference to inner class"); + unpack_abort("bad reference to inner class"); break; } extra_ic = (*global_ic); // fill in rest of fields @@ -4812,7 +4540,7 @@ void unpacker::write_classfile_tail() if (local_ics > 0) { // append the new attribute: - putref(cp.sym[cpool::s_InnerClasses]); + putref(cp.sym[constant_pool::s_InnerClasses]); putu4(2 + 2 * 4 * local_ics); putu2(local_ics); PTRLIST_QSORT(requested_ics, raw_address_cmp); @@ -4832,7 +4560,6 @@ void unpacker::write_classfile_tail() putu2(ic->flags); } } - assert(local_ics == 0); // must balance putu2_at(wp_at(naOffset), ++na); // increment class attr count } @@ -4844,7 +4571,6 @@ void unpacker::write_classfile_tail() } requested_ics.empty(); - CHECK; close_output(); // rewrite CP references in the tail @@ -4868,7 +4594,6 @@ void unpacker::write_classfile_tail() assert(false); // should not reach here } } - CHECK; } void unpacker::write_classfile_head() @@ -4919,7 +4644,7 @@ void unpacker::write_classfile_head() putu2(e.refs[1]->getOutputIndex()); break; default: - abort(ERROR_INTERNAL); + unpack_abort(ERROR_INTERNAL); } } close_output(); @@ -4927,7 +4652,6 @@ void unpacker::write_classfile_head() unpacker::file *unpacker::get_next_file() { - CHECK_0; free_temps(); if (files_remaining == 0) { @@ -4936,9 +4660,9 @@ unpacker::file *unpacker::get_next_file() cur_file.size = 0; if (archive_size != 0) { - julong predicted_size = unsized_bytes_read + archive_size; + uint64_t predicted_size = unsized_bytes_read + archive_size; if (predicted_size != bytes_read) - abort("archive header had incorrect size"); + unpack_abort("archive header had incorrect size"); } return nullptr; } @@ -4953,7 +4677,6 @@ unpacker::file *unpacker::get_next_file() if (files_written < file_count) { entry *e = file_name.getRef(); - CHECK_0; cur_file.name = e->utf8String(); bool haveLongSize = ((archive_options & AO_HAVE_FILE_SIZE_HI) != 0); cur_file.size = file_size_hi.getLong(file_size_lo, haveLongSize); @@ -4973,20 +4696,17 @@ unpacker::file *unpacker::get_next_file() classes_written += 1; if (cur_file.size != 0) { - abort("class file size transmitted"); - return nullptr; + unpack_abort("class file size transmitted"); } reset_cur_classfile(); // write the meat of the classfile: write_classfile_tail(); cur_file.data[1] = cur_classfile_tail.b; - CHECK_0; // write the CP of the classfile, second: write_classfile_head(); cur_file.data[0] = cur_classfile_head.b; - CHECK_0; cur_file.size += cur_file.data[0].len; cur_file.size += cur_file.data[1].len; @@ -5006,8 +4726,7 @@ unpacker::file *unpacker::get_next_file() if (cur_file.size != (size_t)cur_file.size) { // Silly size specified. - abort("resource file too large"); - return nullptr; + unpack_abort("resource file too large"); } size_t rpleft = input_remaining(); if (rpleft > 0) @@ -5024,7 +4743,6 @@ unpacker::file *unpacker::get_next_file() bytes_read += fleft; // Credit it to the overall archive size. } } - CHECK_0; bytes_written += cur_file.size; files_written += 1; return &cur_file; @@ -5034,7 +4752,7 @@ unpacker::file *unpacker::get_next_file() void unpacker::write_file_to_jar(unpacker::file *f) { size_t htsize = f->data[0].len + f->data[1].len; - julong fsize = f->size; + uint64_t fsize = f->size; if (htsize == fsize) { jarout->addJarEntry(f->name, f->deflate_hint(), f->modtime, f->data[0], f->data[1]); @@ -5070,10 +4788,9 @@ void unpacker::write_file_to_jar(unpacker::file *f) input.ensureSize(fleft); } rplimit = rp = input.base(); - CHECK; input.setLimit(rp + fleft); if (!ensure_input(fleft)) - abort("EOF reading resource file"); + unpack_abort("EOF reading resource file"); part2.ptr = input_scan(); part2.len = input_remaining(); rplimit = rp = input.base(); @@ -5085,14 +4802,3 @@ void unpacker::write_file_to_jar(unpacker::file *f) fprintf(stderr, "Wrote " LONG_LONG_FORMAT " bytes to: %s\n", fsize, f->name); } } - -void unpacker::abort(const char *message) -{ - if (message == nullptr) - message = "error unpacking archive"; - if (message[0] == '@') - ++message; - fprintf(stderr, "%s\n", message); - fflush(stderr); - exit(-1); -} diff --git a/depends/pack200/src/unpack.h b/depends/pack200/src/unpack.h index 11f7bbe1..0100700d 100644 --- a/depends/pack200/src/unpack.h +++ b/depends/pack200/src/unpack.h @@ -27,7 +27,7 @@ struct jar; struct gunzip; struct band; -struct cpool; +struct constant_pool; struct entry; struct cpindex; struct inner_class; @@ -35,7 +35,7 @@ struct value_stream; struct cpindex { - uint len; + uint32_t len; entry *base1; // base of primary index entry **base2; // base of secondary index byte ixTag; // type of entries (!= CONSTANT_None), plus 64 if sub-index @@ -44,7 +44,7 @@ struct cpindex SUB_TAG = 64 }; - entry *get(uint i); + entry *get(uint32_t i); void init(int len_, entry *base1_, int ixTag_) { @@ -62,12 +62,12 @@ struct cpindex } }; -struct cpool +struct constant_pool { - uint nentries; + uint32_t nentries; entry *entries; entry *first_extra_entry; - uint maxentries; // total allocated size of entries + uint32_t maxentries; // total allocated size of entries // Position and size of each homogeneous subrange: int tag_count[CONSTANT_Limit]; @@ -89,7 +89,7 @@ struct cpool ptrlist outputEntries; // list of entry* needing output idx assigned entry **hashTab; - uint hashTabLength; + uint32_t hashTabLength; entry *&hashTabRef(byte tag, bytes &b); entry *ensureUtf8(bytes &b); entry *ensureClass(bytes &b); @@ -117,12 +117,12 @@ struct cpool int getCount(byte tag) { - assert((uint)tag < CONSTANT_Limit); + assert((uint32_t)tag < CONSTANT_Limit); return tag_count[tag]; } cpindex *getIndex(byte tag) { - assert((uint)tag < CONSTANT_Limit); + assert((uint32_t)tag < CONSTANT_Limit); return &tag_index[tag]; } cpindex *getKQIndex(); // uses cur_descr @@ -133,10 +133,6 @@ struct cpool void computeOutputOrder(); void computeOutputIndexes(); void resetOutputIndexes(); - - // error handling - inline void abort(const char *msg); - inline bool aborting(); }; /* @@ -149,7 +145,7 @@ struct unpacker struct file { const char *name; - julong size; + uint64_t size; int modtime; int options; bytes data[2]; @@ -161,12 +157,8 @@ struct unpacker } }; - // global pointer to self, if not running under JNI (not multi-thread safe) - static unpacker *non_mt_current; - // if running Unix-style, here are the inputs and outputs FILE *infileptr; // buffered - int infileno; // unbuffered bytes inbytes; // direct gunzip *gzin; // gunzip filter, if any jar *jarout; // output JAR file @@ -174,19 +166,13 @@ struct unpacker // pointer to self, for U_NEW macro unpacker *u; - // private abort message string, allocated to PATH_MAX*2 - const char *abort_message; ptrlist mallocs; // list of guys to free when we are all done ptrlist tmallocs; // list of guys to free on next client request fillbytes smallbuf; // supplies small alloc requests fillbytes tsmallbuf; // supplies temporary small alloc requests // option management members - int verbose; // verbose level, 0 means no output - bool strip_compile; - bool strip_debug; - bool strip_jcov; - bool remove_packfile; + int verbose; // verbose level, 0 means no output int deflate_hint_or_zero; // ==0 means not set, otherwise -1 or 1 int modification_time_or_zero; @@ -196,11 +182,12 @@ struct unpacker bool free_input; // must the input buffer be freed? byte *rp; // read pointer (< rplimit <= input.limit()) byte *rplimit; // how much of the input block has been read? - julong bytes_read; + uint64_t bytes_read; int unsized_bytes_read; // callback to read at least one byte, up to available input - typedef jlong (*read_input_fn_t)(unpacker *self, void *buf, jlong minlen, jlong maxlen); + typedef int64_t (*read_input_fn_t)(unpacker *self, void *buf, int64_t minlen, + int64_t maxlen); read_input_fn_t read_input_fn; // archive header fields @@ -218,7 +205,7 @@ struct unpacker // engine state band *all_bands; // indexed by band_number byte *meta_rp; // read-pointer into (copy of) band_headers - cpool cp; // all constant pool information + constant_pool cp; // all constant pool information inner_class *ics; // InnerClasses // output stream @@ -239,7 +226,7 @@ struct unpacker fillbytes cur_classfile_tail; int files_written; // also tells which file we're working on int classes_written; // also tells which class we're working on - julong bytes_written; + uint64_t bytes_written; intlist bcimap; fillbytes class_fixup_type; intlist class_fixup_offset; @@ -250,8 +237,8 @@ struct unpacker ptrlist requested_ics; // which ics need output? // stats pertaining to multiple segments (updated on reset) - julong bytes_read_before_reset; - julong bytes_written_before_reset; + uint64_t bytes_read_before_reset; + uint64_t bytes_written_before_reset; int files_written_before_reset; int classes_written_before_reset; int segments_read_before_reset; @@ -259,7 +246,7 @@ struct unpacker // attribute state struct layout_definition { - uint idx; // index (0..31...) which identifies this layout + uint32_t idx; // index (0..31...) which identifies this layout const char *name; // name of layout entry *nameEntry; const char *layout; // string of layout (not yet parsed) @@ -280,9 +267,9 @@ struct unpacker unpacker *u; // pointer to self, for U_NEW macro int xxx_flags_hi_bn; // locator for flags, count, indexes, calls bands int attrc; // ATTR_CONTEXT_CLASS, etc. - uint flag_limit; // 32 or 63, depending on archive_options bit - julong predef; // mask of built-in definitions - julong redef; // mask of local flag definitions or redefinitions + uint32_t flag_limit; // 32 or 63, depending on archive_options bit + uint64_t predef; // mask of built-in definitions + uint64_t redef; // mask of local flag definitions or redefinitions ptrlist layouts; // local (compressor-defined) defs, in index order int flag_count[X_ATTR_LIMIT_FLAGS_HI]; intlist overflow_count; @@ -321,12 +308,12 @@ struct unpacker band **popBody(int band_stack_base); // pops a body off band_stack // Read data into the bands of the idx-th layout. - void readBandData(int idx); // parse layout, make bands, read data - void readBandData(band **body, uint count); // recursive helper + void readBandData(int idx); // parse layout, make bands, read data + void readBandData(band **body, uint32_t count); // recursive helper - layout_definition *getLayout(uint idx) + layout_definition *getLayout(uint32_t idx) { - if (idx >= (uint)layouts.length()) + if (idx >= (uint32_t)layouts.length()) return nullptr; return (layout_definition *)layouts.get(idx); } @@ -344,33 +331,33 @@ struct unpacker } // Return flag_count if idx is predef and not redef, else zero. - int predefCount(uint idx); + int predefCount(uint32_t idx); - bool isRedefined(uint idx) + bool isRedefined(uint32_t idx) { if (idx >= flag_limit) return false; return (bool)((redef >> idx) & 1); } - bool isPredefined(uint idx) + bool isPredefined(uint32_t idx) { if (idx >= flag_limit) return false; return (bool)(((predef & ~redef) >> idx) & 1); } - julong flagIndexMask() + uint64_t flagIndexMask() { return (predef | redef); } - bool isIndex(uint idx) + bool isIndex(uint32_t idx) { assert(flag_limit != 0); // must be set up already if (idx < flag_limit) return (bool)(((predef | redef) >> idx) & 1); else - return (idx - flag_limit < (uint)overflow_count.length()); + return (idx - flag_limit < (uint32_t)overflow_count.length()); } - int &getCount(uint idx) + int &getCount(uint32_t idx) { assert(isIndex(idx)); if (idx < flag_limit) @@ -378,14 +365,6 @@ struct unpacker else return overflow_count.get(idx - flag_limit); } - bool aborting() - { - return u->aborting(); - } - void abort(const char *msg) - { - u->abort(msg); - } }; attr_definitions attr_defs[ATTR_CONTEXT_LIMIT]; @@ -407,10 +386,8 @@ struct unpacker bool set_option(const char *option, const char *value); const char *get_option(const char *option); - void dump_options(); - // Fetching input. - bool ensure_input(jlong more); + bool ensure_input(int64_t more); byte *input_scan() { return rp; @@ -473,12 +450,6 @@ struct unpacker sprintf(buf, "%d", num); return saveStr(buf); } - const char *get_abort_message(); - void abort(const char *s = nullptr); - bool aborting() - { - return abort_message != nullptr; - } static unpacker *current(); // find current instance // Output management @@ -514,7 +485,7 @@ struct unpacker } void putu2(int n); // { putu2_at(put_space(2), n); } void putu4(int n); // { putu4_at(put_space(4), n); } - void putu8(jlong n); // { putu8_at(put_space(8), n); } + void putu8(int64_t n); // { putu8_at(put_space(8), n); } void putref(entry *e); // { putu2_at(put_space(2), putref_index(e, 2)); } void putu1ref(entry *e); // { putu1_at(put_space(1), putref_index(e, 1)); } int putref_index(entry *e, int size); // size in [1..2] @@ -530,7 +501,7 @@ struct unpacker { return wpbase + offset; } - uint to_bci(uint bii); + uint32_t to_bci(uint32_t bii); void get_code_header(int &max_stack, int &max_na_locals, int &handler_count, int &cflags); band *ref_band_for_self_op(int bc, bool &isAloadVar, int &origBCVar); band *ref_band_for_op(int bc); @@ -543,7 +514,7 @@ struct unpacker } static void putu2_at(byte *wp, int n); static void putu4_at(byte *wp, int n); - static void putu8_at(byte *wp, jlong n); + static void putu8_at(byte *wp, int64_t n); // Private stuff void reset_cur_classfile(); @@ -552,7 +523,7 @@ struct unpacker void write_code(); void write_bc_ops(); void write_members(int num, int attrc); // attrc=ATTR_CONTEXT_FIELD/METHOD - int write_attrs(int attrc, julong indexBits); + int write_attrs(int attrc, uint64_t indexBits); // The readers void read_bands(); @@ -574,12 +545,3 @@ struct unpacker void read_double_refs(band &cp_band, byte ref1Tag, byte ref2Tag, entry *cpMap, int len); void read_signature_values(entry *cpMap, int len); }; - -inline void cpool::abort(const char *msg) -{ - u->abort(msg); -} -inline bool cpool::aborting() -{ - return u->aborting(); -} diff --git a/depends/pack200/src/unpack200.cpp b/depends/pack200/src/unpack200.cpp new file mode 100644 index 00000000..c6aa0b02 --- /dev/null +++ b/depends/pack200/src/unpack200.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "constants.h" +#include "utils.h" +#include "defines.h" +#include "bytes.h" +#include "coding.h" +#include "unpack200.h" +#include "unpack.h" +#include "zip.h" + +// Callback for fetching data, Unix style. +static int64_t read_input_via_stdio(unpacker *u, void *buf, int64_t minlen, int64_t maxlen) +{ + assert(u->infileptr != nullptr); + assert(minlen <= maxlen); // don't talk nonsense + int64_t numread = 0; + char *bufptr = (char *)buf; + while (numread < minlen) + { + // read available input, up to buf.length or maxlen + int readlen = (1 << 16); + if (readlen > (maxlen - numread)) + readlen = (int)(maxlen - numread); + int nr = 0; + + nr = (int)fread(bufptr, 1, readlen, u->infileptr); + if (nr <= 0) + { + if (errno != EINTR) + break; + nr = 0; + } + numread += nr; + bufptr += nr; + assert(numread <= maxlen); + } + return numread; +} + +enum +{ + EOF_MAGIC = 0, + BAD_MAGIC = -1 +}; + +static int read_magic(unpacker *u, char peek[], int peeklen) +{ + assert(peeklen == 4); // magic numbers are always 4 bytes + int64_t nr = (u->read_input_fn)(u, peek, peeklen, peeklen); + if (nr != peeklen) + { + return (nr == 0) ? EOF_MAGIC : BAD_MAGIC; + } + int magic = 0; + for (int i = 0; i < peeklen; i++) + { + magic <<= 8; + magic += peek[i] & 0xFF; + } + return magic; +} + +void unpack_200(std::string input_path, std::string output_path) +{ + unpacker u; + int status = 0; + + FILE *input = fopen(input_path.c_str(), "rb"); + if (!input) + { + throw std::runtime_error("Can't open input file" + input_path); + } + FILE *output = fopen(output_path.c_str(), "wb"); + if (!output) + { + fclose(output); + throw std::runtime_error("Can't open output file" + output_path); + } + u.init(read_input_via_stdio); + + // initialize jar output + // the output takes ownership of the file handle + jar jarout; + jarout.init(&u); + jarout.jarfp = output; + + // the input doesn't + u.infileptr = input; + + // read the magic! + char peek[4]; + int magic; + magic = read_magic(&u, peek, (int)sizeof(peek)); + + // if it is a gzip encoded file, we need an extra gzip input filter + if ((magic & GZIP_MAGIC_MASK) == GZIP_MAGIC) + { + gunzip *gzin = NEW(gunzip, 1); + gzin->init(&u); + // FIXME: why the side effects? WHY? + u.gzin->start(magic); + u.start(); + } + else + { + // otherwise, feed the bytes to the unpacker directly + u.start(peek, sizeof(peek)); + } + + // Note: The checks to u.aborting() are necessary to gracefully + // terminate processing when the first segment throws an error. + for (;;) + { + // Each trip through this loop unpacks one segment + // and then resets the unpacker. + for (unpacker::file *filep; (filep = u.get_next_file()) != nullptr;) + { + u.write_file_to_jar(filep); + } + + // Peek ahead for more data. + magic = read_magic(&u, peek, (int)sizeof(peek)); + if (magic != (int)JAVA_PACKAGE_MAGIC) + { + if (magic != EOF_MAGIC) + unpack_abort("garbage after end of pack archive"); + break; // all done + } + + // Release all storage from parsing the old segment. + u.reset(); + // Restart, beginning with the peek-ahead. + u.start(peek, sizeof(peek)); + } + u.finish(); + u.free(); // tidy up malloc blocks + fclose(input); +} diff --git a/depends/pack200/src/utils.cpp b/depends/pack200/src/utils.cpp index 3ea8c92e..0b7d91ca 100644 --- a/depends/pack200/src/utils.cpp +++ b/depends/pack200/src/utils.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -57,35 +58,14 @@ void *must_malloc(size_t size) } else { - unpack_abort(ERROR_ENOMEM); + throw std::runtime_error(ERROR_ENOMEM); } return ptr; } -void unpack_abort(const char *msg, unpacker *u) +void unpack_abort(const char *msg) { if (msg == nullptr) msg = "corrupt pack file or internal error"; - if (u == nullptr) - u = unpacker::current(); - if (u == nullptr) - { - fprintf(stderr, "Error: unpacker: %s\n", msg); - ::abort(); - return; - } - u->abort(msg); -} - -bool unpack_aborting(unpacker *u) -{ - if (u == nullptr) - u = unpacker::current(); - if (u == nullptr) - { - fprintf(stderr, "Error: unpacker: no current instance\n"); - ::abort(); - return true; - } - return u->aborting(); + throw std::runtime_error(msg); } diff --git a/depends/pack200/src/utils.h b/depends/pack200/src/utils.h index 0ce6b7d8..5a3dc8f6 100644 --- a/depends/pack200/src/utils.h +++ b/depends/pack200/src/utils.h @@ -25,6 +25,8 @@ // Definitions of our util functions +#include + void *must_malloc(size_t size); // overflow management @@ -46,9 +48,6 @@ inline size_t add_size(size_t size1, size_t size2, int size3) return add_size(add_size(size1, size2), size3); } -// These may be expensive, because they have to go via Java TSD, -// if the optional u argument is missing. struct unpacker; -extern void unpack_abort(const char *msg, unpacker *u = nullptr); -extern bool unpack_aborting(unpacker *u = nullptr); - +/// This throws an exception! +extern void unpack_abort(const char *msg = nullptr); diff --git a/depends/pack200/src/zip.cpp b/depends/pack200/src/zip.cpp index f1bc25ad..baea2ba7 100644 --- a/depends/pack200/src/zip.cpp +++ b/depends/pack200/src/zip.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -47,29 +48,14 @@ #include "zip.h" -#ifdef NO_ZLIB - -inline bool jar::deflate_bytes(bytes &head, bytes &tail) -{ - return false; -} -inline uint jar::get_crc32(uint c, uchar *ptr, uint len) -{ - return 0; -} -#define Z_NULL NULL - -#else // Have ZLIB - #include -inline uint jar::get_crc32(uint c, uchar *ptr, uint len) +inline uint32_t jar::get_crc32(uint32_t c, uchar *ptr, uint32_t len) { return crc32(c, ptr, len); } -#endif // End of ZLIB - +// FIXME: this is bullshit. Do real endianness detection. #ifdef sparc #define SWAP_BYTES(a) ((((a) << 8) & 0xff00) | 0x00ff) & (((a) >> 8) | 0xff00) #else @@ -107,7 +93,7 @@ void jar::write_data(void *buff, int len) void jar::add_to_jar_directory(const char *fname, bool store, int modtime, int len, int clen, uint32_t crc) { - uint fname_length = (uint)strlen(fname); + uint32_t fname_length = (uint32_t)strlen(fname); ushort header[23]; if (modtime == 0) modtime = default_modtime; @@ -169,9 +155,9 @@ void jar::add_to_jar_directory(const char *fname, bool store, int modtime, int l } void jar::write_jar_header(const char *fname, bool store, int modtime, int len, int clen, - uint crc) + uint32_t crc) { - uint fname_length = (uint)strlen(fname); + uint32_t fname_length = (uint32_t)strlen(fname); ushort header[15]; if (modtime == 0) modtime = default_modtime; @@ -218,12 +204,10 @@ void jar::write_jar_header(const char *fname, bool store, int modtime, int len, write_data((char *)fname, (int)fname_length); } -static const char marker_comment[] = ZIP_ARCHIVE_MARKER_COMMENT; - void jar::write_central_directory() { bytes mc; - mc.set(marker_comment); + mc.set("PACK200"); ushort header[11]; @@ -278,11 +262,11 @@ void jar::addJarEntry(const char *fname, bool deflate_hint, int modtime, bytes & int len = (int)(head.len + tail.len); int clen = 0; - uint crc = get_crc32(0, Z_NULL, 0); + uint32_t crc = get_crc32(0, Z_NULL, 0); if (head.len != 0) - crc = get_crc32(crc, (uchar *)head.ptr, (uint)head.len); + crc = get_crc32(crc, (uchar *)head.ptr, (uint32_t)head.len); if (tail.len != 0) - crc = get_crc32(crc, (uchar *)tail.ptr, (uint)tail.len); + crc = get_crc32(crc, (uchar *)tail.ptr, (uint32_t)tail.len); bool deflate = (deflate_hint && len > 0); @@ -452,10 +436,10 @@ bool jar::deflate_bytes(bytes &head, bytes &tail) } // Callback for fetching data from a GZIP input stream -static jlong read_input_via_gzip(unpacker *u, void *buf, jlong minlen, jlong maxlen) +static int64_t read_input_via_gzip(unpacker *u, void *buf, int64_t minlen, int64_t maxlen) { assert(minlen <= maxlen); // don't talk nonsense - jlong numread = 0; + int64_t numread = 0; char *bufptr = (char *)buf; char *inbuf = u->gzin->inbuf; size_t inbuflen = sizeof(u->gzin->inbuf); @@ -476,7 +460,7 @@ static jlong read_input_via_gzip(unpacker *u, void *buf, jlong minlen, jlong max int error = inflate(&zs, Z_NO_FLUSH); if (error != Z_OK && error != Z_STREAM_END) { - u->abort("error inflating input"); + unpack_abort("error inflating input"); break; } int nr = readlen - zs.avail_out; @@ -505,7 +489,7 @@ static jlong read_input_via_gzip(unpacker *u, void *buf, jlong minlen, jlong max // %%% should check final CRC and length here // %%% should check for concatenated *.gz files here if (zs.avail_in > 0) - u->abort("garbage after end of deflated input stream"); + unpack_abort("garbage after end of deflated input stream"); // pop this filter off: u->gzin->free(); break; @@ -577,15 +561,11 @@ void gunzip::start(int magic) if (gz_flg & FHCRC) read_fixed_field(gz_hcrc, sizeof(gz_hcrc)); - if (aborting()) - return; - // now the input stream is ready to read into the inflater int error = inflateInit2((z_stream *)zstream, -MAX_WBITS); if (error != Z_OK) { - abort("cannot create input"); - return; + unpack_abort("cannot create input"); } } @@ -602,9 +582,7 @@ void gunzip::free() void gunzip::read_fixed_field(char *buf, size_t buflen) { - if (aborting()) - return; - jlong nr = ((unpacker::read_input_fn_t)read_input_fn)(u, buf, buflen, buflen); + int64_t nr = ((unpacker::read_input_fn_t)read_input_fn)(u, buf, buflen, buflen); if ((size_t)nr != buflen) - u->abort("short stream header"); + unpack_abort("short stream header"); } diff --git a/depends/pack200/src/zip.h b/depends/pack200/src/zip.h index 1b6a8b02..67ec24da 100644 --- a/depends/pack200/src/zip.h +++ b/depends/pack200/src/zip.h @@ -24,7 +24,7 @@ */ #include typedef unsigned short ushort; -typedef unsigned int uint; +typedef unsigned int uint32_t; typedef unsigned char uchar; struct unpacker; @@ -42,7 +42,7 @@ struct jar // Private members fillbytes central_directory; ushort central_directory_count; - uint output_file_offset; + uint32_t output_file_offset; fillbytes deflated; // temporary buffer // pointer to outer unpacker, for error checks etc. @@ -85,17 +85,7 @@ struct jar // The definitions of these depend on the NO_ZLIB option: bool deflate_bytes(bytes &head, bytes &tail); - static uint get_crc32(uint c, unsigned char *ptr, uint len); - - // error handling - void abort(const char *msg) - { - unpack_abort(msg, u); - } - bool aborting() - { - return unpack_aborting(u); - } + static uint32_t get_crc32(uint32_t c, unsigned char *ptr, uint32_t len); }; struct gunzip @@ -105,7 +95,7 @@ struct gunzip // pointer to outer unpacker, for error checks etc. unpacker *u; - void *read_input_fn; // underlying byte stream + void *read_input_fn; // underlying \bchar\b stream void *zstream; // inflater state char inbuf[1 << 14]; // input buffer @@ -117,14 +107,4 @@ struct gunzip // private stuff void read_fixed_field(char *buf, size_t buflen); - - // error handling - void abort(const char *msg) - { - unpack_abort(msg, u); - } - bool aborting() - { - return unpack_aborting(u); - } }; diff --git a/depends/xz-embedded/CMakeLists.txt b/depends/xz-embedded/CMakeLists.txt index a71002fb..f1c6eb8d 100644 --- a/depends/xz-embedded/CMakeLists.txt +++ b/depends/xz-embedded/CMakeLists.txt @@ -3,7 +3,7 @@ project(xz-embedded) option(XZ_BUILD_BCJ "Build xz-embedded with BCJ support (native binary optimization)" OFF) option(XZ_BUILD_CRC64 "Build xz-embedded with CRC64 checksum support" ON) -option(XZ_BUILD_MINIDEC "Build a tiny utility that decompresses xz streams" ON) +option(XZ_BUILD_MINIDEC "Build a tiny utility that decompresses xz streams" OFF) set(CMAKE_C_FLAGS "-std=c99") @@ -19,6 +19,7 @@ src/xz_lzma2.h src/xz_private.h src/xz_stream.h ) +# TODO: look into what would be needed for plain old lzma # checksum checks add_definitions(-DXZ_DEC_ANY_CHECK) @@ -28,18 +29,12 @@ if(XZ_BUILD_CRC64) endif() # TODO: add SHA256 -# uncomment these, if required. if(XZ_BUILD_BCJ) add_definitions(-DXZ_DEC_X86 -DXZ_DEC_POWERPC -DXZ_DEC_IA64) add_definitions(-DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_SPARC) LIST(APPEND XZ_SOURCES src/xz_dec_bcj.c) endif() -# Static link! -ADD_DEFINITIONS(-DXZ_STATIC) - -add_definitions(-DXZ_LIBRARY) - -add_library(xz-embedded SHARED ${XZ_SOURCES}) +add_library(xz-embedded STATIC ${XZ_SOURCES}) add_executable(xzminidec xzminidec.c) target_link_libraries(xzminidec xz-embedded) -- cgit From 8b0f8b9e597eb50ff9323037fd5fa1b9e330c467 Mon Sep 17 00:00:00 2001 From: Petr Mrázek Date: Mon, 30 Sep 2013 02:34:46 +0200 Subject: ``Working'' forge unpackers. Needs a lot of hardening but good for alpha. --- CMakeLists.txt | 6 +- depends/pack200/CMakeLists.txt | 1 + depends/pack200/src/unpack.cpp | 6 +- depends/pack200/src/unpack200.cpp | 3 + depends/xz-embedded/CMakeLists.txt | 20 +-- depends/xz-embedded/include/xz.h | 15 ++ gui/LegacyModEditDialog.cpp | 2 +- gui/OneSixModEditDialog.cpp | 2 +- logic/ForgeInstaller.cpp | 5 + logic/LegacyUpdate.cpp | 2 +- logic/OneSixAssets.cpp | 4 +- logic/OneSixLibrary.cpp | 14 +- logic/OneSixLibrary.h | 8 +- logic/OneSixUpdate.cpp | 9 +- logic/OneSixVersion.cpp | 12 +- logic/lists/ForgeVersionList.cpp | 2 +- logic/lists/MinecraftVersionList.h | 2 +- logic/net/DownloadJob.cpp | 16 ++- logic/net/DownloadJob.h | 8 +- logic/net/ForgeXzDownload.cpp | 277 +++++++++++++++++++++++++++++++++++++ logic/net/ForgeXzDownload.h | 35 +++++ 21 files changed, 413 insertions(+), 36 deletions(-) create mode 100644 logic/net/ForgeXzDownload.cpp create mode 100644 logic/net/ForgeXzDownload.h (limited to 'depends/xz-embedded') diff --git a/CMakeLists.txt b/CMakeLists.txt index bb813a09..a3afe5d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,9 +52,11 @@ add_subdirectory(depends/launcher) # Add xz decompression add_subdirectory(depends/xz-embedded) +include_directories(${XZ_INCLUDE_DIR}) # Add pack200 decompression add_subdirectory(depends/pack200) +include_directories(${PACK200_INCLUDE_DIR}) ######## MultiMC Libs ######## @@ -231,6 +233,8 @@ logic/net/ByteArrayDownload.h logic/net/ByteArrayDownload.cpp logic/net/CacheDownload.h logic/net/CacheDownload.cpp +logic/net/ForgeXzDownload.h +logic/net/ForgeXzDownload.cpp logic/net/DownloadJob.h logic/net/DownloadJob.cpp logic/net/HttpMetaCache.h @@ -354,7 +358,7 @@ ADD_EXECUTABLE(MultiMC MACOSX_BUNDLE WIN32 # Link QT5_USE_MODULES(MultiMC Widgets Network Xml) -TARGET_LINK_LIBRARIES(MultiMC quazip libUtil libSettings libGroupView ${MultiMC_LINK_ADDITIONAL_LIBS}) +TARGET_LINK_LIBRARIES(MultiMC quazip xz-embedded unpack200 libUtil libSettings libGroupView ${MultiMC_LINK_ADDITIONAL_LIBS}) ADD_DEPENDENCIES(MultiMC MultiMCLauncher) diff --git a/depends/pack200/CMakeLists.txt b/depends/pack200/CMakeLists.txt index 657e303c..3e41d378 100644 --- a/depends/pack200/CMakeLists.txt +++ b/depends/pack200/CMakeLists.txt @@ -37,6 +37,7 @@ src/zip.cpp src/zip.h ) +SET(PACK200_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include" PARENT_SCOPE) include_directories(include) add_library(unpack200 STATIC ${PACK200_SRC}) diff --git a/depends/pack200/src/unpack.cpp b/depends/pack200/src/unpack.cpp index 8a66d42a..d7de1b22 100644 --- a/depends/pack200/src/unpack.cpp +++ b/depends/pack200/src/unpack.cpp @@ -1523,7 +1523,8 @@ band **unpacker::attr_definitions::buildBands(unpacker::layout_definition *lo) call.le_body[0] = &cble; // Distinguish backward calls and callables: assert(cble.le_kind == EK_CBLE); - assert(cble.le_len == call_num); + //FIXME: hit this one + //assert(cble.le_len == call_num); cble.le_back |= call.le_back; } calls_to_link.popTo(0); @@ -2777,7 +2778,8 @@ void unpacker::putlayout(band **body) { band &cble = *b.le_body[0]; assert(cble.le_kind == EK_CBLE); - assert(cble.le_len == b.le_len); + //FIXME: hit this one + //assert(cble.le_len == b.le_len); putlayout(cble.le_body); } break; diff --git a/depends/pack200/src/unpack200.cpp b/depends/pack200/src/unpack200.cpp index c6aa0b02..2ff8c34a 100644 --- a/depends/pack200/src/unpack200.cpp +++ b/depends/pack200/src/unpack200.cpp @@ -156,8 +156,11 @@ void unpack_200(std::string input_path, std::string output_path) magic = read_magic(&u, peek, (int)sizeof(peek)); if (magic != (int)JAVA_PACKAGE_MAGIC) { + // we do not feel strongly about this kind of thing... + /* if (magic != EOF_MAGIC) unpack_abort("garbage after end of pack archive"); + */ break; // all done } diff --git a/depends/xz-embedded/CMakeLists.txt b/depends/xz-embedded/CMakeLists.txt index f1c6eb8d..d4987f76 100644 --- a/depends/xz-embedded/CMakeLists.txt +++ b/depends/xz-embedded/CMakeLists.txt @@ -8,33 +8,25 @@ option(XZ_BUILD_MINIDEC "Build a tiny utility that decompresses xz streams" OFF) set(CMAKE_C_FLAGS "-std=c99") include_directories(include) +SET(XZ_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include" PARENT_SCOPE) + +# See include/xz.h for manual feature configuration +# tweak this list and xz.h to fit your needs set(XZ_SOURCES include/xz.h src/xz_config.h src/xz_crc32.c +src/xz_crc64.c src/xz_dec_lzma2.c src/xz_dec_stream.c src/xz_lzma2.h src/xz_private.h src/xz_stream.h +# src/xz_dec_bcj.c ) # TODO: look into what would be needed for plain old lzma -# checksum checks -add_definitions(-DXZ_DEC_ANY_CHECK) -if(XZ_BUILD_CRC64) - add_definitions(-DXZ_USE_CRC64) - LIST(APPEND XZ_SOURCES src/xz_crc64.c) -endif() -# TODO: add SHA256 - -if(XZ_BUILD_BCJ) - add_definitions(-DXZ_DEC_X86 -DXZ_DEC_POWERPC -DXZ_DEC_IA64) - add_definitions(-DXZ_DEC_ARM -DXZ_DEC_ARMTHUMB -DXZ_DEC_SPARC) - LIST(APPEND XZ_SOURCES src/xz_dec_bcj.c) -endif() - add_library(xz-embedded STATIC ${XZ_SOURCES}) add_executable(xzminidec xzminidec.c) target_link_libraries(xzminidec xz-embedded) diff --git a/depends/xz-embedded/include/xz.h b/depends/xz-embedded/include/xz.h index 0a4b38d3..49a96f7b 100644 --- a/depends/xz-embedded/include/xz.h +++ b/depends/xz-embedded/include/xz.h @@ -23,6 +23,21 @@ extern "C" { #endif +/* Definitions that determine available features */ +#define XZ_DEC_ANY_CHECK 1 +#define XZ_USE_CRC64 1 + +// native machine code compression stuff +/* +#define XZ_DEC_X86 +#define XZ_DEC_POWERPC +#define XZ_DEC_IA64 +#define XZ_DEC_ARM +#define XZ_DEC_ARMTHUMB +#define XZ_DEC_SPARC +*/ + + /* In Linux, this is used to make extern functions static when needed. */ #ifndef XZ_EXTERN # define XZ_EXTERN extern diff --git a/gui/LegacyModEditDialog.cpp b/gui/LegacyModEditDialog.cpp index 20296769..053aef6b 100644 --- a/gui/LegacyModEditDialog.cpp +++ b/gui/LegacyModEditDialog.cpp @@ -210,7 +210,7 @@ void LegacyModEditDialog::on_addForgeBtn_clicked() if(entry->stale) { DownloadJob * fjob = new DownloadJob("Forge download"); - fjob->add(forge->universal_url, entry); + fjob->addCacheDownload(forge->universal_url, entry); ProgressDialog dlg(this); dlg.exec(fjob); if(dlg.result() == QDialog::Accepted) diff --git a/gui/OneSixModEditDialog.cpp b/gui/OneSixModEditDialog.cpp index 94fea933..f2e7c5d2 100644 --- a/gui/OneSixModEditDialog.cpp +++ b/gui/OneSixModEditDialog.cpp @@ -160,7 +160,7 @@ void OneSixModEditDialog::on_forgeBtn_clicked() if (entry->stale) { DownloadJob *fjob = new DownloadJob("Forge download"); - fjob->add(forgeVersion->installer_url, entry); + fjob->addCacheDownload(forgeVersion->installer_url, entry); ProgressDialog dlg(this); dlg.exec(fjob); if (dlg.result() == QDialog::Accepted) diff --git a/logic/ForgeInstaller.cpp b/logic/ForgeInstaller.cpp index bcba00e9..9ae3f1e1 100644 --- a/logic/ForgeInstaller.cpp +++ b/logic/ForgeInstaller.cpp @@ -100,11 +100,16 @@ bool ForgeInstaller::apply(QSharedPointer to) for (auto lib : m_forge_version->libraries) { QString libName = lib->name(); + // WARNING: This could actually break. // if this is the actual forge lib, set an absolute url for the download if (libName.contains("minecraftforge")) { lib->setAbsoluteUrl(m_universal_url); } + else if (libName.contains("scala")) + { + lib->setHint("forge-pack-xz"); + } if (blacklist.contains(libName)) continue; diff --git a/logic/LegacyUpdate.cpp b/logic/LegacyUpdate.cpp index 84d3d830..d8e622dd 100644 --- a/logic/LegacyUpdate.cpp +++ b/logic/LegacyUpdate.cpp @@ -228,7 +228,7 @@ void LegacyUpdate::jarStart() urlstr += intended_version_id + "/" + intended_version_id + ".jar"; auto dljob = new DownloadJob("Minecraft.jar for version " + intended_version_id); - dljob->add(QUrl(urlstr), inst->defaultBaseJar()); + dljob->addFileDownload(QUrl(urlstr), inst->defaultBaseJar()); legacyDownloadJob.reset(dljob); connect(dljob, SIGNAL(succeeded()), SLOT(jarFinished())); connect(dljob, SIGNAL(failed()), SLOT(jarFailed())); diff --git a/logic/OneSixAssets.cpp b/logic/OneSixAssets.cpp index 5bdd29d7..ca7a5534 100644 --- a/logic/OneSixAssets.cpp +++ b/logic/OneSixAssets.cpp @@ -113,7 +113,7 @@ void OneSixAssets::fetchXMLFinished() auto entry = metacache->resolveEntry("assets", keyStr, etagStr); if(entry->stale) { - job->add(QUrl(prefix + keyStr), entry); + job->addCacheDownload(QUrl(prefix + keyStr), entry); } } if(job->size()) @@ -130,7 +130,7 @@ void OneSixAssets::fetchXMLFinished() void OneSixAssets::start() { auto job = new DownloadJob("Assets index"); - job->add(QUrl ( "http://s3.amazonaws.com/Minecraft.Resources/" )); + job->addByteArrayDownload(QUrl ( "http://s3.amazonaws.com/Minecraft.Resources/" )); connect ( job, SIGNAL(succeeded()), SLOT ( fetchXMLFinished() ) ); index_job.reset ( job ); job->start(); diff --git a/logic/OneSixLibrary.cpp b/logic/OneSixLibrary.cpp index 8da1fde7..63d42646 100644 --- a/logic/OneSixLibrary.cpp +++ b/logic/OneSixLibrary.cpp @@ -105,12 +105,24 @@ QString OneSixLibrary::absoluteUrl() return m_absolute_url; } +void OneSixLibrary::setHint(QString hint) +{ + m_hint = hint; +} + +QString OneSixLibrary::hint() +{ + return m_hint; +} + QJsonObject OneSixLibrary::toJson() { QJsonObject libRoot; libRoot.insert("name", m_name); if(m_absolute_url.size()) - libRoot.insert("MMC-absulute_url", m_absolute_url); + libRoot.insert("MMC-absoluteUrl", m_absolute_url); + if(m_hint.size()) + libRoot.insert("MMC-hint", m_hint); if(m_base_url != "https://s3.amazonaws.com/Minecraft.Download/libraries/") libRoot.insert("url", m_base_url); if (isNative() && m_native_suffixes.size()) diff --git a/logic/OneSixLibrary.h b/logic/OneSixLibrary.h index f3106483..2a16d8e1 100644 --- a/logic/OneSixLibrary.h +++ b/logic/OneSixLibrary.h @@ -19,6 +19,8 @@ private: // custom values /// absolute URL. takes precedence over m_download_path, if defined QString m_absolute_url; + /// download hint - how to actually get the library + QString m_hint; // derived values used for real things /// a decent name fit for display @@ -91,8 +93,12 @@ public: QString downloadUrl(); /// Get the relative path where the library should be saved QString storagePath(); - + /// set an absolute URL for the library. This is an MMC extension. void setAbsoluteUrl(QString absolute_url); QString absoluteUrl(); + + /// set a hint about how to treat the library. This is an MMC extension. + void setHint(QString hint); + QString hint(); }; diff --git a/logic/OneSixUpdate.cpp b/logic/OneSixUpdate.cpp index 73bd9403..41d8f599 100644 --- a/logic/OneSixUpdate.cpp +++ b/logic/OneSixUpdate.cpp @@ -75,7 +75,7 @@ void OneSixUpdate::versionFileStart() QString urlstr("http://s3.amazonaws.com/Minecraft.Download/versions/"); urlstr += targetVersion->descriptor() + "/" + targetVersion->descriptor() + ".json"; auto job = new DownloadJob("Version index"); - job->add(QUrl(urlstr)); + job->addByteArrayDownload(QUrl(urlstr)); specificVersionDownloadJob.reset(job); connect(specificVersionDownloadJob.data(), SIGNAL(succeeded()), SLOT(versionFileFinished())); @@ -158,7 +158,7 @@ void OneSixUpdate::jarlibStart() targetstr += version->id + "/" + version->id + ".jar"; auto job = new DownloadJob("Libraries for instance " + inst->name()); - job->add(QUrl(urlstr), targetstr); + job->addFileDownload(QUrl(urlstr), targetstr); jarlibDownloadJob.reset(job); auto libs = version->getActiveNativeLibs(); @@ -171,7 +171,10 @@ void OneSixUpdate::jarlibStart() auto entry = metacache->resolveEntry("libraries", lib->storagePath()); if (entry->stale) { - jarlibDownloadJob->add(download_path, entry); + if(lib->hint() == "forge-pack-xz") + jarlibDownloadJob->addForgeXzDownload(download_path, entry); + else + jarlibDownloadJob->addCacheDownload(download_path, entry); } } connect(jarlibDownloadJob.data(), SIGNAL(succeeded()), SLOT(jarlibFinished())); diff --git a/logic/OneSixVersion.cpp b/logic/OneSixVersion.cpp index 663d903a..64a47562 100644 --- a/logic/OneSixVersion.cpp +++ b/logic/OneSixVersion.cpp @@ -71,11 +71,21 @@ QSharedPointer fromJsonV4(QJsonObject root, { library->setBaseUrl(urlVal.toString()); } - auto urlAbsVal = libObj.value("MMC-absulute_url"); + auto hintVal = libObj.value("MMC-hint"); + if (hintVal.isString()) + { + library->setHint(hintVal.toString()); + } + auto urlAbsVal = libObj.value("MMC-absoluteUrl"); + auto urlAbsuVal = libObj.value("MMC-absulute_url"); // compatibility if (urlAbsVal.isString()) { library->setAbsoluteUrl(urlAbsVal.toString()); } + else if(urlAbsuVal.isString()) + { + library->setAbsoluteUrl(urlAbsuVal.toString()); + } // Extract excludes (if any) auto extractVal = libObj.value("extract"); if (extractVal.isObject()) diff --git a/logic/lists/ForgeVersionList.cpp b/logic/lists/ForgeVersionList.cpp index 721f2c0a..e2adbf3b 100644 --- a/logic/lists/ForgeVersionList.cpp +++ b/logic/lists/ForgeVersionList.cpp @@ -162,7 +162,7 @@ void ForgeListLoadTask::executeTask() auto job = new DownloadJob("Version index"); // we do not care if the version is stale or not. auto forgeListEntry = MMC->metacache()->resolveEntry("minecraftforge", "list.json"); - job->add(QUrl(JSON_URL), forgeListEntry); + job->addCacheDownload(QUrl(JSON_URL), forgeListEntry); listJob.reset(job); connect(listJob.data(), SIGNAL(succeeded()), SLOT(list_downloaded())); connect(listJob.data(), SIGNAL(failed()), SLOT(versionFileFailed())); diff --git a/logic/lists/MinecraftVersionList.h b/logic/lists/MinecraftVersionList.h index fb28ddfe..ed68efbb 100644 --- a/logic/lists/MinecraftVersionList.h +++ b/logic/lists/MinecraftVersionList.h @@ -46,7 +46,7 @@ public: protected: QList m_vlist; - bool m_loaded; + bool m_loaded = false; protected slots: virtual void updateListData(QList versions); diff --git a/logic/net/DownloadJob.cpp b/logic/net/DownloadJob.cpp index 8da1f39b..03a69555 100644 --- a/logic/net/DownloadJob.cpp +++ b/logic/net/DownloadJob.cpp @@ -7,7 +7,7 @@ #include -ByteArrayDownloadPtr DownloadJob::add(QUrl url) +ByteArrayDownloadPtr DownloadJob::addByteArrayDownload(QUrl url) { ByteArrayDownloadPtr ptr(new ByteArrayDownload(url)); ptr->index_within_job = downloads.size(); @@ -17,7 +17,7 @@ ByteArrayDownloadPtr DownloadJob::add(QUrl url) return ptr; } -FileDownloadPtr DownloadJob::add(QUrl url, QString rel_target_path) +FileDownloadPtr DownloadJob::addFileDownload(QUrl url, QString rel_target_path) { FileDownloadPtr ptr(new FileDownload(url, rel_target_path)); ptr->index_within_job = downloads.size(); @@ -27,7 +27,7 @@ FileDownloadPtr DownloadJob::add(QUrl url, QString rel_target_path) return ptr; } -CacheDownloadPtr DownloadJob::add(QUrl url, MetaEntryPtr entry) +CacheDownloadPtr DownloadJob::addCacheDownload(QUrl url, MetaEntryPtr entry) { CacheDownloadPtr ptr(new CacheDownload(url, entry)); ptr->index_within_job = downloads.size(); @@ -37,6 +37,16 @@ CacheDownloadPtr DownloadJob::add(QUrl url, MetaEntryPtr entry) return ptr; } +ForgeXzDownloadPtr DownloadJob::addForgeXzDownload(QUrl url, MetaEntryPtr entry) +{ + ForgeXzDownloadPtr ptr(new ForgeXzDownload(url, entry)); + ptr->index_within_job = downloads.size(); + downloads.append(ptr); + parts_progress.append(part_info()); + total_progress++; + return ptr; +} + void DownloadJob::partSucceeded(int index) { // do progress. all slots are 1 in size at least diff --git a/logic/net/DownloadJob.h b/logic/net/DownloadJob.h index 5d5ba01a..8c32950a 100644 --- a/logic/net/DownloadJob.h +++ b/logic/net/DownloadJob.h @@ -5,6 +5,7 @@ #include "FileDownload.h" #include "CacheDownload.h" #include "HttpMetaCache.h" +#include "ForgeXzDownload.h" #include "logic/tasks/ProgressProvider.h" class DownloadJob; @@ -20,9 +21,10 @@ public: explicit DownloadJob(QString job_name) :ProgressProvider(), m_job_name(job_name){}; - ByteArrayDownloadPtr add(QUrl url); - FileDownloadPtr add(QUrl url, QString rel_target_path); - CacheDownloadPtr add(QUrl url, MetaEntryPtr entry); + ByteArrayDownloadPtr addByteArrayDownload(QUrl url); + FileDownloadPtr addFileDownload(QUrl url, QString rel_target_path); + CacheDownloadPtr addCacheDownload(QUrl url, MetaEntryPtr entry); + ForgeXzDownloadPtr addForgeXzDownload(QUrl url, MetaEntryPtr entry); DownloadPtr operator[](int index) { diff --git a/logic/net/ForgeXzDownload.cpp b/logic/net/ForgeXzDownload.cpp new file mode 100644 index 00000000..b7e7eedf --- /dev/null +++ b/logic/net/ForgeXzDownload.cpp @@ -0,0 +1,277 @@ +#include "MultiMC.h" +#include "ForgeXzDownload.h" +#include + +#include +#include +#include +#include + +ForgeXzDownload::ForgeXzDownload(QUrl url, MetaEntryPtr entry) + : Download() +{ + QString urlstr = url.toString(); + urlstr.append(".pack.xz"); + m_url = QUrl(urlstr); + m_entry = entry; + m_target_path = entry->getFullPath(); + m_status = Job_NotStarted; + m_opened_for_saving = false; +} + +void ForgeXzDownload::start() +{ + if (!m_entry->stale) + { + emit succeeded(index_within_job); + return; + } + // can we actually create the real, final file? + if (!ensureFilePathExists(m_target_path)) + { + emit failed(index_within_job); + return; + } + qDebug() << "Downloading " << m_url.toString(); + QNetworkRequest request(m_url); + request.setRawHeader(QString("If-None-Match").toLatin1(), m_entry->etag.toLatin1()); + + auto worker = MMC->qnam(); + QNetworkReply *rep = worker->get(request); + + m_reply = QSharedPointer(rep, &QObject::deleteLater); + connect(rep, SIGNAL(downloadProgress(qint64, qint64)), + SLOT(downloadProgress(qint64, qint64))); + connect(rep, SIGNAL(finished()), SLOT(downloadFinished())); + connect(rep, SIGNAL(error(QNetworkReply::NetworkError)), + SLOT(downloadError(QNetworkReply::NetworkError))); + connect(rep, SIGNAL(readyRead()), SLOT(downloadReadyRead())); +} + +void ForgeXzDownload::downloadProgress(qint64 bytesReceived, qint64 bytesTotal) +{ + emit progress(index_within_job, bytesReceived, bytesTotal); +} + +void ForgeXzDownload::downloadError(QNetworkReply::NetworkError error) +{ + // error happened during download. + // TODO: log the reason why + m_status = Job_Failed; +} + +void ForgeXzDownload::downloadFinished() +{ + // if the download succeeded + if (m_status != Job_Failed) + { + // nothing went wrong... + m_status = Job_Finished; + if (m_opened_for_saving) + { + // we actually downloaded something! process and isntall it + decompressAndInstall(); + return; + } + else + { + // something bad happened + m_pack200_xz_file.remove(); + m_reply.clear(); + emit failed(index_within_job); + return; + } + } + // else the download failed + else + { + m_pack200_xz_file.close(); + m_pack200_xz_file.remove(); + m_reply.clear(); + emit failed(index_within_job); + return; + } +} + +void ForgeXzDownload::downloadReadyRead() +{ + + if (!m_opened_for_saving) + { + if (!m_pack200_xz_file.open()) + { + /* + * Can't open the file... the job failed + */ + m_reply->abort(); + emit failed(index_within_job); + return; + } + m_opened_for_saving = true; + } + m_pack200_xz_file.write(m_reply->readAll()); +} + +#include "xz.h" +#include "unpack200.h" + +const size_t buffer_size = 8196; + +void ForgeXzDownload::decompressAndInstall() +{ + // rewind the downloaded temp file + m_pack200_xz_file.seek(0); + // de-xz'd file + QTemporaryFile pack200_file; + pack200_file.open(); + + bool xz_success = false; + // first, de-xz + { + uint8_t in[buffer_size]; + uint8_t out[buffer_size]; + struct xz_buf b; + struct xz_dec *s; + enum xz_ret ret; + xz_crc32_init(); + xz_crc64_init(); + s = xz_dec_init(XZ_DYNALLOC, 1 << 26); + if (s == nullptr) + { + xz_dec_end(s); + emit failed(index_within_job); + return; + } + b.in = in; + b.in_pos = 0; + b.in_size = 0; + b.out = out; + b.out_pos = 0; + b.out_size = buffer_size; + while (!xz_success) + { + if (b.in_pos == b.in_size) + { + b.in_size = m_pack200_xz_file.read((char*)in, sizeof(in)); + b.in_pos = 0; + } + + ret = xz_dec_run(s, &b); + + if (b.out_pos == sizeof(out)) + { + if (pack200_file.write((char*)out, b.out_pos) != b.out_pos) + { + // msg = "Write error\n"; + xz_dec_end(s); + emit failed(index_within_job); + return; + } + + b.out_pos = 0; + } + + if (ret == XZ_OK) + continue; + + if (ret == XZ_UNSUPPORTED_CHECK) + { + // unsupported check. this is OK, but we should log this + continue; + } + + if (pack200_file.write((char*)out, b.out_pos) != b.out_pos ) + { + // write error + pack200_file.close(); + xz_dec_end(s); + return; + } + + switch (ret) + { + case XZ_STREAM_END: + xz_dec_end(s); + xz_success = true; + break; + + case XZ_MEM_ERROR: + qDebug() << "Memory allocation failed\n"; + xz_dec_end(s); + emit failed(index_within_job); + return; + + case XZ_MEMLIMIT_ERROR: + qDebug() << "Memory usage limit reached\n"; + xz_dec_end(s); + emit failed(index_within_job); + return; + + case XZ_FORMAT_ERROR: + qDebug() << "Not a .xz file\n"; + xz_dec_end(s); + emit failed(index_within_job); + return; + + case XZ_OPTIONS_ERROR: + qDebug() << "Unsupported options in the .xz headers\n"; + xz_dec_end(s); + emit failed(index_within_job); + return; + + case XZ_DATA_ERROR: + case XZ_BUF_ERROR: + qDebug() << "File is corrupt\n"; + xz_dec_end(s); + emit failed(index_within_job); + return; + + default: + qDebug() << "Bug!\n"; + xz_dec_end(s); + emit failed(index_within_job); + return; + } + } + } + + // revert pack200 + pack200_file.close(); + QString pack_name = pack200_file.fileName(); + try + { + unpack_200(pack_name.toStdString(), m_target_path.toStdString()); + } + catch(std::runtime_error & err) + { + qDebug() << "Error unpacking " << pack_name.toUtf8() << " : " << err.what(); + QFile f(m_target_path); + if(f.exists()) + f.remove(); + emit failed(index_within_job); + return; + } + + QFile jar_file(m_target_path); + + if (!jar_file.open(QIODevice::ReadOnly)) + { + jar_file.remove(); + emit failed(index_within_job); + return; + } + m_entry->md5sum = QCryptographicHash::hash(jar_file.readAll(), QCryptographicHash::Md5) + .toHex() + .constData(); + jar_file.close(); + + QFileInfo output_file_info(m_target_path); + m_entry->etag = m_reply->rawHeader("ETag").constData(); + m_entry->last_changed_timestamp = + output_file_info.lastModified().toUTC().toMSecsSinceEpoch(); + m_entry->stale = false; + MMC->metacache()->updateEntry(m_entry); + + m_reply.clear(); + emit succeeded(index_within_job); +} diff --git a/logic/net/ForgeXzDownload.h b/logic/net/ForgeXzDownload.h new file mode 100644 index 00000000..8cb47783 --- /dev/null +++ b/logic/net/ForgeXzDownload.h @@ -0,0 +1,35 @@ +#pragma once + +#include "Download.h" +#include "HttpMetaCache.h" +#include +#include + +class ForgeXzDownload : public Download +{ + Q_OBJECT +public: + MetaEntryPtr m_entry; + /// is the saving file already open? + bool m_opened_for_saving; + /// if saving to file, use the one specified in this string + QString m_target_path; + /// this is the output file, if any + QTemporaryFile m_pack200_xz_file; + +public: + explicit ForgeXzDownload(QUrl url, MetaEntryPtr entry); + +protected slots: + virtual void downloadProgress(qint64 bytesReceived, qint64 bytesTotal); + virtual void downloadError(QNetworkReply::NetworkError error); + virtual void downloadFinished(); + virtual void downloadReadyRead(); + +public slots: + virtual void start(); +private: + void decompressAndInstall(); +}; + +typedef QSharedPointer ForgeXzDownloadPtr; -- cgit