diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2009-09-15 23:40:08 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2009-09-15 23:40:08 +0200 |
commit | f2c184be835fbcbd04d06fce22783c6a5d37b563 (patch) | |
tree | 62f0f5ca34a5768e0472cd4a7527801018e2af25 | |
parent | ba98603264125aceac59c36a36dfbee0f7f67c7f (diff) |
unlzma: fixed speedup/shrink by Pascal Bellard (pascal.bellard AT ads-lu.com)
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | archival/Config.in | 4 | ||||
-rw-r--r-- | archival/libunarchive/decompress_unlzma.c | 250 |
2 files changed, 108 insertions, 146 deletions
diff --git a/archival/Config.in b/archival/Config.in index 71b953819..cae7f20bb 100644 --- a/archival/Config.in +++ b/archival/Config.in @@ -298,8 +298,8 @@ config FEATURE_LZMA_FAST default n depends on UNLZMA help - This option reduces decompression time by about 33% at the cost of - a 2K bigger binary. + This option reduces decompression time by about 25% at the cost of + a 1K bigger binary. config UNZIP bool "unzip" diff --git a/archival/libunarchive/decompress_unlzma.c b/archival/libunarchive/decompress_unlzma.c index 33e5cd65d..4478cd2e3 100644 --- a/archival/libunarchive/decompress_unlzma.c +++ b/archival/libunarchive/decompress_unlzma.c @@ -8,14 +8,15 @@ * * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. */ - #include "libbb.h" #include "unarchive.h" #if ENABLE_FEATURE_LZMA_FAST # define speed_inline ALWAYS_INLINE +# define size_inline #else # define speed_inline +# define size_inline ALWAYS_INLINE #endif @@ -44,36 +45,48 @@ typedef struct { #define RC_MODEL_TOTAL_BITS 11 -/* Called twice: once at startup and once in rc_normalize() */ -static void rc_read(rc_t *rc) +/* Called twice: once at startup (LZMA_FAST only) and once in rc_normalize() */ +static size_inline void rc_read(rc_t *rc) { int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); +//TODO: return -1 instead +//This will make unlzma delete broken unpacked file on unpack errors if (buffer_size <= 0) bb_error_msg_and_die("unexpected EOF"); rc->ptr = RC_BUFFER; rc->buffer_end = RC_BUFFER + buffer_size; } +/* Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */ +static void rc_do_normalize(rc_t *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} + /* Called once */ -static rc_t* rc_init(int fd) /*, int buffer_size) */ +static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */ { int i; rc_t *rc; - rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE); + rc = xzalloc(sizeof(*rc) + RC_BUFFER_SIZE); rc->fd = fd; - /* rc->buffer_size = buffer_size; */ - rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE; - rc->ptr = rc->buffer_end; + /* rc->ptr = rc->buffer_end; */ - rc->code = 0; - rc->range = 0xFFFFFFFF; for (i = 0; i < 5; i++) { +#if ENABLE_FEATURE_LZMA_FAST if (rc->ptr >= rc->buffer_end) rc_read(rc); rc->code = (rc->code << 8) | *rc->ptr++; +#else + rc_do_normalize(rc); +#endif } + rc->range = 0xFFFFFFFF; return rc; } @@ -83,14 +96,6 @@ static ALWAYS_INLINE void rc_free(rc_t *rc) free(rc); } -/* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */ -static void rc_do_normalize(rc_t *rc) -{ - if (rc->ptr >= rc->buffer_end) - rc_read(rc); - rc->range <<= 8; - rc->code = (rc->code << 8) | *rc->ptr++; -} static ALWAYS_INLINE void rc_normalize(rc_t *rc) { if (rc->range < (1 << RC_TOP_BITS)) { @@ -98,49 +103,28 @@ static ALWAYS_INLINE void rc_normalize(rc_t *rc) } } -/* rc_is_bit_0 is called 9 times */ -/* Why rc_is_bit_0_helper exists? - * Because we want to always expose (rc->code < rc->bound) to optimizer. - * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined - * only if we compile for speed. - */ -static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p) +/* rc_is_bit_1 is called 9 times */ +static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p) { rc_normalize(rc); rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); - return rc->bound; -} -static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p) -{ - uint32_t t = rc_is_bit_0_helper(rc, p); - return rc->code < t; -} - -/* Called ~10 times, but very small, thus inlined */ -static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p) -{ - rc->range = rc->bound; - *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; -} -static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p) -{ + if (rc->code < rc->bound) { + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; + return 0; + } rc->range -= rc->bound; rc->code -= rc->bound; *p -= *p >> RC_MOVE_BITS; + return 1; } /* Called 4 times in unlzma loop */ -static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) +static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) { - if (rc_is_bit_0(rc, p)) { - rc_update_bit_0(rc, p); - *symbol *= 2; - return 0; - } else { - rc_update_bit_1(rc, p); - *symbol = *symbol * 2 + 1; - return 1; - } + int ret = rc_is_bit_1(rc, p); + *symbol = *symbol * 2 + ret; + return ret; } /* Called once */ @@ -236,14 +220,11 @@ unpack_lzma_stream(int src_fd, int dst_fd) int lc, pb, lp; uint32_t pos_state_mask; uint32_t literal_pos_mask; - uint32_t pos; uint16_t *p; - uint16_t *prob; - uint16_t *prob_lit; int num_bits; int num_probs; rc_t *rc; - int i, mi; + int i; uint8_t *buffer; uint8_t previous_byte = 0; size_t buffer_pos = 0, global_pos = 0; @@ -251,14 +232,17 @@ unpack_lzma_stream(int src_fd, int dst_fd) int state = 0; uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; - xread(src_fd, &header, sizeof(header)); + if (full_read(src_fd, &header, sizeof(header)) != sizeof(header) + || header.pos >= (9 * 5 * 5) + ) { + bb_error_msg("bad lzma header"); + return -1; + } - if (header.pos >= (9 * 5 * 5)) - bb_error_msg_and_die("bad header"); - mi = header.pos / 9; + i = header.pos / 9; lc = header.pos % 9; - pb = mi / 5; - lp = mi % 5; + pb = i / 5; + lp = i % 5; pos_state_mask = (1 << pb) - 1; literal_pos_mask = (1 << lp) - 1; @@ -266,13 +250,13 @@ unpack_lzma_stream(int src_fd, int dst_fd) header.dst_size = SWAP_LE64(header.dst_size); if (header.dict_size == 0) - header.dict_size = 1; + header.dict_size++; buffer = xmalloc(MIN(header.dst_size, header.dict_size)); num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); p = xmalloc(num_probs * sizeof(*p)); - num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); + num_probs += LZMA_LITERAL - LZMA_BASE_SIZE; for (i = 0; i < num_probs; i++) p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; @@ -280,11 +264,13 @@ unpack_lzma_stream(int src_fd, int dst_fd) while (global_pos + buffer_pos < header.dst_size) { int pos_state = (buffer_pos + global_pos) & pos_state_mask; + uint16_t *prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; + + if (!rc_is_bit_1(rc, prob)) { + static const char next_state[LZMA_NUM_STATES] = + { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + int mi = 1; - prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; - if (rc_is_bit_0(rc, prob)) { - mi = 1; - rc_update_bit_0(rc, prob); prob = (p + LZMA_LITERAL + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) + (previous_byte >> (8 - lc)) @@ -294,8 +280,8 @@ unpack_lzma_stream(int src_fd, int dst_fd) if (state >= LZMA_NUM_LIT_STATES) { int match_byte; + uint32_t pos = buffer_pos - rep0; - pos = buffer_pos - rep0; while (pos >= header.dict_size) pos += header.dict_size; match_byte = buffer[pos]; @@ -304,22 +290,16 @@ unpack_lzma_stream(int src_fd, int dst_fd) match_byte <<= 1; bit = match_byte & 0x100; - prob_lit = prob + 0x100 + bit + mi; - bit ^= (rc_get_bit(rc, prob_lit, &mi) << 8); /* 0x100 or 0 */ + bit ^= (rc_get_bit(rc, prob + 0x100 + bit + mi, &mi) << 8); /* 0x100 or 0 */ if (bit) break; } while (mi < 0x100); } while (mi < 0x100) { - prob_lit = prob + mi; - rc_get_bit(rc, prob_lit, &mi); + rc_get_bit(rc, prob + mi, &mi); } - state -= 3; - if (state < 4-3) - state = 0; - if (state >= 10-3) - state -= 6-3; + state = next_state[state]; previous_byte = (uint8_t) mi; #if ENABLE_FEATURE_LZMA_FAST @@ -338,59 +318,46 @@ unpack_lzma_stream(int src_fd, int dst_fd) #endif } else { int offset; - uint16_t *prob_len; + uint16_t *prob2; +#define prob_len prob2 - rc_update_bit_1(rc, prob); - prob = p + LZMA_IS_REP + state; - if (rc_is_bit_0(rc, prob)) { - rc_update_bit_0(rc, prob); + prob2 = p + LZMA_IS_REP + state; + if (!rc_is_bit_1(rc, prob2)) { rep3 = rep2; rep2 = rep1; rep1 = rep0; state = state < LZMA_NUM_LIT_STATES ? 0 : 3; - prob = p + LZMA_LEN_CODER; + prob2 = p + LZMA_LEN_CODER; } else { - rc_update_bit_1(rc, prob); - prob = p + LZMA_IS_REP_G0 + state; - if (rc_is_bit_0(rc, prob)) { - rc_update_bit_0(rc, prob); - prob = (p + LZMA_IS_REP_0_LONG + prob2 += LZMA_IS_REP_G0 - LZMA_IS_REP; + if (!rc_is_bit_1(rc, prob2)) { + prob2 = (p + LZMA_IS_REP_0_LONG + (state << LZMA_NUM_POS_BITS_MAX) + pos_state ); - if (rc_is_bit_0(rc, prob)) { - rc_update_bit_0(rc, prob); - - state = state < LZMA_NUM_LIT_STATES ? 9 : 11; + if (!rc_is_bit_1(rc, prob2)) { #if ENABLE_FEATURE_LZMA_FAST - pos = buffer_pos - rep0; + uint32_t pos = buffer_pos - rep0; + state = state < LZMA_NUM_LIT_STATES ? 9 : 11; while (pos >= header.dict_size) pos += header.dict_size; previous_byte = buffer[pos]; goto one_byte1; #else + state = state < LZMA_NUM_LIT_STATES ? 9 : 11; len = 1; goto string; #endif - } else { - rc_update_bit_1(rc, prob); } } else { uint32_t distance; - rc_update_bit_1(rc, prob); - prob = p + LZMA_IS_REP_G1 + state; - if (rc_is_bit_0(rc, prob)) { - rc_update_bit_0(rc, prob); - distance = rep1; - } else { - rc_update_bit_1(rc, prob); - prob = p + LZMA_IS_REP_G2 + state; - if (rc_is_bit_0(rc, prob)) { - rc_update_bit_0(rc, prob); - distance = rep2; - } else { - rc_update_bit_1(rc, prob); + prob2 += LZMA_IS_REP_G1 - LZMA_IS_REP_G0; + distance = rep1; + if (rc_is_bit_1(rc, prob2)) { + prob2 += LZMA_IS_REP_G2 - LZMA_IS_REP_G1; + distance = rep2; + if (rc_is_bit_1(rc, prob2)) { distance = rep3; rep3 = rep2; } @@ -400,31 +367,27 @@ unpack_lzma_stream(int src_fd, int dst_fd) rep0 = distance; } state = state < LZMA_NUM_LIT_STATES ? 8 : 11; - prob = p + LZMA_REP_LEN_CODER; + prob2 = p + LZMA_REP_LEN_CODER; } - prob_len = prob + LZMA_LEN_CHOICE; - if (rc_is_bit_0(rc, prob_len)) { - rc_update_bit_0(rc, prob_len); - prob_len = (prob + LZMA_LEN_LOW - + (pos_state << LZMA_LEN_NUM_LOW_BITS)); + prob_len = prob2 + LZMA_LEN_CHOICE; + num_bits = LZMA_LEN_NUM_LOW_BITS; + if (!rc_is_bit_1(rc, prob_len)) { + prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE + + (pos_state << LZMA_LEN_NUM_LOW_BITS); offset = 0; - num_bits = LZMA_LEN_NUM_LOW_BITS; } else { - rc_update_bit_1(rc, prob_len); - prob_len = prob + LZMA_LEN_CHOICE_2; - if (rc_is_bit_0(rc, prob_len)) { - rc_update_bit_0(rc, prob_len); - prob_len = (prob + LZMA_LEN_MID - + (pos_state << LZMA_LEN_NUM_MID_BITS)); + prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE; + if (!rc_is_bit_1(rc, prob_len)) { + prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2 + + (pos_state << LZMA_LEN_NUM_MID_BITS); offset = 1 << LZMA_LEN_NUM_LOW_BITS; - num_bits = LZMA_LEN_NUM_MID_BITS; + num_bits += LZMA_LEN_NUM_MID_BITS - LZMA_LEN_NUM_LOW_BITS; } else { - rc_update_bit_1(rc, prob_len); - prob_len = prob + LZMA_LEN_HIGH; + prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2; offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + (1 << LZMA_LEN_NUM_MID_BITS)); - num_bits = LZMA_LEN_NUM_HIGH_BITS; + num_bits += LZMA_LEN_NUM_HIGH_BITS - LZMA_LEN_NUM_LOW_BITS; } } rc_bit_tree_decode(rc, prob_len, num_bits, &len); @@ -432,37 +395,36 @@ unpack_lzma_stream(int src_fd, int dst_fd) if (state < 4) { int pos_slot; + uint16_t *prob3; state += LZMA_NUM_LIT_STATES; - prob = p + LZMA_POS_SLOT + + prob3 = p + LZMA_POS_SLOT + ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : LZMA_NUM_LEN_TO_POS_STATES - 1) << LZMA_NUM_POS_SLOT_BITS); - rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS, - &pos_slot); + rc_bit_tree_decode(rc, prob3, + LZMA_NUM_POS_SLOT_BITS, &pos_slot); + rep0 = pos_slot; if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { - num_bits = (pos_slot >> 1) - 1; + int i2, mi2, num_bits2 = (pos_slot >> 1) - 1; rep0 = 2 | (pos_slot & 1); if (pos_slot < LZMA_END_POS_MODEL_INDEX) { - rep0 <<= num_bits; - prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; + rep0 <<= num_bits2; + prob3 = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; } else { - num_bits -= LZMA_NUM_ALIGN_BITS; - while (num_bits--) + for (; num_bits2 != LZMA_NUM_ALIGN_BITS; num_bits2--) rep0 = (rep0 << 1) | rc_direct_bit(rc); - prob = p + LZMA_ALIGN; rep0 <<= LZMA_NUM_ALIGN_BITS; - num_bits = LZMA_NUM_ALIGN_BITS; + prob3 = p + LZMA_ALIGN; } - i = 1; - mi = 1; - while (num_bits--) { - if (rc_get_bit(rc, prob + mi, &mi)) - rep0 |= i; - i <<= 1; + i2 = 1; + mi2 = 1; + while (num_bits2--) { + if (rc_get_bit(rc, prob3 + mi2, &mi2)) + rep0 |= i2; + i2 <<= 1; } - } else - rep0 = pos_slot; + } if (++rep0 == 0) break; } @@ -470,7 +432,7 @@ unpack_lzma_stream(int src_fd, int dst_fd) len += LZMA_MATCH_MIN_LEN; IF_NOT_FEATURE_LZMA_FAST(string:) do { - pos = buffer_pos - rep0; + uint32_t pos = buffer_pos - rep0; while (pos >= header.dict_size) pos += header.dict_size; previous_byte = buffer[pos]; |