From 77f1ec1b9bf100e6c10aa0856c7156e321511785 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Sat, 13 Oct 2007 03:36:03 +0000 Subject: bzip2: port bzip2 1.0.4 to busybox. note: bzip2 code resides in separate directory (archival/bz/*) and is covered by BSD-style license. code size: 13k --- archival/Config.in | 16 +- archival/Kbuild | 1 + archival/bz/LICENSE | 44 ++ archival/bz/README | 90 +++ archival/bz/blocksort.c | 1128 ++++++++++++++++++++++++++++++ archival/bz/bzlib.c | 433 ++++++++++++ archival/bz/bzlib.h | 63 ++ archival/bz/bzlib_private.h | 234 +++++++ archival/bz/compress.c | 671 ++++++++++++++++++ archival/bz/crctable.c | 105 +++ archival/bz/huffman.c | 188 +++++ archival/bz/randtable.c | 85 +++ archival/bzip2.c | 166 +++++ archival/gzip.c | 11 +- archival/libunarchive/Kbuild | 14 +- archival/libunarchive/decompress_unzip.c | 2 +- include/applets.h | 1 + include/libbb.h | 3 + include/platform.h | 6 +- include/usage.h | 14 +- 20 files changed, 3253 insertions(+), 22 deletions(-) create mode 100644 archival/bz/LICENSE create mode 100644 archival/bz/README create mode 100644 archival/bz/blocksort.c create mode 100644 archival/bz/bzlib.c create mode 100644 archival/bz/bzlib.h create mode 100644 archival/bz/bzlib_private.h create mode 100644 archival/bz/compress.c create mode 100644 archival/bz/crctable.c create mode 100644 archival/bz/huffman.c create mode 100644 archival/bz/randtable.c create mode 100644 archival/bzip2.c diff --git a/archival/Config.in b/archival/Config.in index 49070da06..8d31ec771 100644 --- a/archival/Config.in +++ b/archival/Config.in @@ -48,12 +48,22 @@ config BUNZIP2 conventional LZ77/LZ78-based compressors, and approaches the performance of the PPM family of statistical compressors. - The BusyBox bunzip2 applet is limited to de-compression only. - On an x86 system, this applet adds about 11K. - Unless you have a specific application which requires bunzip2, you should probably say N here. +config BZIP2 + bool "bzip2" + default n + help + bzip2 is a compression utility using the Burrows-Wheeler block + sorting text compression algorithm, and Huffman coding. Compression + is generally considerably better than that achieved by more + conventional LZ77/LZ78-based compressors, and approaches the + performance of the PPM family of statistical compressors. + + Unless you have a specific application which requires bzip2, you + should probably say N here. + config CPIO bool "cpio" default n diff --git a/archival/Kbuild b/archival/Kbuild index 07b442f15..72dbdda0e 100644 --- a/archival/Kbuild +++ b/archival/Kbuild @@ -9,6 +9,7 @@ libs-y += libunarchive/ lib-y:= lib-$(CONFIG_AR) += ar.o lib-$(CONFIG_BUNZIP2) += bbunzip.o +lib-$(CONFIG_BZIP2) += bzip2.o bbunzip.o lib-$(CONFIG_UNLZMA) += bbunzip.o lib-$(CONFIG_CPIO) += cpio.o lib-$(CONFIG_DPKG) += dpkg.o diff --git a/archival/bz/LICENSE b/archival/bz/LICENSE new file mode 100644 index 000000000..f01f08022 --- /dev/null +++ b/archival/bz/LICENSE @@ -0,0 +1,44 @@ +bzip2 applet in busybox is based on lightly-modified source +of bzip2 version 1.0.4. bzip2 source is distributed +under the following conditions (copied verbatim from LICENSE file) +=========================================================== + + +This program, "bzip2", the associated library "libbzip2", and all +documentation, are copyright (C) 1996-2006 Julian R Seward. All +rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + +3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + +4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Julian Seward, Cambridge, UK. +jseward@bzip.org +bzip2/libbzip2 version 1.0.4 of 20 December 2006 diff --git a/archival/bz/README b/archival/bz/README new file mode 100644 index 000000000..ef06d67da --- /dev/null +++ b/archival/bz/README @@ -0,0 +1,90 @@ +This file is an abridged version of README from bizp2 1.0.4 +Build instructions (which are not relevant to busyboxed bzip2) +are removed. +=========================================================== + + +This is the README for bzip2/libzip2. +This version is fully compatible with the previous public releases. + +------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in this file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ + +Please read and be aware of the following: + + +WARNING: + + This program and library (attempts to) compress data by + performing several non-trivial transformations on it. + Unless you are 100% familiar with *all* the algorithms + contained herein, and with the consequences of modifying them, + you should NOT meddle with the compression or decompression + machinery. Incorrect changes can and very likely *will* + lead to disastrous loss of data. + + +DISCLAIMER: + + I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE + USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED. + + Every compression of a file implies an assumption that the + compressed file can be decompressed to reproduce the original. + Great efforts in design, coding and testing have been made to + ensure that this program works correctly. However, the complexity + of the algorithms, and, in particular, the presence of various + special cases in the code which occur with very low but non-zero + probability make it impossible to rule out the possibility of bugs + remaining in the program. DO NOT COMPRESS ANY DATA WITH THIS + PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER + SMALL, THAT THE DATA WILL NOT BE RECOVERABLE. + + That is not to say this program is inherently unreliable. + Indeed, I very much hope the opposite is true. bzip2/libbzip2 + has been carefully constructed and extensively tested. + + +PATENTS: + + To the best of my knowledge, bzip2/libbzip2 does not use any + patented algorithms. However, I do not have the resources + to carry out a patent search. Therefore I cannot give any + guarantee of the above statement. + + +I hope you find bzip2 useful. Feel free to contact me at + jseward@bzip.org +if you have any suggestions or queries. Many people mailed me with +comments, suggestions and patches after the releases of bzip-0.15, +bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1, +1.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this +feedback. I thank you for your comments. + +bzip2's "home" is http://www.bzip.org/ + +Julian Seward +jseward@bzip.org +Cambridge, UK. + +18 July 1996 (version 0.15) +25 August 1996 (version 0.21) + 7 August 1997 (bzip2, version 0.1) +29 August 1997 (bzip2, version 0.1pl2) +23 August 1998 (bzip2, version 0.9.0) + 8 June 1999 (bzip2, version 0.9.5) + 4 Sept 1999 (bzip2, version 0.9.5d) + 5 May 2000 (bzip2, version 1.0pre8) +30 December 2001 (bzip2, version 1.0.2pre1) +15 February 2005 (bzip2, version 1.0.3) +20 December 2006 (bzip2, version 1.0.4) diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c new file mode 100644 index 000000000..7d2856ba7 --- /dev/null +++ b/archival/bz/blocksort.c @@ -0,0 +1,1128 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Block sorting machinery ---*/ +/*--- blocksort.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------*/ +/*--- Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +inline +void fallbackSimpleSort(uint32_t* fmap, + uint32_t* eclass, + int32_t lo, + int32_t hi) +{ + int32_t i, j, tmp; + uint32_t ec_tmp; + + if (lo == hi) return; + + if (hi - lo > 3) { + for (i = hi-4; i >= lo; i--) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4) + fmap[j-4] = fmap[j]; + fmap[j-4] = tmp; + } + } + + for (i = hi-1; i >= lo; i--) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) + fmap[j-1] = fmap[j]; + fmap[j-1] = tmp; + } +} + + +/*---------------------------------------------*/ +#define fswap(zz1, zz2) \ +{ \ + int32_t zztmp = zz1; \ + zz1 = zz2; \ + zz2 = zztmp; \ +} + +#define fvswap(zzp1, zzp2, zzn) \ +{ \ + int32_t yyp1 = (zzp1); \ + int32_t yyp2 = (zzp2); \ + int32_t yyn = (zzn); \ + while (yyn > 0) { \ + fswap(fmap[yyp1], fmap[yyp2]); \ + yyp1++; \ + yyp2++; \ + yyn--; \ + } \ +} + + +#define fmin(a,b) ((a) < (b)) ? (a) : (b) + +#define fpush(lz,hz) { \ + stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + sp++; \ +} + +#define fpop(lz,hz) { \ + sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ +} + +#define FALLBACK_QSORT_SMALL_THRESH 10 +#define FALLBACK_QSORT_STACK_SIZE 100 + + +static +void fallbackQSort3(uint32_t* fmap, + uint32_t* eclass, + int32_t loSt, + int32_t hiSt) +{ + int32_t unLo, unHi, ltLo, gtHi, n, m; + int32_t sp, lo, hi; + uint32_t med, r, r3; + int32_t stackLo[FALLBACK_QSORT_STACK_SIZE]; + int32_t stackHi[FALLBACK_QSORT_STACK_SIZE]; + + r = 0; + + sp = 0; + fpush(loSt, hiSt); + + while (sp > 0) { + AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004); + + fpop(lo, hi); + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort(fmap, eclass, lo, hi); + continue; + } + + /* Random partitioning. Median of 3 sometimes fails to + * avoid bad cases. Median of 9 seems to help but + * looks rather expensive. This too seems to work but + * is cheaper. Guidance for the magic constants + * 7621 and 32768 is taken from Sedgewick's algorithms + * book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + r3 = r % 3; + if (r3 == 0) + med = eclass[fmap[lo]]; + else if (r3 == 1) + med = eclass[fmap[(lo+hi)>>1]]; + else + med = eclass[fmap[hi]]; + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) break; + n = (int32_t)eclass[fmap[unLo]] - (int32_t)med; + if (n == 0) { + fswap(fmap[unLo], fmap[ltLo]); + ltLo++; + unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) break; + n = (int32_t)eclass[fmap[unHi]] - (int32_t)med; + if (n == 0) { + fswap(fmap[unHi], fmap[gtHi]); + gtHi--; unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; + } + + AssertD(unHi == unLo-1, "fallbackQSort3(2)"); + + if (gtHi < ltLo) continue; + + n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); + m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush(lo, n); + fpush(m, hi); + } else { + fpush(m, hi); + fpush(lo, n); + } + } +} + +#undef fmin +#undef fpush +#undef fpop +#undef fswap +#undef fvswap +#undef FALLBACK_QSORT_SMALL_THRESH +#undef FALLBACK_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + * nblock > 0 + * eclass exists for [0 .. nblock-1] + * ((UChar*)eclass) [0 .. nblock-1] holds block + * ptr exists for [0 .. nblock-1] + * + * Post: + * ((UChar*)eclass) [0 .. nblock-1] holds block + * All other areas of eclass destroyed + * fmap [0 .. nblock-1] holds sorted order + * bhtab[0 .. 2+(nblock/32)] destroyed +*/ + +#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) +#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) +#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) +#define WORD_BH(zz) bhtab[(zz) >> 5] +#define UNALIGNED_BH(zz) ((zz) & 0x01f) + +static +void fallbackSort(uint32_t* fmap, + uint32_t* eclass, + uint32_t* bhtab, + int32_t nblock) +{ + int32_t ftab[257]; + int32_t ftabCopy[256]; + int32_t H, i, j, k, l, r, cc, cc1; + int32_t nNotDone; + int32_t nBhtab; + UChar* eclass8 = (UChar*)eclass; + + /* + * Initial 1-char radix sort to generate + * initial fmap and initial BH bits. + */ + for (i = 0; i < 257; i++) ftab[i] = 0; + for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; + for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; + for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; + + for (i = 0; i < nblock; i++) { + j = eclass8[i]; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 2 + (nblock / 32); + for (i = 0; i < nBhtab; i++) bhtab[i] = 0; + for (i = 0; i < 256; i++) SET_BH(ftab[i]); + + /* + * Inductively refine the buckets. Kind-of an + * "exponential radix sort" (!), inspired by the + * Manber-Myers suffix array construction algorithm. + */ + + /*-- set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + SET_BH(nblock + 2*i); + CLEAR_BH(nblock + 2*i + 1); + } + + /*-- the log(N) loop --*/ + H = 1; + while (1) { + j = 0; + for (i = 0; i < nblock; i++) { + if (ISSET_BH(i)) + j = i; + k = fmap[i] - H; + if (k < 0) + k += nblock; + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (1) { + + /*-- find the next non-singleton bucket --*/ + k = r + 1; + while (ISSET_BH(k) && UNALIGNED_BH(k)) + k++; + if (ISSET_BH(k)) { + while (WORD_BH(k) == 0xffffffff) k += 32; + while (ISSET_BH(k)) k++; + } + l = k - 1; + if (l >= nblock) + break; + while (!ISSET_BH(k) && UNALIGNED_BH(k)) + k++; + if (!ISSET_BH(k)) { + while (WORD_BH(k) == 0x00000000) k += 32; + while (!ISSET_BH(k)) k++; + } + r = k - 1; + if (r >= nblock) + break; + + /*-- now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3(fmap, eclass, l, r); + + /*-- scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { + SET_BH(i); + cc = cc1; + }; + } + } + } + + H *= 2; + if (H > nblock || nNotDone == 0) + break; + } + + /* + * Reconstruct the original block in + * eclass8 [0 .. nblock-1], since the + * previous phase destroyed it. + */ + j = 0; + for (i = 0; i < nblock; i++) { + while (ftabCopy[j] == 0) + j++; + ftabCopy[j]--; + eclass8[fmap[i]] = (UChar)j; + } + AssertH(j < 256, 1005); +} + +#undef SET_BH +#undef CLEAR_BH +#undef ISSET_BH +#undef WORD_BH +#undef UNALIGNED_BH + + +/*---------------------------------------------*/ +/*--- The main, O(N^2 log(N)) sorting ---*/ +/*--- algorithm. Faster for "normal" ---*/ +/*--- non-repetitive blocks. ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +inline +Bool mainGtU( + uint32_t i1, + uint32_t i2, + UChar* block, + uint16_t* quadrant, + uint32_t nblock, + int32_t* budget) +{ + int32_t k; + UChar c1, c2; + uint16_t s1, s2; + +///unrolling + AssertD(i1 != i2, "mainGtU"); + /* 1 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 2 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 3 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 4 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 5 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 6 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 7 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 8 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 9 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 10 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 11 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + /* 12 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + + k = nblock + 8; + +///unrolling + do { + /* 1 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 2 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 3 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 4 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 5 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 6 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 7 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + /* 8 */ + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + + if (i1 >= nblock) i1 -= nblock; + if (i2 >= nblock) i2 -= nblock; + + k -= 8; + (*budget)--; + } while (k >= 0); + + return False; +} + + +/*---------------------------------------------*/ +/* + * Knuth's increments seem to work better + * than Incerpi-Sedgewick here. Possibly + * because the number of elems to sort is + * usually small, typically <= 20. + */ +static +const int32_t incs[14] = { + 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 +}; + +static +void mainSimpleSort(uint32_t* ptr, + UChar* block, + uint16_t* quadrant, + int32_t nblock, + int32_t lo, + int32_t hi, + int32_t d, + int32_t* budget) +{ + int32_t i, j, h, bigN, hp; + uint32_t v; + + bigN = hi - lo + 1; + if (bigN < 2) return; + + hp = 0; + while (incs[hp] < bigN) hp++; + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (1) { + +///unrolling + /*-- copy 1 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 2 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 3 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU (ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + if (*budget < 0) return; + } + } +} + + +/*---------------------------------------------*/ +/* + * The following is an implementation of + * an elegant 3-way quicksort for strings, + * described in a paper "Fast Algorithms for + * Sorting and Searching Strings", by Robert + * Sedgewick and Jon L. Bentley. + */ + +#define mswap(zz1, zz2) \ +{ \ + int32_t zztmp = zz1; \ + zz1 = zz2; \ + zz2 = zztmp; \ +} + +#define mvswap(zzp1, zzp2, zzn) \ +{ \ + int32_t yyp1 = (zzp1); \ + int32_t yyp2 = (zzp2); \ + int32_t yyn = (zzn); \ + while (yyn > 0) { \ + mswap(ptr[yyp1], ptr[yyp2]); \ + yyp1++; \ + yyp2++; \ + yyn--; \ + } \ +} + +static +inline +UChar mmed3(UChar a, UChar b, UChar c) +{ + UChar t; + if (a > b) { + t = a; + a = b; + b = t; + }; + if (b > c) { + b = c; + if (a > b) + b = a; + } + return b; +} + +#define mmin(a,b) ((a) < (b)) ? (a) : (b) + +#define mpush(lz,hz,dz) \ +{ \ + stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + stackD [sp] = dz; \ + sp++; \ +} + +#define mpop(lz,hz,dz) \ +{ \ + sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ + dz = stackD [sp]; \ +} + + +#define mnextsize(az) (nextHi[az]-nextLo[az]) + +#define mnextswap(az,bz) \ +{ \ + int32_t tz; \ + tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ + tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ + tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \ +} + +#define MAIN_QSORT_SMALL_THRESH 20 +#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) +#define MAIN_QSORT_STACK_SIZE 100 + +static +void mainQSort3(uint32_t* ptr, + UChar* block, + uint16_t* quadrant, + int32_t nblock, + int32_t loSt, + int32_t hiSt, + int32_t dSt, + int32_t* budget) +{ + int32_t unLo, unHi, ltLo, gtHi, n, m, med; + int32_t sp, lo, hi, d; + + int32_t stackLo[MAIN_QSORT_STACK_SIZE]; + int32_t stackHi[MAIN_QSORT_STACK_SIZE]; + int32_t stackD [MAIN_QSORT_STACK_SIZE]; + + int32_t nextLo[3]; + int32_t nextHi[3]; + int32_t nextD [3]; + + sp = 0; + mpush(loSt, hiSt, dSt); + + while (sp > 0) { + AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001); + + mpop(lo, hi, d); + if (hi - lo < MAIN_QSORT_SMALL_THRESH + || d > MAIN_QSORT_DEPTH_THRESH + ) { + mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget); + if (*budget < 0) + return; + continue; + } + + med = (int32_t) mmed3(block[ptr[lo ] + d], + block[ptr[hi ] + d], + block[ptr[(lo+hi) >> 1] + d]); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) + break; + n = ((int32_t)block[ptr[unLo]+d]) - med; + if (n == 0) { + mswap(ptr[unLo], ptr[ltLo]); + ltLo++; + unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) + break; + n = ((int32_t)block[ptr[unHi]+d]) - med; + if (n == 0) { + mswap(ptr[unHi], ptr[gtHi]); + gtHi--; + unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) + break; + mswap(ptr[unLo], ptr[unHi]); + unLo++; + unHi--; + } + + AssertD(unHi == unLo-1, "mainQSort3(2)"); + + if (gtHi < ltLo) { + mpush(lo, hi, d + 1); + continue; + } + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; + nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; + nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; + + if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); + if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); + if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); + + AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)"); + AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)"); + + mpush (nextLo[0], nextHi[0], nextD[0]); + mpush (nextLo[1], nextHi[1], nextD[1]); + mpush (nextLo[2], nextHi[2], nextD[2]); + } +} + +#undef mswap +#undef mvswap +#undef mpush +#undef mpop +#undef mmin +#undef mnextsize +#undef mnextswap +#undef MAIN_QSORT_SMALL_THRESH +#undef MAIN_QSORT_DEPTH_THRESH +#undef MAIN_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + * nblock > N_OVERSHOOT + * block32 exists for [0 .. nblock-1 +N_OVERSHOOT] + * ((UChar*)block32) [0 .. nblock-1] holds block + * ptr exists for [0 .. nblock-1] + * + * Post: + * ((UChar*)block32) [0 .. nblock-1] holds block + * All other areas of block32 destroyed + * ftab[0 .. 65536] destroyed + * ptr [0 .. nblock-1] holds sorted order + * if (*budget < 0), sorting was abandoned + */ + +#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) +#define SETMASK (1 << 21) +#define CLEARMASK (~(SETMASK)) + +static NOINLINE +void mainSort(uint32_t* ptr, + UChar* block, + uint16_t* quadrant, + uint32_t* ftab, + int32_t nblock, + int32_t* budget) +{ + int32_t i, j, k, ss, sb; + int32_t runningOrder[256]; + Bool bigDone[256]; + int32_t copyStart[256]; + int32_t copyEnd [256]; + UChar c1; + int32_t numQSorted; + uint16_t s; + + /*-- set up the 2-byte frequency table --*/ + /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ + memset(ftab, 0, 65537 * sizeof(ftab[0])); + + j = block[0] << 8; + i = nblock-1; +#if 0 + for (; i >= 3; i -= 4) { + quadrant[i] = 0; + j = (j >> 8) |(((uint16_t)block[i]) << 8); + ftab[j]++; + quadrant[i-1] = 0; + j = (j >> 8) |(((uint16_t)block[i-1]) << 8); + ftab[j]++; + quadrant[i-2] = 0; + j = (j >> 8) |(((uint16_t)block[i-2]) << 8); + ftab[j]++; + quadrant[i-3] = 0; + j = (j >> 8) |(((uint16_t)block[i-3]) << 8); + ftab[j]++; + } +#endif + for (; i >= 0; i--) { + quadrant[i] = 0; + j = (j >> 8) |(((uint16_t)block[i]) << 8); + ftab[j]++; + } + + /*-- (emphasises close relationship of block & quadrant) --*/ + for (i = 0; i < BZ_N_OVERSHOOT; i++) { + block [nblock+i] = block[i]; + quadrant[nblock+i] = 0; + } + + /*-- Complete the initial radix sort --*/ + for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; + + s = block[0] << 8; + i = nblock-1; +#if 0 + for (; i >= 3; i -= 4) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i; + s = (s >> 8) | (block[i-1] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-1; + s = (s >> 8) | (block[i-2] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-2; + s = (s >> 8) | (block[i-3] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i-3; + } +#endif + for (; i >= 0; i--) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] -1; + ftab[s] = j; + ptr[j] = i; + } + + /* + * Now ftab contains the first loc of every small bucket. + * Calculate the running order, from smallest to largest + * big bucket. + */ + for (i = 0; i <= 255; i++) { + bigDone [i] = False; + runningOrder[i] = i; + } + + { + int32_t vv; + /* was: int32_t h = 1; */ + /* do h = 3 * h + 1; while (h <= 256); */ + int32_t h = 364; + + do { + h = h / 3; + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { + runningOrder[j] = runningOrder[j-h]; + j = j - h; + if (j <= (h - 1)) goto zero; + } + zero: + runningOrder[j] = vv; + } + } while (h != 1); + } + + /* + * The main sorting loop. + */ + + numQSorted = 0; + + for (i = 0; i <= 255; i++) { + + /* + * Process big buckets, starting with the least full. + * Basically this is a 3-step process in which we call + * mainQSort3 to sort the small buckets [ss, j], but + * also make a big effort to avoid the calls if we can. + */ + ss = runningOrder[i]; + + /* + * Step 1: + * Complete the big bucket [ss] by quicksorting + * any unsorted small buckets [ss, j], for j != ss. + * Hopefully previous pointer-scanning phases have already + * completed many of the small buckets [ss, j], so + * we don't have to sort them at all. + */ + for (j = 0; j <= 255; j++) { + if (j != ss) { + sb = (ss << 8) + j; + if (!(ftab[sb] & SETMASK)) { + int32_t lo = ftab[sb] & CLEARMASK; + int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3 ( + ptr, block, quadrant, nblock, + lo, hi, BZ_N_RADIX, budget + ); + numQSorted += (hi - lo + 1); + if (*budget < 0) return; + } + } + ftab[sb] |= SETMASK; + } + } + + AssertH(!bigDone[ss], 1006); + + /* + * Step 2: + * Now scan this big bucket [ss] so as to synthesise the + * sorted order for small buckets [t, ss] for all t, + * including, magically, the bucket [ss,ss] too. + * This will avoid doing Real Work in subsequent Step 1's. + */ + { + for (j = 0; j <= 255; j++) { + copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; + copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; + } + for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { + k = ptr[j] - 1; + if (k < 0) + k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[copyStart[c1]++] = k; + } + for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { + k = ptr[j]-1; + if (k < 0) + k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[copyEnd[c1]--] = k; + } + } + + AssertH((copyStart[ss]-1 == copyEnd[ss]) + || + /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. + * Necessity for this case is demonstrated by compressing + * a sequence of approximately 48.5 million of character + * 251; 1.0.0/1.0.1 will then die here. */ + (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), + 1007) + + for (j = 0; j <= 255; j++) + ftab[(j << 8) + ss] |= SETMASK; + + /* + * Step 3: + * The [ss] big bucket is now done. Record this fact, + * and update the quadrant descriptors. Remember to + * update quadrants in the overshoot area too, if + * necessary. The "if (i < 255)" test merely skips + * this updating for the last bucket processed, since + * updating for the last bucket is pointless. + * + * The quadrant array provides a way to incrementally + * cache sort orderings, as they appear, so as to + * make subsequent comparisons in fullGtU() complete + * faster. For repetitive blocks this makes a big + * difference (but not big enough to be able to avoid + * the fallback sorting mechanism, exponential radix sort). + * + * The precise meaning is: at all times: + * + * for 0 <= i < nblock and 0 <= j <= nblock + * + * if block[i] != block[j], + * + * then the relative values of quadrant[i] and + * quadrant[j] are meaningless. + * + * else { + * if quadrant[i] < quadrant[j] + * then the string starting at i lexicographically + * precedes the string starting at j + * + * else if quadrant[i] > quadrant[j] + * then the string starting at j lexicographically + * precedes the string starting at i + * + * else + * the relative ordering of the strings starting + * at i and j has not yet been determined. + * } + */ + bigDone[ss] = True; + + if (i < 255) { + int32_t bbStart = ftab[ss << 8] & CLEARMASK; + int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; + int32_t shifts = 0; + + while ((bbSize >> shifts) > 65534) shifts++; + + for (j = bbSize-1; j >= 0; j--) { + int32_t a2update = ptr[bbStart + j]; + uint16_t qVal = (uint16_t)(j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZ_N_OVERSHOOT) + quadrant[a2update + nblock] = qVal; + } + AssertH(((bbSize-1) >> shifts) <= 65535, 1002); + } + + } +} + +#undef BIGFREQ +#undef SETMASK +#undef CLEARMASK + + +/*---------------------------------------------*/ +/* Pre: + * nblock > 0 + * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] + * ((UChar*)arr2)[0 .. nblock-1] holds block + * arr1 exists for [0 .. nblock-1] + * + * Post: + * ((UChar*)arr2) [0 .. nblock-1] holds block + * All other areas of block destroyed + * ftab[0 .. 65536] destroyed + * arr1[0 .. nblock-1] holds sorted order + */ +static NOINLINE +void BZ2_blockSort(EState* s) +{ + /* In original bzip2 1.0.4, it's a parameter, but 30 + * should work ok. */ + enum { wfact = 30 }; + + uint32_t* ptr = s->ptr; + UChar* block = s->block; + uint32_t* ftab = s->ftab; + int32_t nblock = s->nblock; + uint16_t* quadrant; + int32_t budget; + int32_t i; + + if (nblock < 10000) { + fallbackSort(s->arr1, s->arr2, ftab, nblock); + } else { + /* Calculate the location for quadrant, remembering to get + * the alignment right. Assumes that &(block[0]) is at least + * 2-byte aligned -- this should be ok since block is really + * the first section of arr2. + */ + i = nblock + BZ_N_OVERSHOOT; + if (i & 1) i++; + quadrant = (uint16_t*)(&(block[i])); + + /* (wfact-1) / 3 puts the default-factor-30 + * transition point at very roughly the same place as + * with v0.1 and v0.9.0. + * Not that it particularly matters any more, since the + * resulting compressed stream is now the same regardless + * of whether or not we use the main sort or fallback sort. + */ + budget = nblock * ((wfact-1) / 3); + + mainSort(ptr, block, quadrant, ftab, nblock, &budget); + if (budget < 0) { + fallbackSort(s->arr1, s->arr2, ftab, nblock); + } + } + + s->origPtr = -1; + for (i = 0; i < s->nblock; i++) + if (ptr[i] == 0) { + s->origPtr = i; break; + }; + + AssertH(s->origPtr != -1, 1003); +} + + +/*-------------------------------------------------------------*/ +/*--- end blocksort.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib.c b/archival/bz/bzlib.c new file mode 100644 index 000000000..3125bb0bf --- /dev/null +++ b/archival/bz/bzlib.c @@ -0,0 +1,433 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Library top-level functions. ---*/ +/*--- bzlib.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* CHANGES + * 0.9.0 -- original version. + * 0.9.0a/b -- no changes in this file. + * 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress(). + * fixed bzWrite/bzRead to ignore zero-length requests. + * fixed bzread to correctly handle read requests after EOF. + * wrong parameter order in call to bzDecompressInit in + * bzBuffToBuffDecompress. Fixed. + */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +/*--- Compression stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +#ifndef BZ_NO_STDIO +static void bz_assert_fail(int errcode) +{ + /* if (errcode == 1007) bb_error_msg_and_die("probably bad RAM"); */ + bb_error_msg_and_die("bzip2 internal error %d", errcode); +} +#endif + + +/*---------------------------------------------------*/ +static +void prepare_new_block(EState* s) +{ + int32_t i; + s->nblock = 0; + s->numZ = 0; + s->state_out_pos = 0; + BZ_INITIALISE_CRC(s->blockCRC); + for (i = 0; i < 256; i++) s->inUse[i] = False; + s->blockNo++; +} + + +/*---------------------------------------------------*/ +static +ALWAYS_INLINE +void init_RL(EState* s) +{ + s->state_in_ch = 256; + s->state_in_len = 0; +} + + +static +Bool isempty_RL(EState* s) +{ + if (s->state_in_ch < 256 && s->state_in_len > 0) + return False; + return True; +} + + +/*---------------------------------------------------*/ +static +void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k) +{ + int32_t n; + EState* s; + + s = xzalloc(sizeof(EState)); + s->strm = strm; + + n = 100000 * blockSize100k; + s->arr1 = xmalloc(n * sizeof(uint32_t)); + s->mtfv = (uint16_t*)s->arr1; + s->ptr = (uint32_t*)s->arr1; + s->arr2 = xmalloc((n + BZ_N_OVERSHOOT) * sizeof(uint32_t)); + s->block = (UChar*)s->arr2; + s->ftab = xmalloc(65537 * sizeof(uint32_t)); + + s->state = BZ_S_INPUT; + s->mode = BZ_M_RUNNING; + s->blockSize100k = blockSize100k; + s->nblockMAX = n - 19; + + strm->state = s; + /*strm->total_in = 0;*/ + strm->total_out = 0; + init_RL(s); + prepare_new_block(s); +} + + +/*---------------------------------------------------*/ +static +void add_pair_to_block(EState* s) +{ + int32_t i; + UChar ch = (UChar)(s->state_in_ch); + for (i = 0; i < s->state_in_len; i++) { + BZ_UPDATE_CRC(s->blockCRC, ch); + } + s->inUse[s->state_in_ch] = True; + switch (s->state_in_len) { + case 1: + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + case 2: + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + case 3: + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + break; + default: + s->inUse[s->state_in_len-4] = True; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = (UChar)ch; s->nblock++; + s->block[s->nblock] = ((UChar)(s->state_in_len-4)); + s->nblock++; + break; + } +} + + +/*---------------------------------------------------*/ +static +void flush_RL(EState* s) +{ + if (s->state_in_ch < 256) add_pair_to_block(s); + init_RL(s); +} + + +/*---------------------------------------------------*/ +#define ADD_CHAR_TO_BLOCK(zs, zchh0) \ +{ \ + uint32_t zchh = (uint32_t)(zchh0); \ + /*-- fast track the common case --*/ \ + if (zchh != zs->state_in_ch && zs->state_in_len == 1) { \ + UChar ch = (UChar)(zs->state_in_ch); \ + BZ_UPDATE_CRC(zs->blockCRC, ch); \ + zs->inUse[zs->state_in_ch] = True; \ + zs->block[zs->nblock] = (UChar)ch; \ + zs->nblock++; \ + zs->state_in_ch = zchh; \ + } \ + else \ + /*-- general, uncommon cases --*/ \ + if (zchh != zs->state_in_ch || \ + zs->state_in_len == 255) { \ + if (zs->state_in_ch < 256) \ + add_pair_to_block(zs); \ + zs->state_in_ch = zchh; \ + zs->state_in_len = 1; \ + } else { \ + zs->state_in_len++; \ + } \ +} + + +/*---------------------------------------------------*/ +static +Bool copy_input_until_stop(EState* s) +{ + Bool progress_in = False; + +//vda: cannot simplify this until avail_in_expect is removed + if (s->mode == BZ_M_RUNNING) { + /*-- fast track the common case --*/ + while (1) { + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + progress_in = True; + ADD_CHAR_TO_BLOCK(s, (uint32_t)(*((UChar*)(s->strm->next_in)))); + s->strm->next_in++; + s->strm->avail_in--; + /*s->strm->total_in++;*/ + } + } else { + /*-- general, uncommon case --*/ + while (1) { + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- flush/finish end? --*/ + if (s->avail_in_expect == 0) break; + progress_in = True; + ADD_CHAR_TO_BLOCK(s, (uint32_t)(*((UChar*)(s->strm->next_in)))); + s->strm->next_in++; + s->strm->avail_in--; + /*s->strm->total_in++;*/ + s->avail_in_expect--; + } + } + return progress_in; +} + + +/*---------------------------------------------------*/ +static +Bool copy_output_until_stop(EState* s) +{ + Bool progress_out = False; + + while (1) { + + /*-- no output space? --*/ + if (s->strm->avail_out == 0) break; + + /*-- block done? --*/ + if (s->state_out_pos >= s->numZ) break; + + progress_out = True; + *(s->strm->next_out) = s->zbits[s->state_out_pos]; + s->state_out_pos++; + s->strm->avail_out--; + s->strm->next_out++; + s->strm->total_out++; + } + + return progress_out; +} + + +/*---------------------------------------------------*/ +static +Bool handle_compress(bz_stream *strm) +{ + Bool progress_in = False; + Bool progress_out = False; + EState* s = strm->state; + + while (1) { + if (s->state == BZ_S_OUTPUT) { + progress_out |= copy_output_until_stop(s); + if (s->state_out_pos < s->numZ) break; + if (s->mode == BZ_M_FINISHING + && s->avail_in_expect == 0 + && isempty_RL(s)) + break; + prepare_new_block(s); + s->state = BZ_S_INPUT; + if (s->mode == BZ_M_FLUSHING + && s->avail_in_expect == 0 + && isempty_RL(s)) + break; + } + + if (s->state == BZ_S_INPUT) { + progress_in |= copy_input_until_stop(s); + if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { + flush_RL(s); + BZ2_compressBlock(s, (Bool)(s->mode == BZ_M_FINISHING)); + s->state = BZ_S_OUTPUT; + } else + if (s->nblock >= s->nblockMAX) { + BZ2_compressBlock(s, False); + s->state = BZ_S_OUTPUT; + } else + if (s->strm->avail_in == 0) { + break; + } + } + + } + + return progress_in || progress_out; +} + + +/*---------------------------------------------------*/ +static +int BZ2_bzCompress(bz_stream *strm, int action) +{ + Bool progress; + EState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + preswitch: + switch (s->mode) { + + case BZ_M_IDLE: + return BZ_SEQUENCE_ERROR; + + case BZ_M_RUNNING: + if (action == BZ_RUN) { + progress = handle_compress(strm); + return progress ? BZ_RUN_OK : BZ_PARAM_ERROR; + } + else + if (action == BZ_FLUSH) { + s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FLUSHING; + goto preswitch; + } + else + if (action == BZ_FINISH) { + s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FINISHING; + goto preswitch; + } + else + return BZ_PARAM_ERROR; + + case BZ_M_FLUSHING: + if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR; + progress = handle_compress(strm); + if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + return BZ_FLUSH_OK; + s->mode = BZ_M_RUNNING; + return BZ_RUN_OK; + + case BZ_M_FINISHING: + if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR; + progress = handle_compress(strm); + if (!progress) return BZ_SEQUENCE_ERROR; + if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + return BZ_FINISH_OK; + s->mode = BZ_M_IDLE; + return BZ_STREAM_END; + } + return BZ_OK; /*--not reached--*/ +} + + +/*---------------------------------------------------*/ +static +int BZ2_bzCompressEnd(bz_stream *strm) +{ + EState* s; + if (strm == NULL) return BZ_PARAM_ERROR; + s = strm->state; + if (s == NULL) return BZ_PARAM_ERROR; + if (s->strm != strm) return BZ_PARAM_ERROR; + + if (s->arr1 != NULL) free(s->arr1); + if (s->arr2 != NULL) free(s->arr2); + if (s->ftab != NULL) free(s->ftab); + free(strm->state); + + strm->state = NULL; + + return BZ_OK; +} + + +/*---------------------------------------------------*/ +/*--- Misc convenience stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +#ifdef EXAMPLE_CODE_FOR_MEM_TO_MEM_COMPRESSION +static +int BZ2_bzBuffToBuffCompress(char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL || + source == NULL || + blockSize100k < 1 || blockSize100k > 9) + return BZ_PARAM_ERROR; + + BZ2_bzCompressInit(&strm, blockSize100k); + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = BZ2_bzCompress(&strm, BZ_FINISH); + if (ret == BZ_FINISH_OK) goto output_overflow; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + BZ2_bzCompressEnd(&strm); + return BZ_OK; + + output_overflow: + BZ2_bzCompressEnd(&strm); + return BZ_OUTBUFF_FULL; + + errhandler: + BZ2_bzCompressEnd(&strm); + return ret; +} +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib.h b/archival/bz/bzlib.h new file mode 100644 index 000000000..805e9b328 --- /dev/null +++ b/archival/bz/bzlib.h @@ -0,0 +1,63 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Public header file for the library. ---*/ +/*--- bzlib.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +#define BZ_RUN 0 +#define BZ_FLUSH 1 +#define BZ_FINISH 2 + +#define BZ_OK 0 +#define BZ_RUN_OK 1 +#define BZ_FLUSH_OK 2 +#define BZ_FINISH_OK 3 +#define BZ_STREAM_END 4 +#define BZ_SEQUENCE_ERROR (-1) +#define BZ_PARAM_ERROR (-2) +#define BZ_MEM_ERROR (-3) +#define BZ_DATA_ERROR (-4) +#define BZ_DATA_ERROR_MAGIC (-5) +#define BZ_IO_ERROR (-6) +#define BZ_UNEXPECTED_EOF (-7) +#define BZ_OUTBUFF_FULL (-8) +#define BZ_CONFIG_ERROR (-9) + +typedef struct bz_stream { + char *next_in; + char *next_out; + unsigned avail_in; + unsigned avail_out; + /*unsigned long long total_in;*/ + unsigned long long total_out; + void *state; +} bz_stream; + +/*-- Core (low-level) library functions --*/ + +static void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k); +static int BZ2_bzCompress(bz_stream *strm, int action); +static int BZ2_bzCompressEnd(bz_stream *strm); + +/*-------------------------------------------------------------*/ +/*--- end bzlib.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib_private.h b/archival/bz/bzlib_private.h new file mode 100644 index 000000000..24ffbee9c --- /dev/null +++ b/archival/bz/bzlib_private.h @@ -0,0 +1,234 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Private header file for the library. ---*/ +/*--- bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib.h" */ + +#define BZ_DEBUG 0 +//#define BZ_NO_STDIO 1 - does not work + + +/*-- General stuff. --*/ + +typedef unsigned char Bool; +typedef unsigned char UChar; + +#define True ((Bool)1) +#define False ((Bool)0) + +static void bz_assert_fail(int errcode) ATTRIBUTE_NORETURN; +#define AssertH(cond, errcode) \ +{ \ + if (!(cond)) \ + bz_assert_fail(errcode); \ +} + +#if BZ_DEBUG +#define AssertD(cond, msg) \ +{ \ + if (!(cond)) \ + bb_error_msg_and_die("(debug build): internal error %s", msg); \ +} +#else +#define AssertD(cond, msg) do { } while (0) +#endif + + +/*-- Header bytes. --*/ + +#define BZ_HDR_B 0x42 /* 'B' */ +#define BZ_HDR_Z 0x5a /* 'Z' */ +#define BZ_HDR_h 0x68 /* 'h' */ +#define BZ_HDR_0 0x30 /* '0' */ + +#define BZ_HDR_BZh0 0x425a6830 + +/*-- Constants for the back end. --*/ + +#define BZ_MAX_ALPHA_SIZE 258 +#define BZ_MAX_CODE_LEN 23 + +#define BZ_RUNA 0 +#define BZ_RUNB 1 + +#define BZ_N_GROUPS 6 +#define BZ_G_SIZE 50 +#define BZ_N_ITERS 4 + +#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) + + +/*-- Stuff for randomising repetitive blocks. --*/ + +static const int32_t BZ2_rNums[512]; + +#define BZ_RAND_DECLS \ + int32_t rNToGo; \ + int32_t rTPos \ + +#define BZ_RAND_INIT_MASK \ + s->rNToGo = 0; \ + s->rTPos = 0 \ + +#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0) + +#define BZ_RAND_UPD_MASK \ +{ \ + if (s->rNToGo == 0) { \ + s->rNToGo = BZ2_rNums[s->rTPos]; \ + s->rTPos++; \ + if (s->rTPos == 512) s->rTPos = 0; \ + } \ + s->rNToGo--; \ +} + + +/*-- Stuff for doing CRCs. --*/ + +static const uint32_t BZ2_crc32Table[256]; + +#define BZ_INITIALISE_CRC(crcVar) \ +{ \ + crcVar = 0xffffffffL; \ +} + +#define BZ_FINALISE_CRC(crcVar) \ +{ \ + crcVar = ~(crcVar); \ +} + +#define BZ_UPDATE_CRC(crcVar,cha) \ +{ \ + crcVar = (crcVar << 8) ^ BZ2_crc32Table[(crcVar >> 24) ^ ((UChar)cha)]; \ +} + + +/*-- States and modes for compression. --*/ + +#define BZ_M_IDLE 1 +#define BZ_M_RUNNING 2 +#define BZ_M_FLUSHING 3 +#define BZ_M_FINISHING 4 + +#define BZ_S_OUTPUT 1 +#define BZ_S_INPUT 2 + +#define BZ_N_RADIX 2 +#define BZ_N_QSORT 12 +#define BZ_N_SHELL 18 +#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) + + +/*-- Structure holding all the compression-side stuff. --*/ + +typedef struct EState { + /* pointer back to the struct bz_stream */ + bz_stream *strm; + + /* mode this stream is in, and whether inputting */ + /* or outputting data */ + int32_t mode; + int32_t state; + + /* remembers avail_in when flush/finish requested */ + uint32_t avail_in_expect; //vda: do we need this? + + /* for doing the block sorting */ + uint32_t *arr1; + uint32_t *arr2; + uint32_t *ftab; + int32_t origPtr; + + /* aliases for arr1 and arr2 */ + uint32_t *ptr; + UChar *block; + uint16_t *mtfv; + UChar *zbits; + + /* run-length-encoding of the input */ + uint32_t state_in_ch; + int32_t state_in_len; + BZ_RAND_DECLS; + + /* input and output limits and current posns */ + int32_t nblock; + int32_t nblockMAX; + int32_t numZ; + int32_t state_out_pos; + + /* the buffer for bit stream creation */ + uint32_t bsBuff; + int32_t bsLive; + + /* block and combined CRCs */ + uint32_t blockCRC; + uint32_t combinedCRC; + + /* misc administratium */ + int32_t blockNo; + int32_t blockSize100k; + + /* stuff for coding the MTF values */ + int32_t nMTF; + + /* map of bytes used in block */ + int32_t nInUse; + Bool inUse[256]; + UChar unseqToSeq[256]; + + /* stuff for coding the MTF values */ + int32_t mtfFreq [BZ_MAX_ALPHA_SIZE]; + UChar selector [BZ_MAX_SELECTORS]; + UChar selectorMtf[BZ_MAX_SELECTORS]; + + UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + int32_t code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + int32_t rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; +#ifdef FAST_GROUP6 + /* second dimension: only 3 needed; 4 makes index calculations faster */ + uint32_t len_pack[BZ_MAX_ALPHA_SIZE][4]; +#endif +} EState; + + +/*-- compression. --*/ + +static void +BZ2_blockSort(EState*); + +static void +BZ2_compressBlock(EState*, Bool); + +static void +BZ2_bsInitWrite(EState*); + +static void +BZ2_hbAssignCodes(int32_t*, UChar*, int32_t, int32_t, int32_t); + +static void +BZ2_hbMakeCodeLengths(UChar*, int32_t*, int32_t, int32_t); + +/*-------------------------------------------------------------*/ +/*--- end bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/compress.c b/archival/bz/compress.c new file mode 100644 index 000000000..54426dcc7 --- /dev/null +++ b/archival/bz/compress.c @@ -0,0 +1,671 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Compression machinery (not incl block sorting) ---*/ +/*--- compress.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* CHANGES + * 0.9.0 -- original version. + * 0.9.0a/b -- no changes in this file. + * 0.9.0c -- changed setting of nGroups in sendMTFValues() + * so as to do a bit better on small files +*/ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +/*--- Bit stream I/O ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void BZ2_bsInitWrite(EState* s) +{ + s->bsLive = 0; + s->bsBuff = 0; +} + + +/*---------------------------------------------------*/ +static NOINLINE +void bsFinishWrite(EState* s) +{ + while (s->bsLive > 0) { + s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } +} + + +/*---------------------------------------------------*/ +static +/* Forced inlining results in +600 bytes code, + * 2% faster compression. Not worth it. */ +/*ALWAYS_INLINE*/ +void bsW(EState* s, int32_t n, uint32_t v) +{ + while (s->bsLive >= 8) { + s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } + s->bsBuff |= (v << (32 - s->bsLive - n)); + s->bsLive += n; +} + + +/*---------------------------------------------------*/ +static +void bsPutU32(EState* s, uint32_t u) +{ + bsW(s, 8, (u >> 24) & 0xff); + bsW(s, 8, (u >> 16) & 0xff); + bsW(s, 8, (u >> 8) & 0xff); + bsW(s, 8, u & 0xff); +} + + +/*---------------------------------------------------*/ +static +void bsPutUChar(EState* s, UChar c) +{ + bsW(s, 8, (uint32_t)c); +} + + +/*---------------------------------------------------*/ +/*--- The back end proper ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void makeMaps_e(EState* s) +{ + int32_t i; + s->nInUse = 0; + for (i = 0; i < 256; i++) { + if (s->inUse[i]) { + s->unseqToSeq[i] = s->nInUse; + s->nInUse++; + } + } +} + + +/*---------------------------------------------------*/ +static NOINLINE +void generateMTFValues(EState* s) +{ + UChar yy[256]; + int32_t i, j; + int32_t zPend; + int32_t wr; + int32_t EOB; + + /* + * After sorting (eg, here), + * s->arr1[0 .. s->nblock-1] holds sorted order, + * and + * ((UChar*)s->arr2)[0 .. s->nblock-1] + * holds the original block data. + * + * The first thing to do is generate the MTF values, + * and put them in + * ((uint16_t*)s->arr1)[0 .. s->nblock-1]. + * Because there are strictly fewer or equal MTF values + * than block values, ptr values in this area are overwritten + * with MTF values only when they are no longer needed. + * + * The final compressed bitstream is generated into the + * area starting at + * (UChar*) (&((UChar*)s->arr2)[s->nblock]) + * + * These storage aliases are set up in bzCompressInit(), + * except for the last one, which is arranged in + * compressBlock(). + */ + uint32_t* ptr = s->ptr; + UChar* block = s->block; + uint16_t* mtfv = s->mtfv; + + makeMaps_e(s); + EOB = s->nInUse+1; + + for (i = 0; i <= EOB; i++) + s->mtfFreq[i] = 0; + + wr = 0; + zPend = 0; + for (i = 0; i < s->nInUse; i++) + yy[i] = (UChar) i; + + for (i = 0; i < s->nblock; i++) { + UChar ll_i; + AssertD(wr <= i, "generateMTFValues(1)"); + j = ptr[i]-1; + if (j < 0) + j += s->nblock; + ll_i = s->unseqToSeq[block[j]]; + AssertD(ll_i < s->nInUse, "generateMTFValues(2a)"); + + if (yy[0] == ll_i) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (1) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + { + register UChar rtmp; + register UChar* ryy_j; + register UChar rll_i; + rtmp = yy[1]; + yy[1] = yy[0]; + ryy_j = &(yy[1]); + rll_i = ll_i; + while (rll_i != rtmp) { + register UChar rtmp2; + ryy_j++; + rtmp2 = rtmp; + rtmp = *ryy_j; + *ryy_j = rtmp2; + }; + yy[0] = rtmp; + j = ryy_j - &(yy[0]); + mtfv[wr] = j+1; + wr++; + s->mtfFreq[j+1]++; + } + + } + } + + if (zPend > 0) { + zPend--; + while (1) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) + break; + zPend = (zPend - 2) / 2; + }; + zPend = 0; + } + + mtfv[wr] = EOB; + wr++; + s->mtfFreq[EOB]++; + + s->nMTF = wr; +} + + +/*---------------------------------------------------*/ +#define BZ_LESSER_ICOST 0 +#define BZ_GREATER_ICOST 15 + +static NOINLINE +void sendMTFValues(EState* s) +{ + int32_t v, t, i, j, gs, ge, totc, bt, bc, iter; + int32_t nSelectors, alphaSize, minLen, maxLen, selCtr; + int32_t nGroups, nBytes; + + /* + * UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * is a global since the decoder also needs it. + * + * int32_t code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * are also globals only used in this proc. + * Made global to keep stack frame size small. + */ + + uint16_t cost[BZ_N_GROUPS]; + int32_t fave[BZ_N_GROUPS]; + + uint16_t* mtfv = s->mtfv; + + alphaSize = s->nInUse+2; + for (t = 0; t < BZ_N_GROUPS; t++) + for (v = 0; v < alphaSize; v++) + s->len[t][v] = BZ_GREATER_ICOST; + + /*--- Decide how many coding tables to use ---*/ + AssertH(s->nMTF > 0, 3001); + if (s->nMTF < 200) nGroups = 2; else + if (s->nMTF < 600) nGroups = 3; else + if (s->nMTF < 1200) nGroups = 4; else + if (s->nMTF < 2400) nGroups = 5; else + nGroups = 6; + + /*--- Generate an initial set of coding tables ---*/ + { + int32_t nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = s->nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs-1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize-1) { + ge++; + aFreq += s->mtfFreq[ge]; + } + + if (ge > gs + && nPart != nGroups && nPart != 1 + && ((nGroups-nPart) % 2 == 1) + ) { + aFreq -= s->mtfFreq[ge]; + ge--; + } + + for (v = 0; v < alphaSize; v++) + if (v >= gs && v <= ge) + s->len[nPart-1][v] = BZ_LESSER_ICOST; + else + s->len[nPart-1][v] = BZ_GREATER_ICOST; + + nPart--; + gs = ge+1; + remF -= aFreq; + } + } + + /* + * Iterate up to BZ_N_ITERS times to improve the tables. + */ + for (iter = 0; iter < BZ_N_ITERS; iter++) { + for (t = 0; t < nGroups; t++) + fave[t] = 0; + + for (t = 0; t < nGroups; t++) + for (v = 0; v < alphaSize; v++) + s->rfreq[t][v] = 0; + +#ifdef FAST_GROUP6 + /* + * Set up an auxiliary length table which is used to fast-track + * the common case (nGroups == 6). + */ + if (nGroups == 6) { + for (v = 0; v < alphaSize; v++) { + s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; + s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; + s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; + } + } +#endif + + nSelectors = 0; + totc = 0; + gs = 0; + while (1) { + /*--- Set group start & end marks. --*/ + if (gs >= s->nMTF) + break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) + ge = s->nMTF-1; + + /* + * Calculate the cost of this group as coded + * by each of the coding tables. + */ + for (t = 0; t < nGroups; t++) + cost[t] = 0; +#ifdef FAST_GROUP6 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + register uint32_t cost01, cost23, cost45; + register uint16_t icv; + cost01 = cost23 = cost45 = 0; +#define BZ_ITER(nn) \ + icv = mtfv[gs+(nn)]; \ + cost01 += s->len_pack[icv][0]; \ + cost23 += s->len_pack[icv][1]; \ + cost45 += s->len_pack[icv][2]; + BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); + BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); + BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); + BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); + BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); + BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); + BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); + BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); + BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); + BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); +#undef BZ_ITER + cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; + cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; + cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; + + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + uint16_t icv = mtfv[i]; + for (t = 0; t < nGroups; t++) + cost[t] += s->len[t][icv]; + } + } + /* + * Find the coding table which is best for this group, + * and record its identity in the selector table. + */ + bc = 999999999; + bt = -1; + //bc = cost[0]; + //bt = 0; + for (t = 0; t < nGroups; t++) { + if (cost[t] < bc) { + bc = cost[t]; + bt = t; + } + } + totc += bc; + fave[bt]++; + s->selector[nSelectors] = bt; + nSelectors++; + + /* + * Increment the symbol frequencies for the selected table. + */ +/* ~0.5% faster compress. +800 bytes */ +#if 0 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ +#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++ + BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); + BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); + BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); + BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); + BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); + BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); + BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); + BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); + BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); + BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); +#undef BZ_ITUR + gs = ge+1; + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + while (gs <= ge) { + s->rfreq[bt][mtfv[gs]]++; + gs++; + } + /* already is: gs = ge+1; */ + } + } + + /* + * Recompute the tables based on the accumulated frequencies. + */ + /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See + * comment in huffman.c for details. */ + for (t = 0; t < nGroups; t++) + BZ2_hbMakeCodeLengths(&(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/); + } + + AssertH(nGroups < 8, 3002); + AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003); + + /*--- Compute MTF values for the selectors. ---*/ + { + UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; + + for (i = 0; i < nGroups; i++) + pos[i] = i; + for (i = 0; i < nSelectors; i++) { + ll_i = s->selector[i]; + j = 0; + tmp = pos[j]; + while (ll_i != tmp) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + }; + pos[0] = tmp; + s->selectorMtf[i] = j; + } + }; + + /*--- Assign actual codes for the tables. --*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + AssertH(!(maxLen > 17 /*20*/), 3004); + AssertH(!(minLen < 1), 3005); + BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize); + } + + /*--- Transmit the mapping table. ---*/ + { + Bool inUse16[16]; + for (i = 0; i < 16; i++) { + inUse16[i] = False; + for (j = 0; j < 16; j++) + if (s->inUse[i * 16 + j]) + inUse16[i] = True; + } + + nBytes = s->numZ; + for (i = 0; i < 16; i++) { + if (inUse16[i]) + bsW(s, 1, 1); + else + bsW(s, 1, 0); + } + + for (i = 0; i < 16; i++) { + if (inUse16[i]) { + for (j = 0; j < 16; j++) { + if (s->inUse[i * 16 + j]) + bsW(s, 1, 1); + else + bsW(s, 1, 0); + } + } + } + } + + /*--- Now the selectors. ---*/ + nBytes = s->numZ; + bsW(s, 3, nGroups); + bsW(s, 15, nSelectors); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < s->selectorMtf[i]; j++) + bsW(s, 1, 1); + bsW(s, 1, 0); + } + + /*--- Now the coding tables. ---*/ + nBytes = s->numZ; + + for (t = 0; t < nGroups; t++) { + int32_t curr = s->len[t][0]; + bsW(s, 5, curr); + for (i = 0; i < alphaSize; i++) { + while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ }; + while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ }; + bsW(s, 1, 0); + } + } + + /*--- And finally, the block data proper ---*/ + nBytes = s->numZ; + selCtr = 0; + gs = 0; + while (1) { + if (gs >= s->nMTF) + break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) + ge = s->nMTF-1; + AssertH(s->selector[selCtr] < nGroups, 3006); + +/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */ +#if 0 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + uint16_t mtfv_i; + UChar* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]); + int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); +#define BZ_ITAH(nn) \ + mtfv_i = mtfv[gs+(nn)]; \ + bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i]) + BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); + BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); + BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); + BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); + BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); + BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); + BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); + BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); + BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); + BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); +#undef BZ_ITAH + gs = ge+1; + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + /* code is bit bigger, but moves multiply out of the loop */ + UChar* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]); + int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); + while (gs <= ge) { + bsW(s, + s_len_sel_selCtr[mtfv[gs]], + s_code_sel_selCtr[mtfv[gs]] + ); + gs++; + } + /* already is: gs = ge+1; */ + } + selCtr++; + } + AssertH(selCtr == nSelectors, 3007); +} + + +/*---------------------------------------------------*/ +static +void BZ2_compressBlock(EState* s, Bool is_last_block) +{ + if (s->nblock > 0) { + BZ_FINALISE_CRC(s->blockCRC); + s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); + s->combinedCRC ^= s->blockCRC; + if (s->blockNo > 1) + s->numZ = 0; + + BZ2_blockSort(s); + } + + s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); + + /*-- If this is the first block, create the stream header. --*/ + if (s->blockNo == 1) { + BZ2_bsInitWrite(s); + /*bsPutUChar(s, BZ_HDR_B);*/ + /*bsPutUChar(s, BZ_HDR_Z);*/ + /*bsPutUChar(s, BZ_HDR_h);*/ + /*bsPutUChar(s, (UChar)(BZ_HDR_0 + s->blockSize100k));*/ + bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k); + } + + if (s->nblock > 0) { + /*bsPutUChar(s, 0x31);*/ + /*bsPutUChar(s, 0x41);*/ + /*bsPutUChar(s, 0x59);*/ + /*bsPutUChar(s, 0x26);*/ + bsPutU32(s, 0x31415926); + bsPutUChar(s, 0x53); + bsPutUChar(s, 0x59); + + /*-- Now the block's CRC, so it is in a known place. --*/ + bsPutU32(s, s->blockCRC); + + /* + * Now a single bit indicating (non-)randomisation. + * As of version 0.9.5, we use a better sorting algorithm + * which makes randomisation unnecessary. So always set + * the randomised bit to 'no'. Of course, the decoder + * still needs to be able to handle randomised blocks + * so as to maintain backwards compatibility with + * older versions of bzip2. + */ + bsW(s, 1, 0); + + bsW(s, 24, s->origPtr); + generateMTFValues(s); + sendMTFValues(s); + } + + /*-- If this is the last block, add the stream trailer. --*/ + if (is_last_block) { + /*bsPutUChar(s, 0x17);*/ + /*bsPutUChar(s, 0x72);*/ + /*bsPutUChar(s, 0x45);*/ + /*bsPutUChar(s, 0x38);*/ + bsPutU32(s, 0x17724538); + bsPutUChar(s, 0x50); + bsPutUChar(s, 0x90); + bsPutU32(s, s->combinedCRC); + bsFinishWrite(s); + } +} + + +/*-------------------------------------------------------------*/ +/*--- end compress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/crctable.c b/archival/bz/crctable.c new file mode 100644 index 000000000..dadd372d3 --- /dev/null +++ b/archival/bz/crctable.c @@ -0,0 +1,105 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Table for doing CRCs ---*/ +/*--- crctable.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +/* + * I think this is an implementation of the AUTODIN-II, + * Ethernet & FDDI 32-bit CRC standard. Vaguely derived + * from code by Rob Warnock, in Section 51 of the + * comp.compression FAQ. + */ + +static const uint32_t BZ2_crc32Table[256] = { + 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L, + 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L, + 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L, + 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL, + 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L, + 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L, + 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L, + 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL, + 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L, + 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L, + 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L, + 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL, + 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L, + 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L, + 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L, + 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL, + 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL, + 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L, + 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L, + 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL, + 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL, + 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L, + 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L, + 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL, + 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL, + 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L, + 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L, + 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL, + 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL, + 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L, + 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L, + 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL, + 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L, + 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL, + 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL, + 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L, + 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L, + 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL, + 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL, + 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L, + 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L, + 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL, + 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL, + 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L, + 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L, + 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL, + 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL, + 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L, + 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L, + 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL, + 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L, + 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L, + 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L, + 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL, + 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L, + 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L, + 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L, + 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL, + 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L, + 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L, + 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L, + 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL, + 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L, + 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L +}; + + +/*-------------------------------------------------------------*/ +/*--- end crctable.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c new file mode 100644 index 000000000..d01af11c2 --- /dev/null +++ b/archival/bz/huffman.c @@ -0,0 +1,188 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Huffman coding low-level stuff ---*/ +/*--- huffman.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) +#define DEPTHOF(zz1) ((zz1) & 0x000000ff) +#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) + +#define ADDWEIGHTS(zw1,zw2) \ + (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ + (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) + +#define UPHEAP(z) \ +{ \ + int32_t zz, tmp; \ + zz = z; \ + tmp = heap[zz]; \ + while (weight[tmp] < weight[heap[zz >> 1]]) { \ + heap[zz] = heap[zz >> 1]; \ + zz >>= 1; \ + } \ + heap[zz] = tmp; \ +} + +#define DOWNHEAP(z) \ +{ \ + int32_t zz, yy, tmp; \ + zz = z; tmp = heap[zz]; \ + while (1) { \ + yy = zz << 1; \ + if (yy > nHeap) \ + break; \ + if (yy < nHeap \ + && weight[heap[yy+1]] < weight[heap[yy]]) \ + yy++; \ + if (weight[tmp] < weight[heap[yy]]) \ + break; \ + heap[zz] = heap[yy]; \ + zz = yy; \ + } \ + heap[zz] = tmp; \ +} + + +/*---------------------------------------------------*/ +static +void BZ2_hbMakeCodeLengths(UChar *len, + int32_t *freq, + int32_t alphaSize, + int32_t maxLen) +{ + /* + * Nodes and heap entries run from 1. Entry 0 + * for both the heap and nodes is a sentinel. + */ + int32_t nNodes, nHeap, n1, n2, i, j, k; + Bool tooLong; + + int32_t heap [BZ_MAX_ALPHA_SIZE + 2]; + int32_t weight[BZ_MAX_ALPHA_SIZE * 2]; + int32_t parent[BZ_MAX_ALPHA_SIZE * 2]; + + for (i = 0; i < alphaSize; i++) + weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + + while (1) { + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + UPHEAP(nHeap); + } + + AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001); + + while (nHeap > 1) { + n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); + n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); + nNodes++; + parent[n1] = parent[n2] = nNodes; + weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + UPHEAP(nHeap); + } + + AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002); + + tooLong = False; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { + k = parent[k]; + j++; + } + len[i-1] = j; + if (j > maxLen) + tooLong = True; + } + + if (!tooLong) + break; + + /* 17 Oct 04: keep-going condition for the following loop used + to be 'i < alphaSize', which missed the last element, + theoretically leading to the possibility of the compressor + looping. However, this count-scaling step is only needed if + one of the generated Huffman code words is longer than + maxLen, which up to and including version 1.0.2 was 20 bits, + which is extremely unlikely. In version 1.0.3 maxLen was + changed to 17 bits, which has minimal effect on compression + ratio, but does mean this scaling step is used from time to + time, enough to verify that it works. + + This means that bzip2-1.0.3 and later will only produce + Huffman codes with a maximum length of 17 bits. However, in + order to preserve backwards compatibility with bitstreams + produced by versions pre-1.0.3, the decompressor must still + handle lengths of up to 20. */ + + for (i = 1; i <= alphaSize; i++) { + j = weight[i] >> 8; + j = 1 + (j / 2); + weight[i] = j << 8; + } + } +} + + +/*---------------------------------------------------*/ +static +void BZ2_hbAssignCodes(int32_t *code, + UChar *length, + int32_t minLen, + int32_t maxLen, + int32_t alphaSize) +{ + int32_t n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) { + if (length[i] == n) { + code[i] = vec; + vec++; + }; + } + vec <<= 1; + } +} + + +/*-------------------------------------------------------------*/ +/*--- end huffman.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/randtable.c b/archival/bz/randtable.c new file mode 100644 index 000000000..8a3c562da --- /dev/null +++ b/archival/bz/randtable.c @@ -0,0 +1,85 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Table for randomising repetitive blocks ---*/ +/*--- randtable.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +static const int32_t BZ2_rNums[512] = { + 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, + 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, + 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, + 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, + 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, + 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, + 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, + 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, + 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, + 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, + 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, + 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, + 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, + 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, + 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, + 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, + 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, + 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, + 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, + 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, + 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, + 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, + 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, + 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, + 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, + 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, + 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, + 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, + 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, + 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, + 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, + 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, + 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, + 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, + 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, + 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, + 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, + 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, + 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, + 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, + 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, + 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, + 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, + 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, + 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, + 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, + 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, + 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, + 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, + 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, + 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, + 936, 638 +}; + +/*-------------------------------------------------------------*/ +/*--- end randtable.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bzip2.c b/archival/bzip2.c new file mode 100644 index 000000000..04478eecc --- /dev/null +++ b/archival/bzip2.c @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2007 Denys Vlasenko + * + * This file uses bzip2 library code which is written + * by Julian Seward . + * See README and LICENSE files in bz/ directory for more information + * about bzip2 library code. + */ + +#include "libbb.h" + +/* This buys 6% speed for nearly 4k code */ +/*#define FAST_GROUP6 1*/ + +#include "bz/bzlib.h" + +#include "bz/bzlib_private.h" + +#include "bz/blocksort.c" +#include "bz/bzlib.c" +#include "bz/compress.c" +#include "bz/crctable.c" +#include "bz/huffman.c" +#include "bz/randtable.c" + +/* No point in being shy and having very small buffer here. + * bzip2 internal buffers are much bigger anyway, hundreds of kbytes. + * If iobuf is several pages long, malloc() may use mmap, + * making iobuf is page aligned and thus (maybe) have one memcpy less + * if kernel is clever enough. + */ +enum { + IOBUF_SIZE = 8 * 1024 +}; + +/* Returns: + * <0 on write errors (examine errno), + * >0 on short writes (errno == 0) + * 0 no error (entire input consume, gimme more) + * on "impossible" errors (internal bzip2 compressor bug) dies + */ +static +ssize_t bz_write(bz_stream *strm, void* rbuf, ssize_t rlen, void *wbuf) +{ + int n, n2, ret; + + /* if (len == 0) return 0; */ + + strm->avail_in = rlen; + strm->next_in = rbuf; + while (1) { + strm->avail_out = IOBUF_SIZE; + strm->next_out = wbuf; + + ret = BZ2_bzCompress(strm, BZ_RUN); + if (ret != BZ_RUN_OK) + bb_error_msg_and_die("internal error %d", ret); + + n = IOBUF_SIZE - strm->avail_out; + if (n) { + /* short reads must have errno == 0 */ + errno = 0; + n2 = full_write(STDOUT_FILENO, wbuf, n); + if (n2 != n) + return n2 ? n2 : 1; + } + + if (strm->avail_in == 0) + return 0; + } +} + + +/*---------------------------------------------------*/ +static +USE_DESKTOP(long long) int bz_write_tail(bz_stream *strm, void *wbuf) +{ + int n, n2, ret; + USE_DESKTOP(long long) int total; + + total = -1; + while (1) { + strm->avail_out = IOBUF_SIZE; + strm->next_out = wbuf; + + ret = BZ2_bzCompress(strm, BZ_FINISH); + if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) + bb_error_msg_and_die("internal error %d", ret); + + n = IOBUF_SIZE - strm->avail_out; + if (n) { + n2 = full_write(STDOUT_FILENO, wbuf, n); + if (n2 != n) + goto err; + } + + if (ret == BZ_STREAM_END) + break; + } + + total = 0 USE_DESKTOP( + strm->total_out ); + err: + BZ2_bzCompressEnd(strm); + return total; +} + + +static +USE_DESKTOP(long long) int compressStream(void) +{ + USE_DESKTOP(long long) int total; + ssize_t count; + bz_stream bzs; /* it's small */ +#define strm (&bzs) + char *iobuf; +#define rbuf iobuf +#define wbuf (iobuf + IOBUF_SIZE) + + iobuf = xmalloc(2 * IOBUF_SIZE); + + BZ2_bzCompressInit(strm, 9 /*blockSize100k*/); + + while (1) { + count = full_read(STDIN_FILENO, rbuf, IOBUF_SIZE); + if (count < 0) + bb_perror_msg("read error"); + if (count <= 0) + break; + count = bz_write(strm, rbuf, count, wbuf); + if (count) { + bb_perror_msg(count < 0 ? "write error" : "short write"); + break; + } + } + + total = bz_write_tail(strm, wbuf); + free(iobuf); + /* we had no error _only_ if count == 0 */ + return count == 0 ? total : -1; +} + +static +char* make_new_name_bzip2(char *filename) +{ + return xasprintf("%s.bz2", filename); +} + +int bzip2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int bzip2_main(int argc, char **argv) +{ + unsigned opt; + + /* Must match bbunzip's constants OPT_STDOUT, OPT_FORCE! */ + opt = getopt32(argv, "cfv" USE_BUNZIP2("d") "q123456789" ); +#if ENABLE_BUNZIP2 /* bunzip2_main may not be visible... */ + if (opt & 0x8) // -d + return bunzip2_main(argc, argv); +#endif + option_mask32 &= 0x7; /* ignore -q, -0..9 */ + //if (opt & 0x1) // -c + //if (opt & 0x2) // -f + //if (opt & 0x4) // -v + argv += optind; + + return bbunpack(argv, make_new_name_bzip2, compressStream); +} diff --git a/archival/gzip.c b/archival/gzip.c index bc7502e70..00299b17c 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -2032,15 +2032,14 @@ int gzip_main(int argc, char **argv) /* Must match bbunzip's constants OPT_STDOUT, OPT_FORCE! */ opt = getopt32(argv, "cfv" USE_GUNZIP("d") "q123456789" ); - option_mask32 &= 0x7; /* Clear -d, ignore -q, -0..9 */ - //if (opt & 0x1) // -c - //if (opt & 0x2) // -f - //if (opt & 0x4) // -v #if ENABLE_GUNZIP /* gunzip_main may not be visible... */ - if (opt & 0x8) { // -d + if (opt & 0x8) // -d return gunzip_main(argc, argv); - } #endif + option_mask32 &= 0x7; /* ignore -q, -0..9 */ + //if (opt & 0x1) // -c + //if (opt & 0x2) // -f + //if (opt & 0x4) // -v argv += optind; PTR_TO_GLOBALS = xzalloc(sizeof(struct globals) + sizeof(struct globals2)) diff --git a/archival/libunarchive/Kbuild b/archival/libunarchive/Kbuild index a58a84f4b..1bc054a96 100644 --- a/archival/libunarchive/Kbuild +++ b/archival/libunarchive/Kbuild @@ -28,8 +28,6 @@ lib-y:= \ find_list_entry.o \ init_handle.o -GUNZIP_FILES:= decompress_unzip.o - DPKG_FILES:= \ get_header_ar.o \ unpack_ar_archive.o \ @@ -51,19 +49,19 @@ lib-$(CONFIG_UNLZMA) += decompress_unlzma.o lib-$(CONFIG_CPIO) += get_header_cpio.o lib-$(CONFIG_DPKG) += $(DPKG_FILES) lib-$(CONFIG_DPKG_DEB) += $(DPKG_FILES) -lib-$(CONFIG_FEATURE_DEB_TAR_GZ) += $(GUNZIP_FILES) get_header_tar_gz.o +lib-$(CONFIG_FEATURE_DEB_TAR_GZ) += decompress_unzip.o get_header_tar_gz.o lib-$(CONFIG_FEATURE_DEB_TAR_BZ2) += decompress_bunzip2.o get_header_tar_bz2.o lib-$(CONFIG_FEATURE_DEB_TAR_LZMA) += decompress_unlzma.o get_header_tar_lzma.o -lib-$(CONFIG_GUNZIP) += $(GUNZIP_FILES) +lib-$(CONFIG_GUNZIP) += decompress_unzip.o lib-$(CONFIG_FEATURE_GUNZIP_UNCOMPRESS) += decompress_uncompress.o -lib-$(CONFIG_RPM2CPIO) += $(GUNZIP_FILES) get_header_cpio.o -lib-$(CONFIG_RPM) += $(GUNZIP_FILES) get_header_cpio.o +lib-$(CONFIG_RPM2CPIO) += decompress_unzip.o get_header_cpio.o +lib-$(CONFIG_RPM) += decompress_unzip.o get_header_cpio.o lib-$(CONFIG_FEATURE_RPM_BZ2) += decompress_bunzip2.o lib-$(CONFIG_TAR) += get_header_tar.o lib-$(CONFIG_FEATURE_TAR_BZIP2) += decompress_bunzip2.o get_header_tar_bz2.o lib-$(CONFIG_FEATURE_TAR_LZMA) += decompress_unlzma.o get_header_tar_lzma.o -lib-$(CONFIG_FEATURE_TAR_GZIP) += $(GUNZIP_FILES) get_header_tar_gz.o +lib-$(CONFIG_FEATURE_TAR_GZIP) += decompress_unzip.o get_header_tar_gz.o lib-$(CONFIG_FEATURE_TAR_COMPRESS) += decompress_uncompress.o lib-$(CONFIG_UNCOMPRESS) += decompress_uncompress.o -lib-$(CONFIG_UNZIP) += $(GUNZIP_FILES) +lib-$(CONFIG_UNZIP) += decompress_unzip.o lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o diff --git a/archival/libunarchive/decompress_unzip.c b/archival/libunarchive/decompress_unzip.c index 52be6b25d..0572bee68 100644 --- a/archival/libunarchive/decompress_unzip.c +++ b/archival/libunarchive/decompress_unzip.c @@ -323,7 +323,7 @@ static int huft_build(const unsigned *b, const unsigned n, } while (--i); if (c[0] == n) { /* null input - all zero length codes */ *m = 0; - return 2; + return 2; } /* Find minimum and maximum length, bound *m by those */ diff --git a/include/applets.h b/include/applets.h index ceab00334..29a5d09e3 100644 --- a/include/applets.h +++ b/include/applets.h @@ -88,6 +88,7 @@ USE_BBCONFIG(APPLET(bbconfig, _BB_DIR_BIN, _BB_SUID_NEVER)) //USE_BBSH(APPLET(bbsh, _BB_DIR_BIN, _BB_SUID_NEVER)) USE_BUNZIP2(APPLET(bunzip2, _BB_DIR_USR_BIN, _BB_SUID_NEVER)) USE_BUNZIP2(APPLET_ODDNAME(bzcat, bunzip2, _BB_DIR_USR_BIN, _BB_SUID_NEVER, bzcat)) +USE_BZIP2(APPLET(bzip2, _BB_DIR_USR_BIN, _BB_SUID_NEVER)) USE_CAL(APPLET(cal, _BB_DIR_USR_BIN, _BB_SUID_NEVER)) USE_CAT(APPLET_NOFORK(cat, cat, _BB_DIR_BIN, _BB_SUID_NEVER, cat)) USE_CATV(APPLET(catv, _BB_DIR_BIN, _BB_SUID_NEVER)) diff --git a/include/libbb.h b/include/libbb.h index be548a306..af385e232 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -709,6 +709,9 @@ int chown_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; #if ENABLE_GUNZIP int gunzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; #endif +#if ENABLE_BUNZIP2 +int bunzip2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +#endif int bbunpack(char **argv, char* (*make_new_name)(char *filename), USE_DESKTOP(long long) int (*unpacker)(void) diff --git a/include/platform.h b/include/platform.h index 53d72829f..edb0f8ab0 100644 --- a/include/platform.h +++ b/include/platform.h @@ -54,7 +54,8 @@ # define ATTRIBUTE_ALIGNED(m) __attribute__ ((__aligned__(m))) # if __GNUC_PREREQ (3,0) # define ALWAYS_INLINE __attribute__ ((always_inline)) inline -# define NOINLINE __attribute__((noinline)) +/* I've seen a toolchain where I needed __noinline__ instead of noinline */ +# define NOINLINE __attribute__((__noinline__)) # if !ENABLE_WERROR # define ATTRIBUTE_DEPRECATED __attribute__ ((__deprecated__)) # define ATTRIBUTE_UNUSED_RESULT __attribute__ ((warn_unused_result)) @@ -63,7 +64,8 @@ # define ATTRIBUTE_UNUSED_RESULT /* n/a */ # endif # else -# define ALWAYS_INLINE inline +# define ALWAYS_INLINE inline /* n/a */ +# define NOINLINE /* n/a */ # define ATTRIBUTE_DEPRECATED /* n/a */ # define ATTRIBUTE_UNUSED_RESULT /* n/a */ # endif diff --git a/include/usage.h b/include/usage.h index b53820d7b..4c697d380 100644 --- a/include/usage.h +++ b/include/usage.h @@ -127,6 +127,16 @@ " -c Write output to standard output\n" \ " -f Force" +#define bzip2_trivial_usage \ + "[OPTION]... [FILE]..." +#define bzip2_full_usage \ + "Compress FILE(s) with bzip2 algorithm.\n" \ + "When FILE is '-' or unspecified, reads standard input. Implies -c." \ + "\n\nOptions:\n" \ + " -c Write output to standard output instead of FILE.bz\n" \ + " -d Decompress\n" \ + " -f Force" + #define busybox_notes_usage \ "Hello world!\n" @@ -1201,7 +1211,7 @@ "Uncompress FILE (or standard input if FILE is '-')" \ "\n\nOptions:\n" \ " -c Write output to standard output\n" \ - " -f Force read when source is a terminal\n" \ + " -f Force\n" \ " -t Test compressed file integrity" #define gunzip_example_usage \ "$ ls -la /tmp/BusyBox*\n" \ @@ -1218,7 +1228,7 @@ "\n\nOptions:\n" \ " -c Write output to standard output instead of FILE.gz\n" \ " -d Decompress\n" \ - " -f Force write when destination is a terminal" + " -f Force" #define gzip_example_usage \ "$ ls -la /tmp/busybox*\n" \ "-rw-rw-r-- 1 andersen andersen 1761280 Apr 14 17:47 /tmp/busybox.tar\n" \ -- cgit v1.2.3