diff options
author | Denis Vlasenko <vda.linux@googlemail.com> | 2007-10-14 07:49:48 +0000 |
---|---|---|
committer | Denis Vlasenko <vda.linux@googlemail.com> | 2007-10-14 07:49:48 +0000 |
commit | 6a9154b6f649341870bc06e896d2fe7235a4aef9 (patch) | |
tree | b80d1603d370838166ada58b42cd6e0b8c7c6970 /archival/bz | |
parent | 3f5fdc7572d932f33f81029956b87230c9b05182 (diff) |
bzip2: eliminate some divisions
Diffstat (limited to 'archival/bz')
-rw-r--r-- | archival/bz/blocksort.c | 64 | ||||
-rw-r--r-- | archival/bz/compress.c | 20 | ||||
-rw-r--r-- | archival/bz/huffman.c | 2 |
3 files changed, 51 insertions, 35 deletions
diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c index ec8a2a56b..aaed883de 100644 --- a/archival/bz/blocksort.c +++ b/archival/bz/blocksort.c @@ -246,7 +246,12 @@ void fallbackSort(uint32_t* fmap, for (i = 0; i < 257; i++) ftab[i] = 0; for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; - for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; + + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i < 257; i++) { + j += ftab[i]; + ftab[i] = j; + } for (i = 0; i < nblock; i++) { j = eclass8[i]; @@ -255,7 +260,7 @@ void fallbackSort(uint32_t* fmap, fmap[k] = i; } - nBhtab = 2 + (nblock / 32); + nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */ for (i = 0; i < nBhtab; i++) bhtab[i] = 0; for (i = 0; i < 256; i++) SET_BH(ftab[i]); @@ -737,27 +742,27 @@ void mainSort(uint32_t* ptr, memset(ftab, 0, 65537 * sizeof(ftab[0])); j = block[0] << 8; - i = nblock-1; + i = nblock - 1; /* 3%, +300 bytes */ #if CONFIG_BZIP2_FEATURE_SPEED >= 2 for (; i >= 3; i -= 4) { quadrant[i] = 0; - j = (j >> 8) |(((uint16_t)block[i]) << 8); + j = (j >> 8) | (((uint16_t)block[i]) << 8); ftab[j]++; quadrant[i-1] = 0; - j = (j >> 8) |(((uint16_t)block[i-1]) << 8); + j = (j >> 8) | (((uint16_t)block[i-1]) << 8); ftab[j]++; quadrant[i-2] = 0; - j = (j >> 8) |(((uint16_t)block[i-2]) << 8); + j = (j >> 8) | (((uint16_t)block[i-2]) << 8); ftab[j]++; quadrant[i-3] = 0; - j = (j >> 8) |(((uint16_t)block[i-3]) << 8); + j = (j >> 8) | (((uint16_t)block[i-3]) << 8); ftab[j]++; } #endif for (; i >= 0; i--) { quadrant[i] = 0; - j = (j >> 8) |(((uint16_t)block[i]) << 8); + j = (j >> 8) | (((uint16_t)block[i]) << 8); ftab[j]++; } @@ -768,34 +773,37 @@ void mainSort(uint32_t* ptr, } /*-- Complete the initial radix sort --*/ - for (i = 1; i <= 65536; i++) - ftab[i] += ftab[i-1]; + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i <= 65536; i++) { + j += ftab[i]; + ftab[i] = j; + } s = block[0] << 8; - i = nblock-1; + i = nblock - 1; #if CONFIG_BZIP2_FEATURE_SPEED >= 2 for (; i >= 3; i -= 4) { s = (s >> 8) | (block[i] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i; s = (s >> 8) | (block[i-1] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i-1; s = (s >> 8) | (block[i-2] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i-2; s = (s >> 8) | (block[i-3] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i-3; } #endif for (; i >= 0; i--) { s = (s >> 8) | (block[i] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i; } @@ -812,21 +820,23 @@ void mainSort(uint32_t* ptr, { int32_t vv; - /* was: int32_t h = 1; */ + /* bbox: was: int32_t h = 1; */ /* do h = 3 * h + 1; while (h <= 256); */ - int32_t h = 364; + uint32_t h = 364; do { - h = h / 3; + /*h = h / 3;*/ + h = (h * 171) >> 9; /* bbox: fast h/3 */ for (i = h; i <= 255; i++) { vv = runningOrder[i]; j = i; while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { runningOrder[j] = runningOrder[j-h]; j = j - h; - if (j <= (h - 1)) goto zero; + if (j <= (h - 1)) + goto zero; } - zero: + zero: runningOrder[j] = vv; } } while (h != 1); @@ -860,10 +870,10 @@ void mainSort(uint32_t* ptr, if (j != ss) { sb = (ss << 8) + j; if (!(ftab[sb] & SETMASK)) { - int32_t lo = ftab[sb] & CLEARMASK; + int32_t lo = ftab[sb] & CLEARMASK; int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; if (hi > lo) { - mainQSort3 ( + mainQSort3( ptr, block, quadrant, nblock, lo, hi, BZ_N_RADIX, budget ); @@ -966,15 +976,14 @@ void mainSort(uint32_t* ptr, while ((bbSize >> shifts) > 65534) shifts++; for (j = bbSize-1; j >= 0; j--) { - int32_t a2update = ptr[bbStart + j]; - uint16_t qVal = (uint16_t)(j >> shifts); + int32_t a2update = ptr[bbStart + j]; + uint16_t qVal = (uint16_t)(j >> shifts); quadrant[a2update] = qVal; if (a2update < BZ_N_OVERSHOOT) quadrant[a2update + nblock] = qVal; } AssertH(((bbSize-1) >> shifts) <= 65535, 1002); } - } } @@ -1041,7 +1050,8 @@ void BZ2_blockSort(EState* s) s->origPtr = -1; for (i = 0; i < s->nblock; i++) if (ptr[i] == 0) { - s->origPtr = i; break; + s->origPtr = i; + break; }; AssertH(s->origPtr != -1, 1003); diff --git a/archival/bz/compress.c b/archival/bz/compress.c index 3e2fbd867..724474e2d 100644 --- a/archival/bz/compress.c +++ b/archival/bz/compress.c @@ -186,7 +186,8 @@ void generateMTFValues(EState* s) s->mtfFreq[BZ_RUNA]++; } if (zPend < 2) break; - zPend = (zPend - 2) / 2; + zPend = (uint32_t)(zPend - 2) / 2; + /* bbox: unsigned div is easier */ }; zPend = 0; } @@ -219,15 +220,18 @@ void generateMTFValues(EState* s) zPend--; while (1) { if (zPend & 1) { - mtfv[wr] = BZ_RUNB; wr++; + mtfv[wr] = BZ_RUNB; + wr++; s->mtfFreq[BZ_RUNB]++; } else { - mtfv[wr] = BZ_RUNA; wr++; + mtfv[wr] = BZ_RUNA; + wr++; s->mtfFreq[BZ_RUNA]++; } if (zPend < 2) break; - zPend = (zPend - 2) / 2; + zPend = (uint32_t)(zPend - 2) / 2; + /* bbox: unsigned div is easier */ }; zPend = 0; } @@ -288,7 +292,7 @@ void sendMTFValues(EState* s) gs = 0; while (nPart > 0) { tFreq = remF / nPart; - ge = gs-1; + ge = gs - 1; aFreq = 0; while (aFreq < tFreq && ge < alphaSize-1) { ge++; @@ -297,7 +301,7 @@ void sendMTFValues(EState* s) if (ge > gs && nPart != nGroups && nPart != 1 - && ((nGroups - nPart) % 2 == 1) + && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */ ) { aFreq -= s->mtfFreq[ge]; ge--; @@ -310,7 +314,7 @@ void sendMTFValues(EState* s) s->len[nPart-1][v] = BZ_GREATER_ICOST; nPart--; - gs = ge+1; + gs = ge + 1; remF -= aFreq; } } @@ -414,7 +418,7 @@ void sendMTFValues(EState* s) /* * Increment the symbol frequencies for the selected table. */ -/* 1% faster compress. +800 bytes */ +/* 1% faster compress. +800 bytes */ #if CONFIG_BZIP2_FEATURE_SPEED >= 4 if (nGroups == 6 && 50 == ge-gs+1) { /*--- fast track the common case ---*/ diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c index 3f80c9976..02838c496 100644 --- a/archival/bz/huffman.c +++ b/archival/bz/huffman.c @@ -183,6 +183,8 @@ void BZ2_hbMakeCodeLengths(uint8_t *len, for (i = 1; i <= alphaSize; i++) { j = weight[i] >> 8; + /* bbox: yes, it is a signed division. + * don't replace with shift! */ j = 1 + (j / 2); weight[i] = j << 8; } |