The Tor Browser: comparison modules/libbz2/src/blocksort.c

--1:000000000000
+:a3c26da154e0
+/*-------------------------------------------------------------*/
+/*--- Block sorting machinery                               ---*/
+/*---                                           blocksort.c ---*/
+/*-------------------------------------------------------------*/
+/* ------------------------------------------------------------------
+This file is part of bzip2/libbzip2, a program and library for
+lossless, block-sorting data compression.
+bzip2/libbzip2 version 1.0.4 of 20 December 2006
+Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
+Please read the WARNING, DISCLAIMER and PATENTS sections in the
+README file.
+This program is released under the terms of the license contained
+in the file LICENSE.
+------------------------------------------------------------------ */
+#include "bzlib_private.h"
+/*---------------------------------------------*/
+/*--- Fallback O(N log(N)^2) sorting        ---*/
+/*--- algorithm, for repetitive blocks      ---*/
+/*---------------------------------------------*/
+/*---------------------------------------------*/
+static
+__inline__
+void fallbackSimpleSort ( UInt32* fmap,
+UInt32* eclass,
+Int32   lo,
+Int32   hi )
+{
+Int32 i, j, tmp;
+UInt32 ec_tmp;
+if (lo == hi) return;
+if (hi - lo > 3) {
+for ( i = hi-4; i >= lo; i-- ) {
+tmp = fmap[i];
+ec_tmp = eclass[tmp];
+for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
+fmap[j-4] = fmap[j];
+fmap[j-4] = tmp;
+}
+}
+for ( i = hi-1; i >= lo; i-- ) {
+tmp = fmap[i];
+ec_tmp = eclass[tmp];
+for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
+fmap[j-1] = fmap[j];
+fmap[j-1] = tmp;
+}
+}
+/*---------------------------------------------*/
+#define fswap(zz1, zz2) \
+{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+#define fvswap(zzp1, zzp2, zzn)       \
+{                                     \
+Int32 yyp1 = (zzp1);               \
+Int32 yyp2 = (zzp2);               \
+Int32 yyn  = (zzn);                \
+while (yyn > 0) {                  \
+fswap(fmap[yyp1], fmap[yyp2]);  \
+yyp1++; yyp2++; yyn--;          \
+}                                  \
+}
+#define fmin(a,b) ((a) < (b)) ? (a) : (b)
+#define fpush(lz,hz) { stackLo[sp] = lz; \
+stackHi[sp] = hz; \
+sp++; }
+#define fpop(lz,hz) { sp--;              \
+lz = stackLo[sp];  \
+hz = stackHi[sp]; }
+#define FALLBACK_QSORT_SMALL_THRESH 10
+#define FALLBACK_QSORT_STACK_SIZE   100
+static
+void fallbackQSort3 ( UInt32* fmap,
+UInt32* eclass,
+Int32   loSt,
+Int32   hiSt )
+{
+Int32 unLo, unHi, ltLo, gtHi, n, m;
+Int32 sp, lo, hi;
+UInt32 med, r, r3;
+Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
+Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
+r = 0;
+sp = 0;
+fpush ( loSt, hiSt );
+while (sp > 0) {
+AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
+fpop ( lo, hi );
+if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
+fallbackSimpleSort ( fmap, eclass, lo, hi );
+continue;
+}
+/* Random partitioning.  Median of 3 sometimes fails to
+avoid bad cases.  Median of 9 seems to help but
+looks rather expensive.  This too seems to work but
+is cheaper.  Guidance for the magic constants
+7621 and 32768 is taken from Sedgewick's algorithms
+book, chapter 35.
+*/
+r = ((r * 7621) + 1) % 32768;
+r3 = r % 3;
+if (r3 == 0) med = eclass[fmap[lo]]; else
+if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
+med = eclass[fmap[hi]];
+unLo = ltLo = lo;
+unHi = gtHi = hi;
+while (1) {
+while (1) {
+if (unLo > unHi) break;
+n = (Int32)eclass[fmap[unLo]] - (Int32)med;
+if (n == 0) {
+fswap(fmap[unLo], fmap[ltLo]);
+ltLo++; unLo++;
+continue;
+};
+if (n > 0) break;
+unLo++;
+}
+while (1) {
+if (unLo > unHi) break;
+n = (Int32)eclass[fmap[unHi]] - (Int32)med;
+if (n == 0) {
+fswap(fmap[unHi], fmap[gtHi]);
+gtHi--; unHi--;
+continue;
+};
+if (n < 0) break;
+unHi--;
+}
+if (unLo > unHi) break;
+fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
+}
+AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
+if (gtHi < ltLo) continue;
+n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
+m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
+n = lo + unLo - ltLo - 1;
+m = hi - (gtHi - unHi) + 1;
+if (n - lo > hi - m) {
+fpush ( lo, n );
+fpush ( m, hi );
+} else {
+fpush ( m, hi );
+fpush ( lo, n );
+}
+}
+}
+#undef fmin
+#undef fpush
+#undef fpop
+#undef fswap
+#undef fvswap
+#undef FALLBACK_QSORT_SMALL_THRESH
+#undef FALLBACK_QSORT_STACK_SIZE
+/*---------------------------------------------*/
+/* Pre:
+nblock > 0
+eclass exists for [0 .. nblock-1]
+((UChar*)eclass) [0 .. nblock-1] holds block
+ptr exists for [0 .. nblock-1]
+Post:
+((UChar*)eclass) [0 .. nblock-1] holds block
+All other areas of eclass destroyed
+fmap [0 .. nblock-1] holds sorted order
+bhtab [ 0 .. 2+(nblock/32) ] destroyed
+*/
+#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
+#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
+#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
+#define      WORD_BH(zz)  bhtab[(zz) >> 5]
+#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
+static
+void fallbackSort ( UInt32* fmap,
+UInt32* eclass,
+UInt32* bhtab,
+Int32   nblock,
+Int32   verb )
+{
+Int32 ftab[257];
+Int32 ftabCopy[256];
+Int32 H, i, j, k, l, r, cc, cc1;
+Int32 nNotDone;
+Int32 nBhtab;
+UChar* eclass8 = (UChar*)eclass;
+/*--
+Initial 1-char radix sort to generate
+initial fmap and initial BH bits.
+--*/
+if (verb >= 4)
+VPrintf0 ( "        bucket sorting ...\n" );
+for (i = 0; i < 257;    i++) ftab[i] = 0;
+for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
+for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
+for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
+for (i = 0; i < nblock; i++) {
+j = eclass8[i];
+k = ftab[j] - 1;
+ftab[j] = k;
+fmap[k] = i;
+}
+nBhtab = 2 + (nblock / 32);
+for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
+for (i = 0; i < 256; i++) SET_BH(ftab[i]);
+/*--
+Inductively refine the buckets.  Kind-of an
+"exponential radix sort" (!), inspired by the
+Manber-Myers suffix array construction algorithm.
+--*/
+/*-- set sentinel bits for block-end detection --*/
+for (i = 0; i < 32; i++) {
+SET_BH(nblock + 2*i);
+CLEAR_BH(nblock + 2*i + 1);
+}
+/*-- the log(N) loop --*/
+H = 1;
+while (1) {
+if (verb >= 4)
+VPrintf1 ( "        depth %6d has ", H );
+j = 0;
+for (i = 0; i < nblock; i++) {
+if (ISSET_BH(i)) j = i;
+k = fmap[i] - H; if (k < 0) k += nblock;
+eclass[k] = j;
+}
+nNotDone = 0;
+r = -1;
+while (1) {
+	 /*-- find the next non-singleton bucket --*/
+k = r + 1;
+while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+if (ISSET_BH(k)) {
+while (WORD_BH(k) == 0xffffffff) k += 32;
+while (ISSET_BH(k)) k++;
+}
+l = k - 1;
+if (l >= nblock) break;
+while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+if (!ISSET_BH(k)) {
+while (WORD_BH(k) == 0x00000000) k += 32;
+while (!ISSET_BH(k)) k++;
+}
+r = k - 1;
+if (r >= nblock) break;
+/*-- now [l, r] bracket current bucket --*/
+if (r > l) {
+nNotDone += (r - l + 1);
+fallbackQSort3 ( fmap, eclass, l, r );
+/*-- scan bucket and generate header bits-- */
+cc = -1;
+for (i = l; i <= r; i++) {
+cc1 = eclass[fmap[i]];
+if (cc != cc1) { SET_BH(i); cc = cc1; };
+}
+}
+}
+if (verb >= 4)
+VPrintf1 ( "%6d unresolved strings\n", nNotDone );
+H *= 2;
+if (H > nblock || nNotDone == 0) break;
+}
+/*--
+Reconstruct the original block in
+eclass8 [0 .. nblock-1], since the
+previous phase destroyed it.
+--*/
+if (verb >= 4)
+VPrintf0 ( "        reconstructing block ...\n" );
+j = 0;
+for (i = 0; i < nblock; i++) {
+while (ftabCopy[j] == 0) j++;
+ftabCopy[j]--;
+eclass8[fmap[i]] = (UChar)j;
+}
+AssertH ( j < 256, 1005 );
+}
+#undef       SET_BH
+#undef     CLEAR_BH
+#undef     ISSET_BH
+#undef      WORD_BH
+#undef UNALIGNED_BH
+/*---------------------------------------------*/
+/*--- The main, O(N^2 log(N)) sorting       ---*/
+/*--- algorithm.  Faster for "normal"       ---*/
+/*--- non-repetitive blocks.                ---*/
+/*---------------------------------------------*/
+/*---------------------------------------------*/
+static
+__inline__
+Bool mainGtU ( UInt32  i1,
+UInt32  i2,
+UChar*  block,
+UInt16* quadrant,
+UInt32  nblock,
+Int32*  budget )
+{
+Int32  k;
+UChar  c1, c2;
+UInt16 s1, s2;
+AssertD ( i1 != i2, "mainGtU" );
+/* 1 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 2 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 3 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 4 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 5 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 6 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 7 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 8 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 9 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 10 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 11 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+/* 12 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+i1++; i2++;
+k = nblock + 8;
+do {
+/* 1 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+s1 = quadrant[i1]; s2 = quadrant[i2];
+if (s1 != s2) return (s1 > s2);
+i1++; i2++;
+/* 2 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+s1 = quadrant[i1]; s2 = quadrant[i2];
+if (s1 != s2) return (s1 > s2);
+i1++; i2++;
+/* 3 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+s1 = quadrant[i1]; s2 = quadrant[i2];
+if (s1 != s2) return (s1 > s2);
+i1++; i2++;
+/* 4 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+s1 = quadrant[i1]; s2 = quadrant[i2];
+if (s1 != s2) return (s1 > s2);
+i1++; i2++;
+/* 5 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+s1 = quadrant[i1]; s2 = quadrant[i2];
+if (s1 != s2) return (s1 > s2);
+i1++; i2++;
+/* 6 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+s1 = quadrant[i1]; s2 = quadrant[i2];
+if (s1 != s2) return (s1 > s2);
+i1++; i2++;
+/* 7 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+s1 = quadrant[i1]; s2 = quadrant[i2];
+if (s1 != s2) return (s1 > s2);
+i1++; i2++;
+/* 8 */
+c1 = block[i1]; c2 = block[i2];
+if (c1 != c2) return (c1 > c2);
+s1 = quadrant[i1]; s2 = quadrant[i2];
+if (s1 != s2) return (s1 > s2);
+i1++; i2++;
+if (i1 >= nblock) i1 -= nblock;
+if (i2 >= nblock) i2 -= nblock;
+k -= 8;
+(*budget)--;
+}
+while (k >= 0);
+return False;
+}
+/*---------------------------------------------*/
+/*--
+Knuth's increments seem to work better
+than Incerpi-Sedgewick here.  Possibly
+because the number of elems to sort is
+usually small, typically <= 20.
+--*/
+static
+Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+9841, 29524, 88573, 265720,
+797161, 2391484 };
+static
+void mainSimpleSort ( UInt32* ptr,
+UChar*  block,
+UInt16* quadrant,
+Int32   nblock,
+Int32   lo,
+Int32   hi,
+Int32   d,
+Int32*  budget )
+{
+Int32 i, j, h, bigN, hp;
+UInt32 v;
+bigN = hi - lo + 1;
+if (bigN < 2) return;
+hp = 0;
+while (incs[hp] < bigN) hp++;
+hp--;
+for (; hp >= 0; hp--) {
+h = incs[hp];
+i = lo + h;
+while (True) {
+/*-- copy 1 --*/
+if (i > hi) break;
+v = ptr[i];
+j = i;
+while ( mainGtU (
+ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+) ) {
+ptr[j] = ptr[j-h];
+j = j - h;
+if (j <= (lo + h - 1)) break;
+}
+ptr[j] = v;
+i++;
+/*-- copy 2 --*/
+if (i > hi) break;
+v = ptr[i];
+j = i;
+while ( mainGtU (
+ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+) ) {
+ptr[j] = ptr[j-h];
+j = j - h;
+if (j <= (lo + h - 1)) break;
+}
+ptr[j] = v;
+i++;
+/*-- copy 3 --*/
+if (i > hi) break;
+v = ptr[i];
+j = i;
+while ( mainGtU (
+ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+) ) {
+ptr[j] = ptr[j-h];
+j = j - h;
+if (j <= (lo + h - 1)) break;
+}
+ptr[j] = v;
+i++;
+if (*budget < 0) return;
+}
+}
+}
+/*---------------------------------------------*/
+/*--
+The following is an implementation of
+an elegant 3-way quicksort for strings,
+described in a paper "Fast Algorithms for
+Sorting and Searching Strings", by Robert
+Sedgewick and Jon L. Bentley.
+--*/
+#define mswap(zz1, zz2) \
+{ Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+#define mvswap(zzp1, zzp2, zzn)       \
+{                                     \
+Int32 yyp1 = (zzp1);               \
+Int32 yyp2 = (zzp2);               \
+Int32 yyn  = (zzn);                \
+while (yyn > 0) {                  \
+mswap(ptr[yyp1], ptr[yyp2]);    \
+yyp1++; yyp2++; yyn--;          \
+}                                  \
+}
+static
+__inline__
+UChar mmed3 ( UChar a, UChar b, UChar c )
+{
+UChar t;
+if (a > b) { t = a; a = b; b = t; };
+if (b > c) {
+b = c;
+if (a > b) b = a;
+}
+return b;
+}
+#define mmin(a,b) ((a) < (b)) ? (a) : (b)
+#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
+stackHi[sp] = hz; \
+stackD [sp] = dz; \
+sp++; }
+#define mpop(lz,hz,dz) { sp--;             \
+lz = stackLo[sp]; \
+hz = stackHi[sp]; \
+dz = stackD [sp]; }
+#define mnextsize(az) (nextHi[az]-nextLo[az])
+#define mnextswap(az,bz)                                        \
+{ Int32 tz;                                                  \
+tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
+tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
+tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
+#define MAIN_QSORT_SMALL_THRESH 20
+#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
+#define MAIN_QSORT_STACK_SIZE 100
+static
+void mainQSort3 ( UInt32* ptr,
+UChar*  block,
+UInt16* quadrant,
+Int32   nblock,
+Int32   loSt,
+Int32   hiSt,
+Int32   dSt,
+Int32*  budget )
+{
+Int32 unLo, unHi, ltLo, gtHi, n, m, med;
+Int32 sp, lo, hi, d;
+Int32 stackLo[MAIN_QSORT_STACK_SIZE];
+Int32 stackHi[MAIN_QSORT_STACK_SIZE];
+Int32 stackD [MAIN_QSORT_STACK_SIZE];
+Int32 nextLo[3];
+Int32 nextHi[3];
+Int32 nextD [3];
+sp = 0;
+mpush ( loSt, hiSt, dSt );
+while (sp > 0) {
+AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
+mpop ( lo, hi, d );
+if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
+d > MAIN_QSORT_DEPTH_THRESH) {
+mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
+if (*budget < 0) return;
+continue;
+}
+med = (Int32)
+mmed3 ( block[ptr[ lo         ]+d],
+block[ptr[ hi         ]+d],
+block[ptr[ (lo+hi)>>1 ]+d] );
+unLo = ltLo = lo;
+unHi = gtHi = hi;
+while (True) {
+while (True) {
+if (unLo > unHi) break;
+n = ((Int32)block[ptr[unLo]+d]) - med;
+if (n == 0) {
+mswap(ptr[unLo], ptr[ltLo]);
+ltLo++; unLo++; continue;
+};
+if (n >  0) break;
+unLo++;
+}
+while (True) {
+if (unLo > unHi) break;
+n = ((Int32)block[ptr[unHi]+d]) - med;
+if (n == 0) {
+mswap(ptr[unHi], ptr[gtHi]);
+gtHi--; unHi--; continue;
+};
+if (n <  0) break;
+unHi--;
+}
+if (unLo > unHi) break;
+mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
+}
+AssertD ( unHi == unLo-1, "mainQSort3(2)" );
+if (gtHi < ltLo) {
+mpush(lo, hi, d+1 );
+continue;
+}
+n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
+m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
+n = lo + unLo - ltLo - 1;
+m = hi - (gtHi - unHi) + 1;
+nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
+nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
+nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
+if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
+if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
+AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
+mpush (nextLo[0], nextHi[0], nextD[0]);
+mpush (nextLo[1], nextHi[1], nextD[1]);
+mpush (nextLo[2], nextHi[2], nextD[2]);
+}
+}
+#undef mswap
+#undef mvswap
+#undef mpush
+#undef mpop
+#undef mmin
+#undef mnextsize
+#undef mnextswap
+#undef MAIN_QSORT_SMALL_THRESH
+#undef MAIN_QSORT_DEPTH_THRESH
+#undef MAIN_QSORT_STACK_SIZE
+/*---------------------------------------------*/
+/* Pre:
+nblock > N_OVERSHOOT
+block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
+((UChar*)block32) [0 .. nblock-1] holds block
+ptr exists for [0 .. nblock-1]
+Post:
+((UChar*)block32) [0 .. nblock-1] holds block
+All other areas of block32 destroyed
+ftab [0 .. 65536 ] destroyed
+ptr [0 .. nblock-1] holds sorted order
+if (*budget < 0), sorting was abandoned
+*/
+#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
+#define SETMASK (1 << 21)
+#define CLEARMASK (~(SETMASK))
+static
+void mainSort ( UInt32* ptr,
+UChar*  block,
+UInt16* quadrant,
+UInt32* ftab,
+Int32   nblock,
+Int32   verb,
+Int32*  budget )
+{
+Int32  i, j, k, ss, sb;
+Int32  runningOrder[256];
+Bool   bigDone[256];
+Int32  copyStart[256];
+Int32  copyEnd  [256];
+UChar  c1;
+Int32  numQSorted;
+UInt16 s;
+if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
+/*-- set up the 2-byte frequency table --*/
+for (i = 65536; i >= 0; i--) ftab[i] = 0;
+j = block[0] << 8;
+i = nblock-1;
+for (; i >= 3; i -= 4) {
+quadrant[i] = 0;
+j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ftab[j]++;
+quadrant[i-1] = 0;
+j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
+ftab[j]++;
+quadrant[i-2] = 0;
+j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
+ftab[j]++;
+quadrant[i-3] = 0;
+j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
+ftab[j]++;
+}
+for (; i >= 0; i--) {
+quadrant[i] = 0;
+j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ftab[j]++;
+}
+/*-- (emphasises close relationship of block & quadrant) --*/
+for (i = 0; i < BZ_N_OVERSHOOT; i++) {
+block   [nblock+i] = block[i];
+quadrant[nblock+i] = 0;
+}
+if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
+/*-- Complete the initial radix sort --*/
+for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
+s = block[0] << 8;
+i = nblock-1;
+for (; i >= 3; i -= 4) {
+s = (s >> 8) | (block[i] << 8);
+j = ftab[s] -1;
+ftab[s] = j;
+ptr[j] = i;
+s = (s >> 8) | (block[i-1] << 8);
+j = ftab[s] -1;
+ftab[s] = j;
+ptr[j] = i-1;
+s = (s >> 8) | (block[i-2] << 8);
+j = ftab[s] -1;
+ftab[s] = j;
+ptr[j] = i-2;
+s = (s >> 8) | (block[i-3] << 8);
+j = ftab[s] -1;
+ftab[s] = j;
+ptr[j] = i-3;
+}
+for (; i >= 0; i--) {
+s = (s >> 8) | (block[i] << 8);
+j = ftab[s] -1;
+ftab[s] = j;
+ptr[j] = i;
+}
+/*--
+Now ftab contains the first loc of every small bucket.
+Calculate the running order, from smallest to largest
+big bucket.
+--*/
+for (i = 0; i <= 255; i++) {
+bigDone     [i] = False;
+runningOrder[i] = i;
+}
+{
+Int32 vv;
+Int32 h = 1;
+do h = 3 * h + 1; while (h <= 256);
+do {
+h = h / 3;
+for (i = h; i <= 255; i++) {
+vv = runningOrder[i];
+j = i;
+while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
+runningOrder[j] = runningOrder[j-h];
+j = j - h;
+if (j <= (h - 1)) goto zero;
+}
+zero:
+runningOrder[j] = vv;
+}
+} while (h != 1);
+}
+/*--
+The main sorting loop.
+--*/
+numQSorted = 0;
+for (i = 0; i <= 255; i++) {
+/*--
+Process big buckets, starting with the least full.
+Basically this is a 3-step process in which we call
+mainQSort3 to sort the small buckets [ss, j], but
+also make a big effort to avoid the calls if we can.
+--*/
+ss = runningOrder[i];
+/*--
+Step 1:
+Complete the big bucket [ss] by quicksorting
+any unsorted small buckets [ss, j], for j != ss.
+Hopefully previous pointer-scanning phases have already
+completed many of the small buckets [ss, j], so
+we don't have to sort them at all.
+--*/
+for (j = 0; j <= 255; j++) {
+if (j != ss) {
+sb = (ss << 8) + j;
+if ( ! (ftab[sb] & SETMASK) ) {
+Int32 lo = ftab[sb]   & CLEARMASK;
+Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
+if (hi > lo) {
+if (verb >= 4)
+VPrintf4 ( "        qsort [0x%x, 0x%x]   "
+"done %d   this %d\n",
+ss, j, numQSorted, hi - lo + 1 );
+mainQSort3 (
+ptr, block, quadrant, nblock,
+lo, hi, BZ_N_RADIX, budget
+);
+numQSorted += (hi - lo + 1);
+if (*budget < 0) return;
+}
+}
+ftab[sb] |= SETMASK;
+}
+}
+AssertH ( !bigDone[ss], 1006 );
+/*--
+Step 2:
+Now scan this big bucket [ss] so as to synthesise the
+sorted order for small buckets [t, ss] for all t,
+including, magically, the bucket [ss,ss] too.
+This will avoid doing Real Work in subsequent Step 1's.
+--*/
+{
+for (j = 0; j <= 255; j++) {
+copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
+copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
+}
+for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
+k = ptr[j]-1; if (k < 0) k += nblock;
+c1 = block[k];
+if (!bigDone[c1])
+ptr[ copyStart[c1]++ ] = k;
+}
+for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
+k = ptr[j]-1; if (k < 0) k += nblock;
+c1 = block[k];
+if (!bigDone[c1])
+ptr[ copyEnd[c1]-- ] = k;
+}
+}
+AssertH ( (copyStart[ss]-1 == copyEnd[ss])
+||
+/* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
+Necessity for this case is demonstrated by compressing
+a sequence of approximately 48.5 million of character
+251; 1.0.0/1.0.1 will then die here. */
+(copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
+1007 )
+for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
+/*--
+Step 3:
+The [ss] big bucket is now done.  Record this fact,
+and update the quadrant descriptors.  Remember to
+update quadrants in the overshoot area too, if
+necessary.  The "if (i < 255)" test merely skips
+this updating for the last bucket processed, since
+updating for the last bucket is pointless.
+The quadrant array provides a way to incrementally
+cache sort orderings, as they appear, so as to
+make subsequent comparisons in fullGtU() complete
+faster.  For repetitive blocks this makes a big
+difference (but not big enough to be able to avoid
+the fallback sorting mechanism, exponential radix sort).
+The precise meaning is: at all times:
+for 0 <= i < nblock and 0 <= j <= nblock
+if block[i] != block[j],
+then the relative values of quadrant[i] and
+quadrant[j] are meaningless.
+else {
+if quadrant[i] < quadrant[j]
+then the string starting at i lexicographically
+precedes the string starting at j
+else if quadrant[i] > quadrant[j]
+then the string starting at j lexicographically
+precedes the string starting at i
+else
+the relative ordering of the strings starting
+at i and j has not yet been determined.
+}
+--*/
+bigDone[ss] = True;
+if (i < 255) {
+Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
+Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
+Int32 shifts   = 0;
+while ((bbSize >> shifts) > 65534) shifts++;
+for (j = bbSize-1; j >= 0; j--) {
+Int32 a2update     = ptr[bbStart + j];
+UInt16 qVal        = (UInt16)(j >> shifts);
+quadrant[a2update] = qVal;
+if (a2update < BZ_N_OVERSHOOT)
+quadrant[a2update + nblock] = qVal;
+}
+AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
+}
+}
+if (verb >= 4)
+VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
+nblock, numQSorted, nblock - numQSorted );
+}
+#undef BIGFREQ
+#undef SETMASK
+#undef CLEARMASK
+/*---------------------------------------------*/
+/* Pre:
+nblock > 0
+arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
+((UChar*)arr2)  [0 .. nblock-1] holds block
+arr1 exists for [0 .. nblock-1]
+Post:
+((UChar*)arr2) [0 .. nblock-1] holds block
+All other areas of block destroyed
+ftab [ 0 .. 65536 ] destroyed
+arr1 [0 .. nblock-1] holds sorted order
+*/
+void BZ2_blockSort ( EState* s )
+{
+UInt32* ptr    = s->ptr;
+UChar*  block  = s->block;
+UInt32* ftab   = s->ftab;
+Int32   nblock = s->nblock;
+Int32   verb   = s->verbosity;
+Int32   wfact  = s->workFactor;
+UInt16* quadrant;
+Int32   budget;
+Int32   budgetInit;
+Int32   i;
+if (nblock < 10000) {
+fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+} else {
+/* Calculate the location for quadrant, remembering to get
+the alignment right.  Assumes that &(block[0]) is at least
+2-byte aligned -- this should be ok since block is really
+the first section of arr2.
+*/
+i = nblock+BZ_N_OVERSHOOT;
+if (i & 1) i++;
+quadrant = (UInt16*)(&(block[i]));
+/* (wfact-1) / 3 puts the default-factor-30
+transition point at very roughly the same place as
+with v0.1 and v0.9.0.
+Not that it particularly matters any more, since the
+resulting compressed stream is now the same regardless
+of whether or not we use the main sort or fallback sort.
+*/
+if (wfact < 1  ) wfact = 1;
+if (wfact > 100) wfact = 100;
+budgetInit = nblock * ((wfact-1) / 3);
+budget = budgetInit;
+mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
+if (verb >= 3)
+VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
+budgetInit - budget,
+nblock,
+(float)(budgetInit - budget) /
+(float)(nblock==0 ? 1 : nblock) );
+if (budget < 0) {
+if (verb >= 2)
+VPrintf0 ( "    too repetitive; using fallback"
+" sorting algorithm\n" );
+fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+}
+}
+s->origPtr = -1;
+for (i = 0; i < s->nblock; i++)
+if (ptr[i] == 0)
+{ s->origPtr = i; break; };
+AssertH( s->origPtr != -1, 1003 );
+}
+/*-------------------------------------------------------------*/
+/*--- end                                       blocksort.c ---*/
+/*-------------------------------------------------------------*/

The Tor Browser / file comparison

comparison: modules/libbz2/src/blocksort.c

modules/libbz2/src/blocksort.c