modules/libbz2/src/blocksort.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/modules/libbz2/src/blocksort.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1094 @@
     1.4 +
     1.5 +/*-------------------------------------------------------------*/
     1.6 +/*--- Block sorting machinery                               ---*/
     1.7 +/*---                                           blocksort.c ---*/
     1.8 +/*-------------------------------------------------------------*/
     1.9 +
    1.10 +/* ------------------------------------------------------------------
    1.11 +   This file is part of bzip2/libbzip2, a program and library for
    1.12 +   lossless, block-sorting data compression.
    1.13 +
    1.14 +   bzip2/libbzip2 version 1.0.4 of 20 December 2006
    1.15 +   Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
    1.16 +
    1.17 +   Please read the WARNING, DISCLAIMER and PATENTS sections in the 
    1.18 +   README file.
    1.19 +
    1.20 +   This program is released under the terms of the license contained
    1.21 +   in the file LICENSE.
    1.22 +   ------------------------------------------------------------------ */
    1.23 +
    1.24 +
    1.25 +#include "bzlib_private.h"
    1.26 +
    1.27 +/*---------------------------------------------*/
    1.28 +/*--- Fallback O(N log(N)^2) sorting        ---*/
    1.29 +/*--- algorithm, for repetitive blocks      ---*/
    1.30 +/*---------------------------------------------*/
    1.31 +
    1.32 +/*---------------------------------------------*/
    1.33 +static 
    1.34 +__inline__
    1.35 +void fallbackSimpleSort ( UInt32* fmap, 
    1.36 +                          UInt32* eclass, 
    1.37 +                          Int32   lo, 
    1.38 +                          Int32   hi )
    1.39 +{
    1.40 +   Int32 i, j, tmp;
    1.41 +   UInt32 ec_tmp;
    1.42 +
    1.43 +   if (lo == hi) return;
    1.44 +
    1.45 +   if (hi - lo > 3) {
    1.46 +      for ( i = hi-4; i >= lo; i-- ) {
    1.47 +         tmp = fmap[i];
    1.48 +         ec_tmp = eclass[tmp];
    1.49 +         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
    1.50 +            fmap[j-4] = fmap[j];
    1.51 +         fmap[j-4] = tmp;
    1.52 +      }
    1.53 +   }
    1.54 +
    1.55 +   for ( i = hi-1; i >= lo; i-- ) {
    1.56 +      tmp = fmap[i];
    1.57 +      ec_tmp = eclass[tmp];
    1.58 +      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
    1.59 +         fmap[j-1] = fmap[j];
    1.60 +      fmap[j-1] = tmp;
    1.61 +   }
    1.62 +}
    1.63 +
    1.64 +
    1.65 +/*---------------------------------------------*/
    1.66 +#define fswap(zz1, zz2) \
    1.67 +   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
    1.68 +
    1.69 +#define fvswap(zzp1, zzp2, zzn)       \
    1.70 +{                                     \
    1.71 +   Int32 yyp1 = (zzp1);               \
    1.72 +   Int32 yyp2 = (zzp2);               \
    1.73 +   Int32 yyn  = (zzn);                \
    1.74 +   while (yyn > 0) {                  \
    1.75 +      fswap(fmap[yyp1], fmap[yyp2]);  \
    1.76 +      yyp1++; yyp2++; yyn--;          \
    1.77 +   }                                  \
    1.78 +}
    1.79 +
    1.80 +
    1.81 +#define fmin(a,b) ((a) < (b)) ? (a) : (b)
    1.82 +
    1.83 +#define fpush(lz,hz) { stackLo[sp] = lz; \
    1.84 +                       stackHi[sp] = hz; \
    1.85 +                       sp++; }
    1.86 +
    1.87 +#define fpop(lz,hz) { sp--;              \
    1.88 +                      lz = stackLo[sp];  \
    1.89 +                      hz = stackHi[sp]; }
    1.90 +
    1.91 +#define FALLBACK_QSORT_SMALL_THRESH 10
    1.92 +#define FALLBACK_QSORT_STACK_SIZE   100
    1.93 +
    1.94 +
    1.95 +static
    1.96 +void fallbackQSort3 ( UInt32* fmap, 
    1.97 +                      UInt32* eclass,
    1.98 +                      Int32   loSt, 
    1.99 +                      Int32   hiSt )
   1.100 +{
   1.101 +   Int32 unLo, unHi, ltLo, gtHi, n, m;
   1.102 +   Int32 sp, lo, hi;
   1.103 +   UInt32 med, r, r3;
   1.104 +   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
   1.105 +   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
   1.106 +
   1.107 +   r = 0;
   1.108 +
   1.109 +   sp = 0;
   1.110 +   fpush ( loSt, hiSt );
   1.111 +
   1.112 +   while (sp > 0) {
   1.113 +
   1.114 +      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
   1.115 +
   1.116 +      fpop ( lo, hi );
   1.117 +      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
   1.118 +         fallbackSimpleSort ( fmap, eclass, lo, hi );
   1.119 +         continue;
   1.120 +      }
   1.121 +
   1.122 +      /* Random partitioning.  Median of 3 sometimes fails to
   1.123 +         avoid bad cases.  Median of 9 seems to help but 
   1.124 +         looks rather expensive.  This too seems to work but
   1.125 +         is cheaper.  Guidance for the magic constants 
   1.126 +         7621 and 32768 is taken from Sedgewick's algorithms
   1.127 +         book, chapter 35.
   1.128 +      */
   1.129 +      r = ((r * 7621) + 1) % 32768;
   1.130 +      r3 = r % 3;
   1.131 +      if (r3 == 0) med = eclass[fmap[lo]]; else
   1.132 +      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
   1.133 +                   med = eclass[fmap[hi]];
   1.134 +
   1.135 +      unLo = ltLo = lo;
   1.136 +      unHi = gtHi = hi;
   1.137 +
   1.138 +      while (1) {
   1.139 +         while (1) {
   1.140 +            if (unLo > unHi) break;
   1.141 +            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
   1.142 +            if (n == 0) { 
   1.143 +               fswap(fmap[unLo], fmap[ltLo]); 
   1.144 +               ltLo++; unLo++; 
   1.145 +               continue; 
   1.146 +            };
   1.147 +            if (n > 0) break;
   1.148 +            unLo++;
   1.149 +         }
   1.150 +         while (1) {
   1.151 +            if (unLo > unHi) break;
   1.152 +            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
   1.153 +            if (n == 0) { 
   1.154 +               fswap(fmap[unHi], fmap[gtHi]); 
   1.155 +               gtHi--; unHi--; 
   1.156 +               continue; 
   1.157 +            };
   1.158 +            if (n < 0) break;
   1.159 +            unHi--;
   1.160 +         }
   1.161 +         if (unLo > unHi) break;
   1.162 +         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
   1.163 +      }
   1.164 +
   1.165 +      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
   1.166 +
   1.167 +      if (gtHi < ltLo) continue;
   1.168 +
   1.169 +      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
   1.170 +      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
   1.171 +
   1.172 +      n = lo + unLo - ltLo - 1;
   1.173 +      m = hi - (gtHi - unHi) + 1;
   1.174 +
   1.175 +      if (n - lo > hi - m) {
   1.176 +         fpush ( lo, n );
   1.177 +         fpush ( m, hi );
   1.178 +      } else {
   1.179 +         fpush ( m, hi );
   1.180 +         fpush ( lo, n );
   1.181 +      }
   1.182 +   }
   1.183 +}
   1.184 +
   1.185 +#undef fmin
   1.186 +#undef fpush
   1.187 +#undef fpop
   1.188 +#undef fswap
   1.189 +#undef fvswap
   1.190 +#undef FALLBACK_QSORT_SMALL_THRESH
   1.191 +#undef FALLBACK_QSORT_STACK_SIZE
   1.192 +
   1.193 +
   1.194 +/*---------------------------------------------*/
   1.195 +/* Pre:
   1.196 +      nblock > 0
   1.197 +      eclass exists for [0 .. nblock-1]
   1.198 +      ((UChar*)eclass) [0 .. nblock-1] holds block
   1.199 +      ptr exists for [0 .. nblock-1]
   1.200 +
   1.201 +   Post:
   1.202 +      ((UChar*)eclass) [0 .. nblock-1] holds block
   1.203 +      All other areas of eclass destroyed
   1.204 +      fmap [0 .. nblock-1] holds sorted order
   1.205 +      bhtab [ 0 .. 2+(nblock/32) ] destroyed
   1.206 +*/
   1.207 +
   1.208 +#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
   1.209 +#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
   1.210 +#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
   1.211 +#define      WORD_BH(zz)  bhtab[(zz) >> 5]
   1.212 +#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
   1.213 +
   1.214 +static
   1.215 +void fallbackSort ( UInt32* fmap, 
   1.216 +                    UInt32* eclass, 
   1.217 +                    UInt32* bhtab,
   1.218 +                    Int32   nblock,
   1.219 +                    Int32   verb )
   1.220 +{
   1.221 +   Int32 ftab[257];
   1.222 +   Int32 ftabCopy[256];
   1.223 +   Int32 H, i, j, k, l, r, cc, cc1;
   1.224 +   Int32 nNotDone;
   1.225 +   Int32 nBhtab;
   1.226 +   UChar* eclass8 = (UChar*)eclass;
   1.227 +
   1.228 +   /*--
   1.229 +      Initial 1-char radix sort to generate
   1.230 +      initial fmap and initial BH bits.
   1.231 +   --*/
   1.232 +   if (verb >= 4)
   1.233 +      VPrintf0 ( "        bucket sorting ...\n" );
   1.234 +   for (i = 0; i < 257;    i++) ftab[i] = 0;
   1.235 +   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
   1.236 +   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
   1.237 +   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
   1.238 +
   1.239 +   for (i = 0; i < nblock; i++) {
   1.240 +      j = eclass8[i];
   1.241 +      k = ftab[j] - 1;
   1.242 +      ftab[j] = k;
   1.243 +      fmap[k] = i;
   1.244 +   }
   1.245 +
   1.246 +   nBhtab = 2 + (nblock / 32);
   1.247 +   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
   1.248 +   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
   1.249 +
   1.250 +   /*--
   1.251 +      Inductively refine the buckets.  Kind-of an
   1.252 +      "exponential radix sort" (!), inspired by the
   1.253 +      Manber-Myers suffix array construction algorithm.
   1.254 +   --*/
   1.255 +
   1.256 +   /*-- set sentinel bits for block-end detection --*/
   1.257 +   for (i = 0; i < 32; i++) { 
   1.258 +      SET_BH(nblock + 2*i);
   1.259 +      CLEAR_BH(nblock + 2*i + 1);
   1.260 +   }
   1.261 +
   1.262 +   /*-- the log(N) loop --*/
   1.263 +   H = 1;
   1.264 +   while (1) {
   1.265 +
   1.266 +      if (verb >= 4) 
   1.267 +         VPrintf1 ( "        depth %6d has ", H );
   1.268 +
   1.269 +      j = 0;
   1.270 +      for (i = 0; i < nblock; i++) {
   1.271 +         if (ISSET_BH(i)) j = i;
   1.272 +         k = fmap[i] - H; if (k < 0) k += nblock;
   1.273 +         eclass[k] = j;
   1.274 +      }
   1.275 +
   1.276 +      nNotDone = 0;
   1.277 +      r = -1;
   1.278 +      while (1) {
   1.279 +
   1.280 +	 /*-- find the next non-singleton bucket --*/
   1.281 +         k = r + 1;
   1.282 +         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
   1.283 +         if (ISSET_BH(k)) {
   1.284 +            while (WORD_BH(k) == 0xffffffff) k += 32;
   1.285 +            while (ISSET_BH(k)) k++;
   1.286 +         }
   1.287 +         l = k - 1;
   1.288 +         if (l >= nblock) break;
   1.289 +         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
   1.290 +         if (!ISSET_BH(k)) {
   1.291 +            while (WORD_BH(k) == 0x00000000) k += 32;
   1.292 +            while (!ISSET_BH(k)) k++;
   1.293 +         }
   1.294 +         r = k - 1;
   1.295 +         if (r >= nblock) break;
   1.296 +
   1.297 +         /*-- now [l, r] bracket current bucket --*/
   1.298 +         if (r > l) {
   1.299 +            nNotDone += (r - l + 1);
   1.300 +            fallbackQSort3 ( fmap, eclass, l, r );
   1.301 +
   1.302 +            /*-- scan bucket and generate header bits-- */
   1.303 +            cc = -1;
   1.304 +            for (i = l; i <= r; i++) {
   1.305 +               cc1 = eclass[fmap[i]];
   1.306 +               if (cc != cc1) { SET_BH(i); cc = cc1; };
   1.307 +            }
   1.308 +         }
   1.309 +      }
   1.310 +
   1.311 +      if (verb >= 4) 
   1.312 +         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
   1.313 +
   1.314 +      H *= 2;
   1.315 +      if (H > nblock || nNotDone == 0) break;
   1.316 +   }
   1.317 +
   1.318 +   /*-- 
   1.319 +      Reconstruct the original block in
   1.320 +      eclass8 [0 .. nblock-1], since the
   1.321 +      previous phase destroyed it.
   1.322 +   --*/
   1.323 +   if (verb >= 4)
   1.324 +      VPrintf0 ( "        reconstructing block ...\n" );
   1.325 +   j = 0;
   1.326 +   for (i = 0; i < nblock; i++) {
   1.327 +      while (ftabCopy[j] == 0) j++;
   1.328 +      ftabCopy[j]--;
   1.329 +      eclass8[fmap[i]] = (UChar)j;
   1.330 +   }
   1.331 +   AssertH ( j < 256, 1005 );
   1.332 +}
   1.333 +
   1.334 +#undef       SET_BH
   1.335 +#undef     CLEAR_BH
   1.336 +#undef     ISSET_BH
   1.337 +#undef      WORD_BH
   1.338 +#undef UNALIGNED_BH
   1.339 +
   1.340 +
   1.341 +/*---------------------------------------------*/
   1.342 +/*--- The main, O(N^2 log(N)) sorting       ---*/
   1.343 +/*--- algorithm.  Faster for "normal"       ---*/
   1.344 +/*--- non-repetitive blocks.                ---*/
   1.345 +/*---------------------------------------------*/
   1.346 +
   1.347 +/*---------------------------------------------*/
   1.348 +static
   1.349 +__inline__
   1.350 +Bool mainGtU ( UInt32  i1, 
   1.351 +               UInt32  i2,
   1.352 +               UChar*  block, 
   1.353 +               UInt16* quadrant,
   1.354 +               UInt32  nblock,
   1.355 +               Int32*  budget )
   1.356 +{
   1.357 +   Int32  k;
   1.358 +   UChar  c1, c2;
   1.359 +   UInt16 s1, s2;
   1.360 +
   1.361 +   AssertD ( i1 != i2, "mainGtU" );
   1.362 +   /* 1 */
   1.363 +   c1 = block[i1]; c2 = block[i2];
   1.364 +   if (c1 != c2) return (c1 > c2);
   1.365 +   i1++; i2++;
   1.366 +   /* 2 */
   1.367 +   c1 = block[i1]; c2 = block[i2];
   1.368 +   if (c1 != c2) return (c1 > c2);
   1.369 +   i1++; i2++;
   1.370 +   /* 3 */
   1.371 +   c1 = block[i1]; c2 = block[i2];
   1.372 +   if (c1 != c2) return (c1 > c2);
   1.373 +   i1++; i2++;
   1.374 +   /* 4 */
   1.375 +   c1 = block[i1]; c2 = block[i2];
   1.376 +   if (c1 != c2) return (c1 > c2);
   1.377 +   i1++; i2++;
   1.378 +   /* 5 */
   1.379 +   c1 = block[i1]; c2 = block[i2];
   1.380 +   if (c1 != c2) return (c1 > c2);
   1.381 +   i1++; i2++;
   1.382 +   /* 6 */
   1.383 +   c1 = block[i1]; c2 = block[i2];
   1.384 +   if (c1 != c2) return (c1 > c2);
   1.385 +   i1++; i2++;
   1.386 +   /* 7 */
   1.387 +   c1 = block[i1]; c2 = block[i2];
   1.388 +   if (c1 != c2) return (c1 > c2);
   1.389 +   i1++; i2++;
   1.390 +   /* 8 */
   1.391 +   c1 = block[i1]; c2 = block[i2];
   1.392 +   if (c1 != c2) return (c1 > c2);
   1.393 +   i1++; i2++;
   1.394 +   /* 9 */
   1.395 +   c1 = block[i1]; c2 = block[i2];
   1.396 +   if (c1 != c2) return (c1 > c2);
   1.397 +   i1++; i2++;
   1.398 +   /* 10 */
   1.399 +   c1 = block[i1]; c2 = block[i2];
   1.400 +   if (c1 != c2) return (c1 > c2);
   1.401 +   i1++; i2++;
   1.402 +   /* 11 */
   1.403 +   c1 = block[i1]; c2 = block[i2];
   1.404 +   if (c1 != c2) return (c1 > c2);
   1.405 +   i1++; i2++;
   1.406 +   /* 12 */
   1.407 +   c1 = block[i1]; c2 = block[i2];
   1.408 +   if (c1 != c2) return (c1 > c2);
   1.409 +   i1++; i2++;
   1.410 +
   1.411 +   k = nblock + 8;
   1.412 +
   1.413 +   do {
   1.414 +      /* 1 */
   1.415 +      c1 = block[i1]; c2 = block[i2];
   1.416 +      if (c1 != c2) return (c1 > c2);
   1.417 +      s1 = quadrant[i1]; s2 = quadrant[i2];
   1.418 +      if (s1 != s2) return (s1 > s2);
   1.419 +      i1++; i2++;
   1.420 +      /* 2 */
   1.421 +      c1 = block[i1]; c2 = block[i2];
   1.422 +      if (c1 != c2) return (c1 > c2);
   1.423 +      s1 = quadrant[i1]; s2 = quadrant[i2];
   1.424 +      if (s1 != s2) return (s1 > s2);
   1.425 +      i1++; i2++;
   1.426 +      /* 3 */
   1.427 +      c1 = block[i1]; c2 = block[i2];
   1.428 +      if (c1 != c2) return (c1 > c2);
   1.429 +      s1 = quadrant[i1]; s2 = quadrant[i2];
   1.430 +      if (s1 != s2) return (s1 > s2);
   1.431 +      i1++; i2++;
   1.432 +      /* 4 */
   1.433 +      c1 = block[i1]; c2 = block[i2];
   1.434 +      if (c1 != c2) return (c1 > c2);
   1.435 +      s1 = quadrant[i1]; s2 = quadrant[i2];
   1.436 +      if (s1 != s2) return (s1 > s2);
   1.437 +      i1++; i2++;
   1.438 +      /* 5 */
   1.439 +      c1 = block[i1]; c2 = block[i2];
   1.440 +      if (c1 != c2) return (c1 > c2);
   1.441 +      s1 = quadrant[i1]; s2 = quadrant[i2];
   1.442 +      if (s1 != s2) return (s1 > s2);
   1.443 +      i1++; i2++;
   1.444 +      /* 6 */
   1.445 +      c1 = block[i1]; c2 = block[i2];
   1.446 +      if (c1 != c2) return (c1 > c2);
   1.447 +      s1 = quadrant[i1]; s2 = quadrant[i2];
   1.448 +      if (s1 != s2) return (s1 > s2);
   1.449 +      i1++; i2++;
   1.450 +      /* 7 */
   1.451 +      c1 = block[i1]; c2 = block[i2];
   1.452 +      if (c1 != c2) return (c1 > c2);
   1.453 +      s1 = quadrant[i1]; s2 = quadrant[i2];
   1.454 +      if (s1 != s2) return (s1 > s2);
   1.455 +      i1++; i2++;
   1.456 +      /* 8 */
   1.457 +      c1 = block[i1]; c2 = block[i2];
   1.458 +      if (c1 != c2) return (c1 > c2);
   1.459 +      s1 = quadrant[i1]; s2 = quadrant[i2];
   1.460 +      if (s1 != s2) return (s1 > s2);
   1.461 +      i1++; i2++;
   1.462 +
   1.463 +      if (i1 >= nblock) i1 -= nblock;
   1.464 +      if (i2 >= nblock) i2 -= nblock;
   1.465 +
   1.466 +      k -= 8;
   1.467 +      (*budget)--;
   1.468 +   }
   1.469 +      while (k >= 0);
   1.470 +
   1.471 +   return False;
   1.472 +}
   1.473 +
   1.474 +
   1.475 +/*---------------------------------------------*/
   1.476 +/*--
   1.477 +   Knuth's increments seem to work better
   1.478 +   than Incerpi-Sedgewick here.  Possibly
   1.479 +   because the number of elems to sort is
   1.480 +   usually small, typically <= 20.
   1.481 +--*/
   1.482 +static
   1.483 +Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
   1.484 +                   9841, 29524, 88573, 265720,
   1.485 +                   797161, 2391484 };
   1.486 +
   1.487 +static
   1.488 +void mainSimpleSort ( UInt32* ptr,
   1.489 +                      UChar*  block,
   1.490 +                      UInt16* quadrant,
   1.491 +                      Int32   nblock,
   1.492 +                      Int32   lo, 
   1.493 +                      Int32   hi, 
   1.494 +                      Int32   d,
   1.495 +                      Int32*  budget )
   1.496 +{
   1.497 +   Int32 i, j, h, bigN, hp;
   1.498 +   UInt32 v;
   1.499 +
   1.500 +   bigN = hi - lo + 1;
   1.501 +   if (bigN < 2) return;
   1.502 +
   1.503 +   hp = 0;
   1.504 +   while (incs[hp] < bigN) hp++;
   1.505 +   hp--;
   1.506 +
   1.507 +   for (; hp >= 0; hp--) {
   1.508 +      h = incs[hp];
   1.509 +
   1.510 +      i = lo + h;
   1.511 +      while (True) {
   1.512 +
   1.513 +         /*-- copy 1 --*/
   1.514 +         if (i > hi) break;
   1.515 +         v = ptr[i];
   1.516 +         j = i;
   1.517 +         while ( mainGtU ( 
   1.518 +                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
   1.519 +                 ) ) {
   1.520 +            ptr[j] = ptr[j-h];
   1.521 +            j = j - h;
   1.522 +            if (j <= (lo + h - 1)) break;
   1.523 +         }
   1.524 +         ptr[j] = v;
   1.525 +         i++;
   1.526 +
   1.527 +         /*-- copy 2 --*/
   1.528 +         if (i > hi) break;
   1.529 +         v = ptr[i];
   1.530 +         j = i;
   1.531 +         while ( mainGtU ( 
   1.532 +                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
   1.533 +                 ) ) {
   1.534 +            ptr[j] = ptr[j-h];
   1.535 +            j = j - h;
   1.536 +            if (j <= (lo + h - 1)) break;
   1.537 +         }
   1.538 +         ptr[j] = v;
   1.539 +         i++;
   1.540 +
   1.541 +         /*-- copy 3 --*/
   1.542 +         if (i > hi) break;
   1.543 +         v = ptr[i];
   1.544 +         j = i;
   1.545 +         while ( mainGtU ( 
   1.546 +                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
   1.547 +                 ) ) {
   1.548 +            ptr[j] = ptr[j-h];
   1.549 +            j = j - h;
   1.550 +            if (j <= (lo + h - 1)) break;
   1.551 +         }
   1.552 +         ptr[j] = v;
   1.553 +         i++;
   1.554 +
   1.555 +         if (*budget < 0) return;
   1.556 +      }
   1.557 +   }
   1.558 +}
   1.559 +
   1.560 +
   1.561 +/*---------------------------------------------*/
   1.562 +/*--
   1.563 +   The following is an implementation of
   1.564 +   an elegant 3-way quicksort for strings,
   1.565 +   described in a paper "Fast Algorithms for
   1.566 +   Sorting and Searching Strings", by Robert
   1.567 +   Sedgewick and Jon L. Bentley.
   1.568 +--*/
   1.569 +
   1.570 +#define mswap(zz1, zz2) \
   1.571 +   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
   1.572 +
   1.573 +#define mvswap(zzp1, zzp2, zzn)       \
   1.574 +{                                     \
   1.575 +   Int32 yyp1 = (zzp1);               \
   1.576 +   Int32 yyp2 = (zzp2);               \
   1.577 +   Int32 yyn  = (zzn);                \
   1.578 +   while (yyn > 0) {                  \
   1.579 +      mswap(ptr[yyp1], ptr[yyp2]);    \
   1.580 +      yyp1++; yyp2++; yyn--;          \
   1.581 +   }                                  \
   1.582 +}
   1.583 +
   1.584 +static 
   1.585 +__inline__
   1.586 +UChar mmed3 ( UChar a, UChar b, UChar c )
   1.587 +{
   1.588 +   UChar t;
   1.589 +   if (a > b) { t = a; a = b; b = t; };
   1.590 +   if (b > c) { 
   1.591 +      b = c;
   1.592 +      if (a > b) b = a;
   1.593 +   }
   1.594 +   return b;
   1.595 +}
   1.596 +
   1.597 +#define mmin(a,b) ((a) < (b)) ? (a) : (b)
   1.598 +
   1.599 +#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
   1.600 +                          stackHi[sp] = hz; \
   1.601 +                          stackD [sp] = dz; \
   1.602 +                          sp++; }
   1.603 +
   1.604 +#define mpop(lz,hz,dz) { sp--;             \
   1.605 +                         lz = stackLo[sp]; \
   1.606 +                         hz = stackHi[sp]; \
   1.607 +                         dz = stackD [sp]; }
   1.608 +
   1.609 +
   1.610 +#define mnextsize(az) (nextHi[az]-nextLo[az])
   1.611 +
   1.612 +#define mnextswap(az,bz)                                        \
   1.613 +   { Int32 tz;                                                  \
   1.614 +     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
   1.615 +     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
   1.616 +     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
   1.617 +
   1.618 +
   1.619 +#define MAIN_QSORT_SMALL_THRESH 20
   1.620 +#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
   1.621 +#define MAIN_QSORT_STACK_SIZE 100
   1.622 +
   1.623 +static
   1.624 +void mainQSort3 ( UInt32* ptr,
   1.625 +                  UChar*  block,
   1.626 +                  UInt16* quadrant,
   1.627 +                  Int32   nblock,
   1.628 +                  Int32   loSt, 
   1.629 +                  Int32   hiSt, 
   1.630 +                  Int32   dSt,
   1.631 +                  Int32*  budget )
   1.632 +{
   1.633 +   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
   1.634 +   Int32 sp, lo, hi, d;
   1.635 +
   1.636 +   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
   1.637 +   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
   1.638 +   Int32 stackD [MAIN_QSORT_STACK_SIZE];
   1.639 +
   1.640 +   Int32 nextLo[3];
   1.641 +   Int32 nextHi[3];
   1.642 +   Int32 nextD [3];
   1.643 +
   1.644 +   sp = 0;
   1.645 +   mpush ( loSt, hiSt, dSt );
   1.646 +
   1.647 +   while (sp > 0) {
   1.648 +
   1.649 +      AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
   1.650 +
   1.651 +      mpop ( lo, hi, d );
   1.652 +      if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
   1.653 +          d > MAIN_QSORT_DEPTH_THRESH) {
   1.654 +         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
   1.655 +         if (*budget < 0) return;
   1.656 +         continue;
   1.657 +      }
   1.658 +
   1.659 +      med = (Int32) 
   1.660 +            mmed3 ( block[ptr[ lo         ]+d],
   1.661 +                    block[ptr[ hi         ]+d],
   1.662 +                    block[ptr[ (lo+hi)>>1 ]+d] );
   1.663 +
   1.664 +      unLo = ltLo = lo;
   1.665 +      unHi = gtHi = hi;
   1.666 +
   1.667 +      while (True) {
   1.668 +         while (True) {
   1.669 +            if (unLo > unHi) break;
   1.670 +            n = ((Int32)block[ptr[unLo]+d]) - med;
   1.671 +            if (n == 0) { 
   1.672 +               mswap(ptr[unLo], ptr[ltLo]); 
   1.673 +               ltLo++; unLo++; continue; 
   1.674 +            };
   1.675 +            if (n >  0) break;
   1.676 +            unLo++;
   1.677 +         }
   1.678 +         while (True) {
   1.679 +            if (unLo > unHi) break;
   1.680 +            n = ((Int32)block[ptr[unHi]+d]) - med;
   1.681 +            if (n == 0) { 
   1.682 +               mswap(ptr[unHi], ptr[gtHi]); 
   1.683 +               gtHi--; unHi--; continue; 
   1.684 +            };
   1.685 +            if (n <  0) break;
   1.686 +            unHi--;
   1.687 +         }
   1.688 +         if (unLo > unHi) break;
   1.689 +         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
   1.690 +      }
   1.691 +
   1.692 +      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
   1.693 +
   1.694 +      if (gtHi < ltLo) {
   1.695 +         mpush(lo, hi, d+1 );
   1.696 +         continue;
   1.697 +      }
   1.698 +
   1.699 +      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
   1.700 +      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
   1.701 +
   1.702 +      n = lo + unLo - ltLo - 1;
   1.703 +      m = hi - (gtHi - unHi) + 1;
   1.704 +
   1.705 +      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
   1.706 +      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
   1.707 +      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
   1.708 +
   1.709 +      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
   1.710 +      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
   1.711 +      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
   1.712 +
   1.713 +      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
   1.714 +      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
   1.715 +
   1.716 +      mpush (nextLo[0], nextHi[0], nextD[0]);
   1.717 +      mpush (nextLo[1], nextHi[1], nextD[1]);
   1.718 +      mpush (nextLo[2], nextHi[2], nextD[2]);
   1.719 +   }
   1.720 +}
   1.721 +
   1.722 +#undef mswap
   1.723 +#undef mvswap
   1.724 +#undef mpush
   1.725 +#undef mpop
   1.726 +#undef mmin
   1.727 +#undef mnextsize
   1.728 +#undef mnextswap
   1.729 +#undef MAIN_QSORT_SMALL_THRESH
   1.730 +#undef MAIN_QSORT_DEPTH_THRESH
   1.731 +#undef MAIN_QSORT_STACK_SIZE
   1.732 +
   1.733 +
   1.734 +/*---------------------------------------------*/
   1.735 +/* Pre:
   1.736 +      nblock > N_OVERSHOOT
   1.737 +      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
   1.738 +      ((UChar*)block32) [0 .. nblock-1] holds block
   1.739 +      ptr exists for [0 .. nblock-1]
   1.740 +
   1.741 +   Post:
   1.742 +      ((UChar*)block32) [0 .. nblock-1] holds block
   1.743 +      All other areas of block32 destroyed
   1.744 +      ftab [0 .. 65536 ] destroyed
   1.745 +      ptr [0 .. nblock-1] holds sorted order
   1.746 +      if (*budget < 0), sorting was abandoned
   1.747 +*/
   1.748 +
   1.749 +#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
   1.750 +#define SETMASK (1 << 21)
   1.751 +#define CLEARMASK (~(SETMASK))
   1.752 +
   1.753 +static
   1.754 +void mainSort ( UInt32* ptr, 
   1.755 +                UChar*  block,
   1.756 +                UInt16* quadrant, 
   1.757 +                UInt32* ftab,
   1.758 +                Int32   nblock,
   1.759 +                Int32   verb,
   1.760 +                Int32*  budget )
   1.761 +{
   1.762 +   Int32  i, j, k, ss, sb;
   1.763 +   Int32  runningOrder[256];
   1.764 +   Bool   bigDone[256];
   1.765 +   Int32  copyStart[256];
   1.766 +   Int32  copyEnd  [256];
   1.767 +   UChar  c1;
   1.768 +   Int32  numQSorted;
   1.769 +   UInt16 s;
   1.770 +   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
   1.771 +
   1.772 +   /*-- set up the 2-byte frequency table --*/
   1.773 +   for (i = 65536; i >= 0; i--) ftab[i] = 0;
   1.774 +
   1.775 +   j = block[0] << 8;
   1.776 +   i = nblock-1;
   1.777 +   for (; i >= 3; i -= 4) {
   1.778 +      quadrant[i] = 0;
   1.779 +      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
   1.780 +      ftab[j]++;
   1.781 +      quadrant[i-1] = 0;
   1.782 +      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
   1.783 +      ftab[j]++;
   1.784 +      quadrant[i-2] = 0;
   1.785 +      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
   1.786 +      ftab[j]++;
   1.787 +      quadrant[i-3] = 0;
   1.788 +      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
   1.789 +      ftab[j]++;
   1.790 +   }
   1.791 +   for (; i >= 0; i--) {
   1.792 +      quadrant[i] = 0;
   1.793 +      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
   1.794 +      ftab[j]++;
   1.795 +   }
   1.796 +
   1.797 +   /*-- (emphasises close relationship of block & quadrant) --*/
   1.798 +   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
   1.799 +      block   [nblock+i] = block[i];
   1.800 +      quadrant[nblock+i] = 0;
   1.801 +   }
   1.802 +
   1.803 +   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
   1.804 +
   1.805 +   /*-- Complete the initial radix sort --*/
   1.806 +   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
   1.807 +
   1.808 +   s = block[0] << 8;
   1.809 +   i = nblock-1;
   1.810 +   for (; i >= 3; i -= 4) {
   1.811 +      s = (s >> 8) | (block[i] << 8);
   1.812 +      j = ftab[s] -1;
   1.813 +      ftab[s] = j;
   1.814 +      ptr[j] = i;
   1.815 +      s = (s >> 8) | (block[i-1] << 8);
   1.816 +      j = ftab[s] -1;
   1.817 +      ftab[s] = j;
   1.818 +      ptr[j] = i-1;
   1.819 +      s = (s >> 8) | (block[i-2] << 8);
   1.820 +      j = ftab[s] -1;
   1.821 +      ftab[s] = j;
   1.822 +      ptr[j] = i-2;
   1.823 +      s = (s >> 8) | (block[i-3] << 8);
   1.824 +      j = ftab[s] -1;
   1.825 +      ftab[s] = j;
   1.826 +      ptr[j] = i-3;
   1.827 +   }
   1.828 +   for (; i >= 0; i--) {
   1.829 +      s = (s >> 8) | (block[i] << 8);
   1.830 +      j = ftab[s] -1;
   1.831 +      ftab[s] = j;
   1.832 +      ptr[j] = i;
   1.833 +   }
   1.834 +
   1.835 +   /*--
   1.836 +      Now ftab contains the first loc of every small bucket.
   1.837 +      Calculate the running order, from smallest to largest
   1.838 +      big bucket.
   1.839 +   --*/
   1.840 +   for (i = 0; i <= 255; i++) {
   1.841 +      bigDone     [i] = False;
   1.842 +      runningOrder[i] = i;
   1.843 +   }
   1.844 +
   1.845 +   {
   1.846 +      Int32 vv;
   1.847 +      Int32 h = 1;
   1.848 +      do h = 3 * h + 1; while (h <= 256);
   1.849 +      do {
   1.850 +         h = h / 3;
   1.851 +         for (i = h; i <= 255; i++) {
   1.852 +            vv = runningOrder[i];
   1.853 +            j = i;
   1.854 +            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
   1.855 +               runningOrder[j] = runningOrder[j-h];
   1.856 +               j = j - h;
   1.857 +               if (j <= (h - 1)) goto zero;
   1.858 +            }
   1.859 +            zero:
   1.860 +            runningOrder[j] = vv;
   1.861 +         }
   1.862 +      } while (h != 1);
   1.863 +   }
   1.864 +
   1.865 +   /*--
   1.866 +      The main sorting loop.
   1.867 +   --*/
   1.868 +
   1.869 +   numQSorted = 0;
   1.870 +
   1.871 +   for (i = 0; i <= 255; i++) {
   1.872 +
   1.873 +      /*--
   1.874 +         Process big buckets, starting with the least full.
   1.875 +         Basically this is a 3-step process in which we call
   1.876 +         mainQSort3 to sort the small buckets [ss, j], but
   1.877 +         also make a big effort to avoid the calls if we can.
   1.878 +      --*/
   1.879 +      ss = runningOrder[i];
   1.880 +
   1.881 +      /*--
   1.882 +         Step 1:
   1.883 +         Complete the big bucket [ss] by quicksorting
   1.884 +         any unsorted small buckets [ss, j], for j != ss.  
   1.885 +         Hopefully previous pointer-scanning phases have already
   1.886 +         completed many of the small buckets [ss, j], so
   1.887 +         we don't have to sort them at all.
   1.888 +      --*/
   1.889 +      for (j = 0; j <= 255; j++) {
   1.890 +         if (j != ss) {
   1.891 +            sb = (ss << 8) + j;
   1.892 +            if ( ! (ftab[sb] & SETMASK) ) {
   1.893 +               Int32 lo = ftab[sb]   & CLEARMASK;
   1.894 +               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
   1.895 +               if (hi > lo) {
   1.896 +                  if (verb >= 4)
   1.897 +                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
   1.898 +                                "done %d   this %d\n",
   1.899 +                                ss, j, numQSorted, hi - lo + 1 );
   1.900 +                  mainQSort3 ( 
   1.901 +                     ptr, block, quadrant, nblock, 
   1.902 +                     lo, hi, BZ_N_RADIX, budget 
   1.903 +                  );   
   1.904 +                  numQSorted += (hi - lo + 1);
   1.905 +                  if (*budget < 0) return;
   1.906 +               }
   1.907 +            }
   1.908 +            ftab[sb] |= SETMASK;
   1.909 +         }
   1.910 +      }
   1.911 +
   1.912 +      AssertH ( !bigDone[ss], 1006 );
   1.913 +
   1.914 +      /*--
   1.915 +         Step 2:
   1.916 +         Now scan this big bucket [ss] so as to synthesise the
   1.917 +         sorted order for small buckets [t, ss] for all t,
   1.918 +         including, magically, the bucket [ss,ss] too.
   1.919 +         This will avoid doing Real Work in subsequent Step 1's.
   1.920 +      --*/
   1.921 +      {
   1.922 +         for (j = 0; j <= 255; j++) {
   1.923 +            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
   1.924 +            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
   1.925 +         }
   1.926 +         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
   1.927 +            k = ptr[j]-1; if (k < 0) k += nblock;
   1.928 +            c1 = block[k];
   1.929 +            if (!bigDone[c1])
   1.930 +               ptr[ copyStart[c1]++ ] = k;
   1.931 +         }
   1.932 +         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
   1.933 +            k = ptr[j]-1; if (k < 0) k += nblock;
   1.934 +            c1 = block[k];
   1.935 +            if (!bigDone[c1]) 
   1.936 +               ptr[ copyEnd[c1]-- ] = k;
   1.937 +         }
   1.938 +      }
   1.939 +
   1.940 +      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
   1.941 +                || 
   1.942 +                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
   1.943 +                   Necessity for this case is demonstrated by compressing 
   1.944 +                   a sequence of approximately 48.5 million of character 
   1.945 +                   251; 1.0.0/1.0.1 will then die here. */
   1.946 +                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
   1.947 +                1007 )
   1.948 +
   1.949 +      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
   1.950 +
   1.951 +      /*--
   1.952 +         Step 3:
   1.953 +         The [ss] big bucket is now done.  Record this fact,
   1.954 +         and update the quadrant descriptors.  Remember to
   1.955 +         update quadrants in the overshoot area too, if
   1.956 +         necessary.  The "if (i < 255)" test merely skips
   1.957 +         this updating for the last bucket processed, since
   1.958 +         updating for the last bucket is pointless.
   1.959 +
   1.960 +         The quadrant array provides a way to incrementally
   1.961 +         cache sort orderings, as they appear, so as to 
   1.962 +         make subsequent comparisons in fullGtU() complete
   1.963 +         faster.  For repetitive blocks this makes a big
   1.964 +         difference (but not big enough to be able to avoid
   1.965 +         the fallback sorting mechanism, exponential radix sort).
   1.966 +
   1.967 +         The precise meaning is: at all times:
   1.968 +
   1.969 +            for 0 <= i < nblock and 0 <= j <= nblock
   1.970 +
   1.971 +            if block[i] != block[j], 
   1.972 +
   1.973 +               then the relative values of quadrant[i] and 
   1.974 +                    quadrant[j] are meaningless.
   1.975 +
   1.976 +               else {
   1.977 +                  if quadrant[i] < quadrant[j]
   1.978 +                     then the string starting at i lexicographically
   1.979 +                     precedes the string starting at j
   1.980 +
   1.981 +                  else if quadrant[i] > quadrant[j]
   1.982 +                     then the string starting at j lexicographically
   1.983 +                     precedes the string starting at i
   1.984 +
   1.985 +                  else
   1.986 +                     the relative ordering of the strings starting
   1.987 +                     at i and j has not yet been determined.
   1.988 +               }
   1.989 +      --*/
   1.990 +      bigDone[ss] = True;
   1.991 +
   1.992 +      if (i < 255) {
   1.993 +         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
   1.994 +         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
   1.995 +         Int32 shifts   = 0;
   1.996 +
   1.997 +         while ((bbSize >> shifts) > 65534) shifts++;
   1.998 +
   1.999 +         for (j = bbSize-1; j >= 0; j--) {
  1.1000 +            Int32 a2update     = ptr[bbStart + j];
  1.1001 +            UInt16 qVal        = (UInt16)(j >> shifts);
  1.1002 +            quadrant[a2update] = qVal;
  1.1003 +            if (a2update < BZ_N_OVERSHOOT)
  1.1004 +               quadrant[a2update + nblock] = qVal;
  1.1005 +         }
  1.1006 +         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
  1.1007 +      }
  1.1008 +
  1.1009 +   }
  1.1010 +
  1.1011 +   if (verb >= 4)
  1.1012 +      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
  1.1013 +                 nblock, numQSorted, nblock - numQSorted );
  1.1014 +}
  1.1015 +
  1.1016 +#undef BIGFREQ
  1.1017 +#undef SETMASK
  1.1018 +#undef CLEARMASK
  1.1019 +
  1.1020 +
  1.1021 +/*---------------------------------------------*/
  1.1022 +/* Pre:
  1.1023 +      nblock > 0
  1.1024 +      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
  1.1025 +      ((UChar*)arr2)  [0 .. nblock-1] holds block
  1.1026 +      arr1 exists for [0 .. nblock-1]
  1.1027 +
  1.1028 +   Post:
  1.1029 +      ((UChar*)arr2) [0 .. nblock-1] holds block
  1.1030 +      All other areas of block destroyed
  1.1031 +      ftab [ 0 .. 65536 ] destroyed
  1.1032 +      arr1 [0 .. nblock-1] holds sorted order
  1.1033 +*/
  1.1034 +void BZ2_blockSort ( EState* s )
  1.1035 +{
  1.1036 +   UInt32* ptr    = s->ptr; 
  1.1037 +   UChar*  block  = s->block;
  1.1038 +   UInt32* ftab   = s->ftab;
  1.1039 +   Int32   nblock = s->nblock;
  1.1040 +   Int32   verb   = s->verbosity;
  1.1041 +   Int32   wfact  = s->workFactor;
  1.1042 +   UInt16* quadrant;
  1.1043 +   Int32   budget;
  1.1044 +   Int32   budgetInit;
  1.1045 +   Int32   i;
  1.1046 +
  1.1047 +   if (nblock < 10000) {
  1.1048 +      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
  1.1049 +   } else {
  1.1050 +      /* Calculate the location for quadrant, remembering to get
  1.1051 +         the alignment right.  Assumes that &(block[0]) is at least
  1.1052 +         2-byte aligned -- this should be ok since block is really
  1.1053 +         the first section of arr2.
  1.1054 +      */
  1.1055 +      i = nblock+BZ_N_OVERSHOOT;
  1.1056 +      if (i & 1) i++;
  1.1057 +      quadrant = (UInt16*)(&(block[i]));
  1.1058 +
  1.1059 +      /* (wfact-1) / 3 puts the default-factor-30
  1.1060 +         transition point at very roughly the same place as 
  1.1061 +         with v0.1 and v0.9.0.  
  1.1062 +         Not that it particularly matters any more, since the
  1.1063 +         resulting compressed stream is now the same regardless
  1.1064 +         of whether or not we use the main sort or fallback sort.
  1.1065 +      */
  1.1066 +      if (wfact < 1  ) wfact = 1;
  1.1067 +      if (wfact > 100) wfact = 100;
  1.1068 +      budgetInit = nblock * ((wfact-1) / 3);
  1.1069 +      budget = budgetInit;
  1.1070 +
  1.1071 +      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
  1.1072 +      if (verb >= 3) 
  1.1073 +         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
  1.1074 +                    budgetInit - budget,
  1.1075 +                    nblock, 
  1.1076 +                    (float)(budgetInit - budget) /
  1.1077 +                    (float)(nblock==0 ? 1 : nblock) ); 
  1.1078 +      if (budget < 0) {
  1.1079 +         if (verb >= 2) 
  1.1080 +            VPrintf0 ( "    too repetitive; using fallback"
  1.1081 +                       " sorting algorithm\n" );
  1.1082 +         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
  1.1083 +      }
  1.1084 +   }
  1.1085 +
  1.1086 +   s->origPtr = -1;
  1.1087 +   for (i = 0; i < s->nblock; i++)
  1.1088 +      if (ptr[i] == 0)
  1.1089 +         { s->origPtr = i; break; };
  1.1090 +
  1.1091 +   AssertH( s->origPtr != -1, 1003 );
  1.1092 +}
  1.1093 +
  1.1094 +
  1.1095 +/*-------------------------------------------------------------*/
  1.1096 +/*--- end                                       blocksort.c ---*/
  1.1097 +/*-------------------------------------------------------------*/

mercurial