Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | |
michael@0 | 2 | /*-------------------------------------------------------------*/ |
michael@0 | 3 | /*--- Block sorting machinery ---*/ |
michael@0 | 4 | /*--- blocksort.c ---*/ |
michael@0 | 5 | /*-------------------------------------------------------------*/ |
michael@0 | 6 | |
michael@0 | 7 | /* ------------------------------------------------------------------ |
michael@0 | 8 | This file is part of bzip2/libbzip2, a program and library for |
michael@0 | 9 | lossless, block-sorting data compression. |
michael@0 | 10 | |
michael@0 | 11 | bzip2/libbzip2 version 1.0.4 of 20 December 2006 |
michael@0 | 12 | Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org> |
michael@0 | 13 | |
michael@0 | 14 | Please read the WARNING, DISCLAIMER and PATENTS sections in the |
michael@0 | 15 | README file. |
michael@0 | 16 | |
michael@0 | 17 | This program is released under the terms of the license contained |
michael@0 | 18 | in the file LICENSE. |
michael@0 | 19 | ------------------------------------------------------------------ */ |
michael@0 | 20 | |
michael@0 | 21 | |
michael@0 | 22 | #include "bzlib_private.h" |
michael@0 | 23 | |
michael@0 | 24 | /*---------------------------------------------*/ |
michael@0 | 25 | /*--- Fallback O(N log(N)^2) sorting ---*/ |
michael@0 | 26 | /*--- algorithm, for repetitive blocks ---*/ |
michael@0 | 27 | /*---------------------------------------------*/ |
michael@0 | 28 | |
michael@0 | 29 | /*---------------------------------------------*/ |
michael@0 | 30 | static |
michael@0 | 31 | __inline__ |
michael@0 | 32 | void fallbackSimpleSort ( UInt32* fmap, |
michael@0 | 33 | UInt32* eclass, |
michael@0 | 34 | Int32 lo, |
michael@0 | 35 | Int32 hi ) |
michael@0 | 36 | { |
michael@0 | 37 | Int32 i, j, tmp; |
michael@0 | 38 | UInt32 ec_tmp; |
michael@0 | 39 | |
michael@0 | 40 | if (lo == hi) return; |
michael@0 | 41 | |
michael@0 | 42 | if (hi - lo > 3) { |
michael@0 | 43 | for ( i = hi-4; i >= lo; i-- ) { |
michael@0 | 44 | tmp = fmap[i]; |
michael@0 | 45 | ec_tmp = eclass[tmp]; |
michael@0 | 46 | for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) |
michael@0 | 47 | fmap[j-4] = fmap[j]; |
michael@0 | 48 | fmap[j-4] = tmp; |
michael@0 | 49 | } |
michael@0 | 50 | } |
michael@0 | 51 | |
michael@0 | 52 | for ( i = hi-1; i >= lo; i-- ) { |
michael@0 | 53 | tmp = fmap[i]; |
michael@0 | 54 | ec_tmp = eclass[tmp]; |
michael@0 | 55 | for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) |
michael@0 | 56 | fmap[j-1] = fmap[j]; |
michael@0 | 57 | fmap[j-1] = tmp; |
michael@0 | 58 | } |
michael@0 | 59 | } |
michael@0 | 60 | |
michael@0 | 61 | |
michael@0 | 62 | /*---------------------------------------------*/ |
michael@0 | 63 | #define fswap(zz1, zz2) \ |
michael@0 | 64 | { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } |
michael@0 | 65 | |
michael@0 | 66 | #define fvswap(zzp1, zzp2, zzn) \ |
michael@0 | 67 | { \ |
michael@0 | 68 | Int32 yyp1 = (zzp1); \ |
michael@0 | 69 | Int32 yyp2 = (zzp2); \ |
michael@0 | 70 | Int32 yyn = (zzn); \ |
michael@0 | 71 | while (yyn > 0) { \ |
michael@0 | 72 | fswap(fmap[yyp1], fmap[yyp2]); \ |
michael@0 | 73 | yyp1++; yyp2++; yyn--; \ |
michael@0 | 74 | } \ |
michael@0 | 75 | } |
michael@0 | 76 | |
michael@0 | 77 | |
michael@0 | 78 | #define fmin(a,b) ((a) < (b)) ? (a) : (b) |
michael@0 | 79 | |
michael@0 | 80 | #define fpush(lz,hz) { stackLo[sp] = lz; \ |
michael@0 | 81 | stackHi[sp] = hz; \ |
michael@0 | 82 | sp++; } |
michael@0 | 83 | |
michael@0 | 84 | #define fpop(lz,hz) { sp--; \ |
michael@0 | 85 | lz = stackLo[sp]; \ |
michael@0 | 86 | hz = stackHi[sp]; } |
michael@0 | 87 | |
michael@0 | 88 | #define FALLBACK_QSORT_SMALL_THRESH 10 |
michael@0 | 89 | #define FALLBACK_QSORT_STACK_SIZE 100 |
michael@0 | 90 | |
michael@0 | 91 | |
michael@0 | 92 | static |
michael@0 | 93 | void fallbackQSort3 ( UInt32* fmap, |
michael@0 | 94 | UInt32* eclass, |
michael@0 | 95 | Int32 loSt, |
michael@0 | 96 | Int32 hiSt ) |
michael@0 | 97 | { |
michael@0 | 98 | Int32 unLo, unHi, ltLo, gtHi, n, m; |
michael@0 | 99 | Int32 sp, lo, hi; |
michael@0 | 100 | UInt32 med, r, r3; |
michael@0 | 101 | Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; |
michael@0 | 102 | Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; |
michael@0 | 103 | |
michael@0 | 104 | r = 0; |
michael@0 | 105 | |
michael@0 | 106 | sp = 0; |
michael@0 | 107 | fpush ( loSt, hiSt ); |
michael@0 | 108 | |
michael@0 | 109 | while (sp > 0) { |
michael@0 | 110 | |
michael@0 | 111 | AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); |
michael@0 | 112 | |
michael@0 | 113 | fpop ( lo, hi ); |
michael@0 | 114 | if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { |
michael@0 | 115 | fallbackSimpleSort ( fmap, eclass, lo, hi ); |
michael@0 | 116 | continue; |
michael@0 | 117 | } |
michael@0 | 118 | |
michael@0 | 119 | /* Random partitioning. Median of 3 sometimes fails to |
michael@0 | 120 | avoid bad cases. Median of 9 seems to help but |
michael@0 | 121 | looks rather expensive. This too seems to work but |
michael@0 | 122 | is cheaper. Guidance for the magic constants |
michael@0 | 123 | 7621 and 32768 is taken from Sedgewick's algorithms |
michael@0 | 124 | book, chapter 35. |
michael@0 | 125 | */ |
michael@0 | 126 | r = ((r * 7621) + 1) % 32768; |
michael@0 | 127 | r3 = r % 3; |
michael@0 | 128 | if (r3 == 0) med = eclass[fmap[lo]]; else |
michael@0 | 129 | if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else |
michael@0 | 130 | med = eclass[fmap[hi]]; |
michael@0 | 131 | |
michael@0 | 132 | unLo = ltLo = lo; |
michael@0 | 133 | unHi = gtHi = hi; |
michael@0 | 134 | |
michael@0 | 135 | while (1) { |
michael@0 | 136 | while (1) { |
michael@0 | 137 | if (unLo > unHi) break; |
michael@0 | 138 | n = (Int32)eclass[fmap[unLo]] - (Int32)med; |
michael@0 | 139 | if (n == 0) { |
michael@0 | 140 | fswap(fmap[unLo], fmap[ltLo]); |
michael@0 | 141 | ltLo++; unLo++; |
michael@0 | 142 | continue; |
michael@0 | 143 | }; |
michael@0 | 144 | if (n > 0) break; |
michael@0 | 145 | unLo++; |
michael@0 | 146 | } |
michael@0 | 147 | while (1) { |
michael@0 | 148 | if (unLo > unHi) break; |
michael@0 | 149 | n = (Int32)eclass[fmap[unHi]] - (Int32)med; |
michael@0 | 150 | if (n == 0) { |
michael@0 | 151 | fswap(fmap[unHi], fmap[gtHi]); |
michael@0 | 152 | gtHi--; unHi--; |
michael@0 | 153 | continue; |
michael@0 | 154 | }; |
michael@0 | 155 | if (n < 0) break; |
michael@0 | 156 | unHi--; |
michael@0 | 157 | } |
michael@0 | 158 | if (unLo > unHi) break; |
michael@0 | 159 | fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; |
michael@0 | 160 | } |
michael@0 | 161 | |
michael@0 | 162 | AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); |
michael@0 | 163 | |
michael@0 | 164 | if (gtHi < ltLo) continue; |
michael@0 | 165 | |
michael@0 | 166 | n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); |
michael@0 | 167 | m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); |
michael@0 | 168 | |
michael@0 | 169 | n = lo + unLo - ltLo - 1; |
michael@0 | 170 | m = hi - (gtHi - unHi) + 1; |
michael@0 | 171 | |
michael@0 | 172 | if (n - lo > hi - m) { |
michael@0 | 173 | fpush ( lo, n ); |
michael@0 | 174 | fpush ( m, hi ); |
michael@0 | 175 | } else { |
michael@0 | 176 | fpush ( m, hi ); |
michael@0 | 177 | fpush ( lo, n ); |
michael@0 | 178 | } |
michael@0 | 179 | } |
michael@0 | 180 | } |
michael@0 | 181 | |
michael@0 | 182 | #undef fmin |
michael@0 | 183 | #undef fpush |
michael@0 | 184 | #undef fpop |
michael@0 | 185 | #undef fswap |
michael@0 | 186 | #undef fvswap |
michael@0 | 187 | #undef FALLBACK_QSORT_SMALL_THRESH |
michael@0 | 188 | #undef FALLBACK_QSORT_STACK_SIZE |
michael@0 | 189 | |
michael@0 | 190 | |
michael@0 | 191 | /*---------------------------------------------*/ |
michael@0 | 192 | /* Pre: |
michael@0 | 193 | nblock > 0 |
michael@0 | 194 | eclass exists for [0 .. nblock-1] |
michael@0 | 195 | ((UChar*)eclass) [0 .. nblock-1] holds block |
michael@0 | 196 | ptr exists for [0 .. nblock-1] |
michael@0 | 197 | |
michael@0 | 198 | Post: |
michael@0 | 199 | ((UChar*)eclass) [0 .. nblock-1] holds block |
michael@0 | 200 | All other areas of eclass destroyed |
michael@0 | 201 | fmap [0 .. nblock-1] holds sorted order |
michael@0 | 202 | bhtab [ 0 .. 2+(nblock/32) ] destroyed |
michael@0 | 203 | */ |
michael@0 | 204 | |
michael@0 | 205 | #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) |
michael@0 | 206 | #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) |
michael@0 | 207 | #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) |
michael@0 | 208 | #define WORD_BH(zz) bhtab[(zz) >> 5] |
michael@0 | 209 | #define UNALIGNED_BH(zz) ((zz) & 0x01f) |
michael@0 | 210 | |
michael@0 | 211 | static |
michael@0 | 212 | void fallbackSort ( UInt32* fmap, |
michael@0 | 213 | UInt32* eclass, |
michael@0 | 214 | UInt32* bhtab, |
michael@0 | 215 | Int32 nblock, |
michael@0 | 216 | Int32 verb ) |
michael@0 | 217 | { |
michael@0 | 218 | Int32 ftab[257]; |
michael@0 | 219 | Int32 ftabCopy[256]; |
michael@0 | 220 | Int32 H, i, j, k, l, r, cc, cc1; |
michael@0 | 221 | Int32 nNotDone; |
michael@0 | 222 | Int32 nBhtab; |
michael@0 | 223 | UChar* eclass8 = (UChar*)eclass; |
michael@0 | 224 | |
michael@0 | 225 | /*-- |
michael@0 | 226 | Initial 1-char radix sort to generate |
michael@0 | 227 | initial fmap and initial BH bits. |
michael@0 | 228 | --*/ |
michael@0 | 229 | if (verb >= 4) |
michael@0 | 230 | VPrintf0 ( " bucket sorting ...\n" ); |
michael@0 | 231 | for (i = 0; i < 257; i++) ftab[i] = 0; |
michael@0 | 232 | for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; |
michael@0 | 233 | for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; |
michael@0 | 234 | for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; |
michael@0 | 235 | |
michael@0 | 236 | for (i = 0; i < nblock; i++) { |
michael@0 | 237 | j = eclass8[i]; |
michael@0 | 238 | k = ftab[j] - 1; |
michael@0 | 239 | ftab[j] = k; |
michael@0 | 240 | fmap[k] = i; |
michael@0 | 241 | } |
michael@0 | 242 | |
michael@0 | 243 | nBhtab = 2 + (nblock / 32); |
michael@0 | 244 | for (i = 0; i < nBhtab; i++) bhtab[i] = 0; |
michael@0 | 245 | for (i = 0; i < 256; i++) SET_BH(ftab[i]); |
michael@0 | 246 | |
michael@0 | 247 | /*-- |
michael@0 | 248 | Inductively refine the buckets. Kind-of an |
michael@0 | 249 | "exponential radix sort" (!), inspired by the |
michael@0 | 250 | Manber-Myers suffix array construction algorithm. |
michael@0 | 251 | --*/ |
michael@0 | 252 | |
michael@0 | 253 | /*-- set sentinel bits for block-end detection --*/ |
michael@0 | 254 | for (i = 0; i < 32; i++) { |
michael@0 | 255 | SET_BH(nblock + 2*i); |
michael@0 | 256 | CLEAR_BH(nblock + 2*i + 1); |
michael@0 | 257 | } |
michael@0 | 258 | |
michael@0 | 259 | /*-- the log(N) loop --*/ |
michael@0 | 260 | H = 1; |
michael@0 | 261 | while (1) { |
michael@0 | 262 | |
michael@0 | 263 | if (verb >= 4) |
michael@0 | 264 | VPrintf1 ( " depth %6d has ", H ); |
michael@0 | 265 | |
michael@0 | 266 | j = 0; |
michael@0 | 267 | for (i = 0; i < nblock; i++) { |
michael@0 | 268 | if (ISSET_BH(i)) j = i; |
michael@0 | 269 | k = fmap[i] - H; if (k < 0) k += nblock; |
michael@0 | 270 | eclass[k] = j; |
michael@0 | 271 | } |
michael@0 | 272 | |
michael@0 | 273 | nNotDone = 0; |
michael@0 | 274 | r = -1; |
michael@0 | 275 | while (1) { |
michael@0 | 276 | |
michael@0 | 277 | /*-- find the next non-singleton bucket --*/ |
michael@0 | 278 | k = r + 1; |
michael@0 | 279 | while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; |
michael@0 | 280 | if (ISSET_BH(k)) { |
michael@0 | 281 | while (WORD_BH(k) == 0xffffffff) k += 32; |
michael@0 | 282 | while (ISSET_BH(k)) k++; |
michael@0 | 283 | } |
michael@0 | 284 | l = k - 1; |
michael@0 | 285 | if (l >= nblock) break; |
michael@0 | 286 | while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; |
michael@0 | 287 | if (!ISSET_BH(k)) { |
michael@0 | 288 | while (WORD_BH(k) == 0x00000000) k += 32; |
michael@0 | 289 | while (!ISSET_BH(k)) k++; |
michael@0 | 290 | } |
michael@0 | 291 | r = k - 1; |
michael@0 | 292 | if (r >= nblock) break; |
michael@0 | 293 | |
michael@0 | 294 | /*-- now [l, r] bracket current bucket --*/ |
michael@0 | 295 | if (r > l) { |
michael@0 | 296 | nNotDone += (r - l + 1); |
michael@0 | 297 | fallbackQSort3 ( fmap, eclass, l, r ); |
michael@0 | 298 | |
michael@0 | 299 | /*-- scan bucket and generate header bits-- */ |
michael@0 | 300 | cc = -1; |
michael@0 | 301 | for (i = l; i <= r; i++) { |
michael@0 | 302 | cc1 = eclass[fmap[i]]; |
michael@0 | 303 | if (cc != cc1) { SET_BH(i); cc = cc1; }; |
michael@0 | 304 | } |
michael@0 | 305 | } |
michael@0 | 306 | } |
michael@0 | 307 | |
michael@0 | 308 | if (verb >= 4) |
michael@0 | 309 | VPrintf1 ( "%6d unresolved strings\n", nNotDone ); |
michael@0 | 310 | |
michael@0 | 311 | H *= 2; |
michael@0 | 312 | if (H > nblock || nNotDone == 0) break; |
michael@0 | 313 | } |
michael@0 | 314 | |
michael@0 | 315 | /*-- |
michael@0 | 316 | Reconstruct the original block in |
michael@0 | 317 | eclass8 [0 .. nblock-1], since the |
michael@0 | 318 | previous phase destroyed it. |
michael@0 | 319 | --*/ |
michael@0 | 320 | if (verb >= 4) |
michael@0 | 321 | VPrintf0 ( " reconstructing block ...\n" ); |
michael@0 | 322 | j = 0; |
michael@0 | 323 | for (i = 0; i < nblock; i++) { |
michael@0 | 324 | while (ftabCopy[j] == 0) j++; |
michael@0 | 325 | ftabCopy[j]--; |
michael@0 | 326 | eclass8[fmap[i]] = (UChar)j; |
michael@0 | 327 | } |
michael@0 | 328 | AssertH ( j < 256, 1005 ); |
michael@0 | 329 | } |
michael@0 | 330 | |
michael@0 | 331 | #undef SET_BH |
michael@0 | 332 | #undef CLEAR_BH |
michael@0 | 333 | #undef ISSET_BH |
michael@0 | 334 | #undef WORD_BH |
michael@0 | 335 | #undef UNALIGNED_BH |
michael@0 | 336 | |
michael@0 | 337 | |
michael@0 | 338 | /*---------------------------------------------*/ |
michael@0 | 339 | /*--- The main, O(N^2 log(N)) sorting ---*/ |
michael@0 | 340 | /*--- algorithm. Faster for "normal" ---*/ |
michael@0 | 341 | /*--- non-repetitive blocks. ---*/ |
michael@0 | 342 | /*---------------------------------------------*/ |
michael@0 | 343 | |
michael@0 | 344 | /*---------------------------------------------*/ |
michael@0 | 345 | static |
michael@0 | 346 | __inline__ |
michael@0 | 347 | Bool mainGtU ( UInt32 i1, |
michael@0 | 348 | UInt32 i2, |
michael@0 | 349 | UChar* block, |
michael@0 | 350 | UInt16* quadrant, |
michael@0 | 351 | UInt32 nblock, |
michael@0 | 352 | Int32* budget ) |
michael@0 | 353 | { |
michael@0 | 354 | Int32 k; |
michael@0 | 355 | UChar c1, c2; |
michael@0 | 356 | UInt16 s1, s2; |
michael@0 | 357 | |
michael@0 | 358 | AssertD ( i1 != i2, "mainGtU" ); |
michael@0 | 359 | /* 1 */ |
michael@0 | 360 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 361 | if (c1 != c2) return (c1 > c2); |
michael@0 | 362 | i1++; i2++; |
michael@0 | 363 | /* 2 */ |
michael@0 | 364 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 365 | if (c1 != c2) return (c1 > c2); |
michael@0 | 366 | i1++; i2++; |
michael@0 | 367 | /* 3 */ |
michael@0 | 368 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 369 | if (c1 != c2) return (c1 > c2); |
michael@0 | 370 | i1++; i2++; |
michael@0 | 371 | /* 4 */ |
michael@0 | 372 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 373 | if (c1 != c2) return (c1 > c2); |
michael@0 | 374 | i1++; i2++; |
michael@0 | 375 | /* 5 */ |
michael@0 | 376 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 377 | if (c1 != c2) return (c1 > c2); |
michael@0 | 378 | i1++; i2++; |
michael@0 | 379 | /* 6 */ |
michael@0 | 380 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 381 | if (c1 != c2) return (c1 > c2); |
michael@0 | 382 | i1++; i2++; |
michael@0 | 383 | /* 7 */ |
michael@0 | 384 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 385 | if (c1 != c2) return (c1 > c2); |
michael@0 | 386 | i1++; i2++; |
michael@0 | 387 | /* 8 */ |
michael@0 | 388 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 389 | if (c1 != c2) return (c1 > c2); |
michael@0 | 390 | i1++; i2++; |
michael@0 | 391 | /* 9 */ |
michael@0 | 392 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 393 | if (c1 != c2) return (c1 > c2); |
michael@0 | 394 | i1++; i2++; |
michael@0 | 395 | /* 10 */ |
michael@0 | 396 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 397 | if (c1 != c2) return (c1 > c2); |
michael@0 | 398 | i1++; i2++; |
michael@0 | 399 | /* 11 */ |
michael@0 | 400 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 401 | if (c1 != c2) return (c1 > c2); |
michael@0 | 402 | i1++; i2++; |
michael@0 | 403 | /* 12 */ |
michael@0 | 404 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 405 | if (c1 != c2) return (c1 > c2); |
michael@0 | 406 | i1++; i2++; |
michael@0 | 407 | |
michael@0 | 408 | k = nblock + 8; |
michael@0 | 409 | |
michael@0 | 410 | do { |
michael@0 | 411 | /* 1 */ |
michael@0 | 412 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 413 | if (c1 != c2) return (c1 > c2); |
michael@0 | 414 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
michael@0 | 415 | if (s1 != s2) return (s1 > s2); |
michael@0 | 416 | i1++; i2++; |
michael@0 | 417 | /* 2 */ |
michael@0 | 418 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 419 | if (c1 != c2) return (c1 > c2); |
michael@0 | 420 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
michael@0 | 421 | if (s1 != s2) return (s1 > s2); |
michael@0 | 422 | i1++; i2++; |
michael@0 | 423 | /* 3 */ |
michael@0 | 424 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 425 | if (c1 != c2) return (c1 > c2); |
michael@0 | 426 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
michael@0 | 427 | if (s1 != s2) return (s1 > s2); |
michael@0 | 428 | i1++; i2++; |
michael@0 | 429 | /* 4 */ |
michael@0 | 430 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 431 | if (c1 != c2) return (c1 > c2); |
michael@0 | 432 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
michael@0 | 433 | if (s1 != s2) return (s1 > s2); |
michael@0 | 434 | i1++; i2++; |
michael@0 | 435 | /* 5 */ |
michael@0 | 436 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 437 | if (c1 != c2) return (c1 > c2); |
michael@0 | 438 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
michael@0 | 439 | if (s1 != s2) return (s1 > s2); |
michael@0 | 440 | i1++; i2++; |
michael@0 | 441 | /* 6 */ |
michael@0 | 442 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 443 | if (c1 != c2) return (c1 > c2); |
michael@0 | 444 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
michael@0 | 445 | if (s1 != s2) return (s1 > s2); |
michael@0 | 446 | i1++; i2++; |
michael@0 | 447 | /* 7 */ |
michael@0 | 448 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 449 | if (c1 != c2) return (c1 > c2); |
michael@0 | 450 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
michael@0 | 451 | if (s1 != s2) return (s1 > s2); |
michael@0 | 452 | i1++; i2++; |
michael@0 | 453 | /* 8 */ |
michael@0 | 454 | c1 = block[i1]; c2 = block[i2]; |
michael@0 | 455 | if (c1 != c2) return (c1 > c2); |
michael@0 | 456 | s1 = quadrant[i1]; s2 = quadrant[i2]; |
michael@0 | 457 | if (s1 != s2) return (s1 > s2); |
michael@0 | 458 | i1++; i2++; |
michael@0 | 459 | |
michael@0 | 460 | if (i1 >= nblock) i1 -= nblock; |
michael@0 | 461 | if (i2 >= nblock) i2 -= nblock; |
michael@0 | 462 | |
michael@0 | 463 | k -= 8; |
michael@0 | 464 | (*budget)--; |
michael@0 | 465 | } |
michael@0 | 466 | while (k >= 0); |
michael@0 | 467 | |
michael@0 | 468 | return False; |
michael@0 | 469 | } |
michael@0 | 470 | |
michael@0 | 471 | |
michael@0 | 472 | /*---------------------------------------------*/ |
michael@0 | 473 | /*-- |
michael@0 | 474 | Knuth's increments seem to work better |
michael@0 | 475 | than Incerpi-Sedgewick here. Possibly |
michael@0 | 476 | because the number of elems to sort is |
michael@0 | 477 | usually small, typically <= 20. |
michael@0 | 478 | --*/ |
michael@0 | 479 | static |
michael@0 | 480 | Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, |
michael@0 | 481 | 9841, 29524, 88573, 265720, |
michael@0 | 482 | 797161, 2391484 }; |
michael@0 | 483 | |
michael@0 | 484 | static |
michael@0 | 485 | void mainSimpleSort ( UInt32* ptr, |
michael@0 | 486 | UChar* block, |
michael@0 | 487 | UInt16* quadrant, |
michael@0 | 488 | Int32 nblock, |
michael@0 | 489 | Int32 lo, |
michael@0 | 490 | Int32 hi, |
michael@0 | 491 | Int32 d, |
michael@0 | 492 | Int32* budget ) |
michael@0 | 493 | { |
michael@0 | 494 | Int32 i, j, h, bigN, hp; |
michael@0 | 495 | UInt32 v; |
michael@0 | 496 | |
michael@0 | 497 | bigN = hi - lo + 1; |
michael@0 | 498 | if (bigN < 2) return; |
michael@0 | 499 | |
michael@0 | 500 | hp = 0; |
michael@0 | 501 | while (incs[hp] < bigN) hp++; |
michael@0 | 502 | hp--; |
michael@0 | 503 | |
michael@0 | 504 | for (; hp >= 0; hp--) { |
michael@0 | 505 | h = incs[hp]; |
michael@0 | 506 | |
michael@0 | 507 | i = lo + h; |
michael@0 | 508 | while (True) { |
michael@0 | 509 | |
michael@0 | 510 | /*-- copy 1 --*/ |
michael@0 | 511 | if (i > hi) break; |
michael@0 | 512 | v = ptr[i]; |
michael@0 | 513 | j = i; |
michael@0 | 514 | while ( mainGtU ( |
michael@0 | 515 | ptr[j-h]+d, v+d, block, quadrant, nblock, budget |
michael@0 | 516 | ) ) { |
michael@0 | 517 | ptr[j] = ptr[j-h]; |
michael@0 | 518 | j = j - h; |
michael@0 | 519 | if (j <= (lo + h - 1)) break; |
michael@0 | 520 | } |
michael@0 | 521 | ptr[j] = v; |
michael@0 | 522 | i++; |
michael@0 | 523 | |
michael@0 | 524 | /*-- copy 2 --*/ |
michael@0 | 525 | if (i > hi) break; |
michael@0 | 526 | v = ptr[i]; |
michael@0 | 527 | j = i; |
michael@0 | 528 | while ( mainGtU ( |
michael@0 | 529 | ptr[j-h]+d, v+d, block, quadrant, nblock, budget |
michael@0 | 530 | ) ) { |
michael@0 | 531 | ptr[j] = ptr[j-h]; |
michael@0 | 532 | j = j - h; |
michael@0 | 533 | if (j <= (lo + h - 1)) break; |
michael@0 | 534 | } |
michael@0 | 535 | ptr[j] = v; |
michael@0 | 536 | i++; |
michael@0 | 537 | |
michael@0 | 538 | /*-- copy 3 --*/ |
michael@0 | 539 | if (i > hi) break; |
michael@0 | 540 | v = ptr[i]; |
michael@0 | 541 | j = i; |
michael@0 | 542 | while ( mainGtU ( |
michael@0 | 543 | ptr[j-h]+d, v+d, block, quadrant, nblock, budget |
michael@0 | 544 | ) ) { |
michael@0 | 545 | ptr[j] = ptr[j-h]; |
michael@0 | 546 | j = j - h; |
michael@0 | 547 | if (j <= (lo + h - 1)) break; |
michael@0 | 548 | } |
michael@0 | 549 | ptr[j] = v; |
michael@0 | 550 | i++; |
michael@0 | 551 | |
michael@0 | 552 | if (*budget < 0) return; |
michael@0 | 553 | } |
michael@0 | 554 | } |
michael@0 | 555 | } |
michael@0 | 556 | |
michael@0 | 557 | |
michael@0 | 558 | /*---------------------------------------------*/ |
michael@0 | 559 | /*-- |
michael@0 | 560 | The following is an implementation of |
michael@0 | 561 | an elegant 3-way quicksort for strings, |
michael@0 | 562 | described in a paper "Fast Algorithms for |
michael@0 | 563 | Sorting and Searching Strings", by Robert |
michael@0 | 564 | Sedgewick and Jon L. Bentley. |
michael@0 | 565 | --*/ |
michael@0 | 566 | |
michael@0 | 567 | #define mswap(zz1, zz2) \ |
michael@0 | 568 | { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } |
michael@0 | 569 | |
michael@0 | 570 | #define mvswap(zzp1, zzp2, zzn) \ |
michael@0 | 571 | { \ |
michael@0 | 572 | Int32 yyp1 = (zzp1); \ |
michael@0 | 573 | Int32 yyp2 = (zzp2); \ |
michael@0 | 574 | Int32 yyn = (zzn); \ |
michael@0 | 575 | while (yyn > 0) { \ |
michael@0 | 576 | mswap(ptr[yyp1], ptr[yyp2]); \ |
michael@0 | 577 | yyp1++; yyp2++; yyn--; \ |
michael@0 | 578 | } \ |
michael@0 | 579 | } |
michael@0 | 580 | |
michael@0 | 581 | static |
michael@0 | 582 | __inline__ |
michael@0 | 583 | UChar mmed3 ( UChar a, UChar b, UChar c ) |
michael@0 | 584 | { |
michael@0 | 585 | UChar t; |
michael@0 | 586 | if (a > b) { t = a; a = b; b = t; }; |
michael@0 | 587 | if (b > c) { |
michael@0 | 588 | b = c; |
michael@0 | 589 | if (a > b) b = a; |
michael@0 | 590 | } |
michael@0 | 591 | return b; |
michael@0 | 592 | } |
michael@0 | 593 | |
michael@0 | 594 | #define mmin(a,b) ((a) < (b)) ? (a) : (b) |
michael@0 | 595 | |
michael@0 | 596 | #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ |
michael@0 | 597 | stackHi[sp] = hz; \ |
michael@0 | 598 | stackD [sp] = dz; \ |
michael@0 | 599 | sp++; } |
michael@0 | 600 | |
michael@0 | 601 | #define mpop(lz,hz,dz) { sp--; \ |
michael@0 | 602 | lz = stackLo[sp]; \ |
michael@0 | 603 | hz = stackHi[sp]; \ |
michael@0 | 604 | dz = stackD [sp]; } |
michael@0 | 605 | |
michael@0 | 606 | |
michael@0 | 607 | #define mnextsize(az) (nextHi[az]-nextLo[az]) |
michael@0 | 608 | |
michael@0 | 609 | #define mnextswap(az,bz) \ |
michael@0 | 610 | { Int32 tz; \ |
michael@0 | 611 | tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ |
michael@0 | 612 | tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ |
michael@0 | 613 | tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } |
michael@0 | 614 | |
michael@0 | 615 | |
michael@0 | 616 | #define MAIN_QSORT_SMALL_THRESH 20 |
michael@0 | 617 | #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) |
michael@0 | 618 | #define MAIN_QSORT_STACK_SIZE 100 |
michael@0 | 619 | |
michael@0 | 620 | static |
michael@0 | 621 | void mainQSort3 ( UInt32* ptr, |
michael@0 | 622 | UChar* block, |
michael@0 | 623 | UInt16* quadrant, |
michael@0 | 624 | Int32 nblock, |
michael@0 | 625 | Int32 loSt, |
michael@0 | 626 | Int32 hiSt, |
michael@0 | 627 | Int32 dSt, |
michael@0 | 628 | Int32* budget ) |
michael@0 | 629 | { |
michael@0 | 630 | Int32 unLo, unHi, ltLo, gtHi, n, m, med; |
michael@0 | 631 | Int32 sp, lo, hi, d; |
michael@0 | 632 | |
michael@0 | 633 | Int32 stackLo[MAIN_QSORT_STACK_SIZE]; |
michael@0 | 634 | Int32 stackHi[MAIN_QSORT_STACK_SIZE]; |
michael@0 | 635 | Int32 stackD [MAIN_QSORT_STACK_SIZE]; |
michael@0 | 636 | |
michael@0 | 637 | Int32 nextLo[3]; |
michael@0 | 638 | Int32 nextHi[3]; |
michael@0 | 639 | Int32 nextD [3]; |
michael@0 | 640 | |
michael@0 | 641 | sp = 0; |
michael@0 | 642 | mpush ( loSt, hiSt, dSt ); |
michael@0 | 643 | |
michael@0 | 644 | while (sp > 0) { |
michael@0 | 645 | |
michael@0 | 646 | AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); |
michael@0 | 647 | |
michael@0 | 648 | mpop ( lo, hi, d ); |
michael@0 | 649 | if (hi - lo < MAIN_QSORT_SMALL_THRESH || |
michael@0 | 650 | d > MAIN_QSORT_DEPTH_THRESH) { |
michael@0 | 651 | mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); |
michael@0 | 652 | if (*budget < 0) return; |
michael@0 | 653 | continue; |
michael@0 | 654 | } |
michael@0 | 655 | |
michael@0 | 656 | med = (Int32) |
michael@0 | 657 | mmed3 ( block[ptr[ lo ]+d], |
michael@0 | 658 | block[ptr[ hi ]+d], |
michael@0 | 659 | block[ptr[ (lo+hi)>>1 ]+d] ); |
michael@0 | 660 | |
michael@0 | 661 | unLo = ltLo = lo; |
michael@0 | 662 | unHi = gtHi = hi; |
michael@0 | 663 | |
michael@0 | 664 | while (True) { |
michael@0 | 665 | while (True) { |
michael@0 | 666 | if (unLo > unHi) break; |
michael@0 | 667 | n = ((Int32)block[ptr[unLo]+d]) - med; |
michael@0 | 668 | if (n == 0) { |
michael@0 | 669 | mswap(ptr[unLo], ptr[ltLo]); |
michael@0 | 670 | ltLo++; unLo++; continue; |
michael@0 | 671 | }; |
michael@0 | 672 | if (n > 0) break; |
michael@0 | 673 | unLo++; |
michael@0 | 674 | } |
michael@0 | 675 | while (True) { |
michael@0 | 676 | if (unLo > unHi) break; |
michael@0 | 677 | n = ((Int32)block[ptr[unHi]+d]) - med; |
michael@0 | 678 | if (n == 0) { |
michael@0 | 679 | mswap(ptr[unHi], ptr[gtHi]); |
michael@0 | 680 | gtHi--; unHi--; continue; |
michael@0 | 681 | }; |
michael@0 | 682 | if (n < 0) break; |
michael@0 | 683 | unHi--; |
michael@0 | 684 | } |
michael@0 | 685 | if (unLo > unHi) break; |
michael@0 | 686 | mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; |
michael@0 | 687 | } |
michael@0 | 688 | |
michael@0 | 689 | AssertD ( unHi == unLo-1, "mainQSort3(2)" ); |
michael@0 | 690 | |
michael@0 | 691 | if (gtHi < ltLo) { |
michael@0 | 692 | mpush(lo, hi, d+1 ); |
michael@0 | 693 | continue; |
michael@0 | 694 | } |
michael@0 | 695 | |
michael@0 | 696 | n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); |
michael@0 | 697 | m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); |
michael@0 | 698 | |
michael@0 | 699 | n = lo + unLo - ltLo - 1; |
michael@0 | 700 | m = hi - (gtHi - unHi) + 1; |
michael@0 | 701 | |
michael@0 | 702 | nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; |
michael@0 | 703 | nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; |
michael@0 | 704 | nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; |
michael@0 | 705 | |
michael@0 | 706 | if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); |
michael@0 | 707 | if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); |
michael@0 | 708 | if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); |
michael@0 | 709 | |
michael@0 | 710 | AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); |
michael@0 | 711 | AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); |
michael@0 | 712 | |
michael@0 | 713 | mpush (nextLo[0], nextHi[0], nextD[0]); |
michael@0 | 714 | mpush (nextLo[1], nextHi[1], nextD[1]); |
michael@0 | 715 | mpush (nextLo[2], nextHi[2], nextD[2]); |
michael@0 | 716 | } |
michael@0 | 717 | } |
michael@0 | 718 | |
michael@0 | 719 | #undef mswap |
michael@0 | 720 | #undef mvswap |
michael@0 | 721 | #undef mpush |
michael@0 | 722 | #undef mpop |
michael@0 | 723 | #undef mmin |
michael@0 | 724 | #undef mnextsize |
michael@0 | 725 | #undef mnextswap |
michael@0 | 726 | #undef MAIN_QSORT_SMALL_THRESH |
michael@0 | 727 | #undef MAIN_QSORT_DEPTH_THRESH |
michael@0 | 728 | #undef MAIN_QSORT_STACK_SIZE |
michael@0 | 729 | |
michael@0 | 730 | |
michael@0 | 731 | /*---------------------------------------------*/ |
michael@0 | 732 | /* Pre: |
michael@0 | 733 | nblock > N_OVERSHOOT |
michael@0 | 734 | block32 exists for [0 .. nblock-1 +N_OVERSHOOT] |
michael@0 | 735 | ((UChar*)block32) [0 .. nblock-1] holds block |
michael@0 | 736 | ptr exists for [0 .. nblock-1] |
michael@0 | 737 | |
michael@0 | 738 | Post: |
michael@0 | 739 | ((UChar*)block32) [0 .. nblock-1] holds block |
michael@0 | 740 | All other areas of block32 destroyed |
michael@0 | 741 | ftab [0 .. 65536 ] destroyed |
michael@0 | 742 | ptr [0 .. nblock-1] holds sorted order |
michael@0 | 743 | if (*budget < 0), sorting was abandoned |
michael@0 | 744 | */ |
michael@0 | 745 | |
michael@0 | 746 | #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) |
michael@0 | 747 | #define SETMASK (1 << 21) |
michael@0 | 748 | #define CLEARMASK (~(SETMASK)) |
michael@0 | 749 | |
michael@0 | 750 | static |
michael@0 | 751 | void mainSort ( UInt32* ptr, |
michael@0 | 752 | UChar* block, |
michael@0 | 753 | UInt16* quadrant, |
michael@0 | 754 | UInt32* ftab, |
michael@0 | 755 | Int32 nblock, |
michael@0 | 756 | Int32 verb, |
michael@0 | 757 | Int32* budget ) |
michael@0 | 758 | { |
michael@0 | 759 | Int32 i, j, k, ss, sb; |
michael@0 | 760 | Int32 runningOrder[256]; |
michael@0 | 761 | Bool bigDone[256]; |
michael@0 | 762 | Int32 copyStart[256]; |
michael@0 | 763 | Int32 copyEnd [256]; |
michael@0 | 764 | UChar c1; |
michael@0 | 765 | Int32 numQSorted; |
michael@0 | 766 | UInt16 s; |
michael@0 | 767 | if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); |
michael@0 | 768 | |
michael@0 | 769 | /*-- set up the 2-byte frequency table --*/ |
michael@0 | 770 | for (i = 65536; i >= 0; i--) ftab[i] = 0; |
michael@0 | 771 | |
michael@0 | 772 | j = block[0] << 8; |
michael@0 | 773 | i = nblock-1; |
michael@0 | 774 | for (; i >= 3; i -= 4) { |
michael@0 | 775 | quadrant[i] = 0; |
michael@0 | 776 | j = (j >> 8) | ( ((UInt16)block[i]) << 8); |
michael@0 | 777 | ftab[j]++; |
michael@0 | 778 | quadrant[i-1] = 0; |
michael@0 | 779 | j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); |
michael@0 | 780 | ftab[j]++; |
michael@0 | 781 | quadrant[i-2] = 0; |
michael@0 | 782 | j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); |
michael@0 | 783 | ftab[j]++; |
michael@0 | 784 | quadrant[i-3] = 0; |
michael@0 | 785 | j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); |
michael@0 | 786 | ftab[j]++; |
michael@0 | 787 | } |
michael@0 | 788 | for (; i >= 0; i--) { |
michael@0 | 789 | quadrant[i] = 0; |
michael@0 | 790 | j = (j >> 8) | ( ((UInt16)block[i]) << 8); |
michael@0 | 791 | ftab[j]++; |
michael@0 | 792 | } |
michael@0 | 793 | |
michael@0 | 794 | /*-- (emphasises close relationship of block & quadrant) --*/ |
michael@0 | 795 | for (i = 0; i < BZ_N_OVERSHOOT; i++) { |
michael@0 | 796 | block [nblock+i] = block[i]; |
michael@0 | 797 | quadrant[nblock+i] = 0; |
michael@0 | 798 | } |
michael@0 | 799 | |
michael@0 | 800 | if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); |
michael@0 | 801 | |
michael@0 | 802 | /*-- Complete the initial radix sort --*/ |
michael@0 | 803 | for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; |
michael@0 | 804 | |
michael@0 | 805 | s = block[0] << 8; |
michael@0 | 806 | i = nblock-1; |
michael@0 | 807 | for (; i >= 3; i -= 4) { |
michael@0 | 808 | s = (s >> 8) | (block[i] << 8); |
michael@0 | 809 | j = ftab[s] -1; |
michael@0 | 810 | ftab[s] = j; |
michael@0 | 811 | ptr[j] = i; |
michael@0 | 812 | s = (s >> 8) | (block[i-1] << 8); |
michael@0 | 813 | j = ftab[s] -1; |
michael@0 | 814 | ftab[s] = j; |
michael@0 | 815 | ptr[j] = i-1; |
michael@0 | 816 | s = (s >> 8) | (block[i-2] << 8); |
michael@0 | 817 | j = ftab[s] -1; |
michael@0 | 818 | ftab[s] = j; |
michael@0 | 819 | ptr[j] = i-2; |
michael@0 | 820 | s = (s >> 8) | (block[i-3] << 8); |
michael@0 | 821 | j = ftab[s] -1; |
michael@0 | 822 | ftab[s] = j; |
michael@0 | 823 | ptr[j] = i-3; |
michael@0 | 824 | } |
michael@0 | 825 | for (; i >= 0; i--) { |
michael@0 | 826 | s = (s >> 8) | (block[i] << 8); |
michael@0 | 827 | j = ftab[s] -1; |
michael@0 | 828 | ftab[s] = j; |
michael@0 | 829 | ptr[j] = i; |
michael@0 | 830 | } |
michael@0 | 831 | |
michael@0 | 832 | /*-- |
michael@0 | 833 | Now ftab contains the first loc of every small bucket. |
michael@0 | 834 | Calculate the running order, from smallest to largest |
michael@0 | 835 | big bucket. |
michael@0 | 836 | --*/ |
michael@0 | 837 | for (i = 0; i <= 255; i++) { |
michael@0 | 838 | bigDone [i] = False; |
michael@0 | 839 | runningOrder[i] = i; |
michael@0 | 840 | } |
michael@0 | 841 | |
michael@0 | 842 | { |
michael@0 | 843 | Int32 vv; |
michael@0 | 844 | Int32 h = 1; |
michael@0 | 845 | do h = 3 * h + 1; while (h <= 256); |
michael@0 | 846 | do { |
michael@0 | 847 | h = h / 3; |
michael@0 | 848 | for (i = h; i <= 255; i++) { |
michael@0 | 849 | vv = runningOrder[i]; |
michael@0 | 850 | j = i; |
michael@0 | 851 | while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { |
michael@0 | 852 | runningOrder[j] = runningOrder[j-h]; |
michael@0 | 853 | j = j - h; |
michael@0 | 854 | if (j <= (h - 1)) goto zero; |
michael@0 | 855 | } |
michael@0 | 856 | zero: |
michael@0 | 857 | runningOrder[j] = vv; |
michael@0 | 858 | } |
michael@0 | 859 | } while (h != 1); |
michael@0 | 860 | } |
michael@0 | 861 | |
michael@0 | 862 | /*-- |
michael@0 | 863 | The main sorting loop. |
michael@0 | 864 | --*/ |
michael@0 | 865 | |
michael@0 | 866 | numQSorted = 0; |
michael@0 | 867 | |
michael@0 | 868 | for (i = 0; i <= 255; i++) { |
michael@0 | 869 | |
michael@0 | 870 | /*-- |
michael@0 | 871 | Process big buckets, starting with the least full. |
michael@0 | 872 | Basically this is a 3-step process in which we call |
michael@0 | 873 | mainQSort3 to sort the small buckets [ss, j], but |
michael@0 | 874 | also make a big effort to avoid the calls if we can. |
michael@0 | 875 | --*/ |
michael@0 | 876 | ss = runningOrder[i]; |
michael@0 | 877 | |
michael@0 | 878 | /*-- |
michael@0 | 879 | Step 1: |
michael@0 | 880 | Complete the big bucket [ss] by quicksorting |
michael@0 | 881 | any unsorted small buckets [ss, j], for j != ss. |
michael@0 | 882 | Hopefully previous pointer-scanning phases have already |
michael@0 | 883 | completed many of the small buckets [ss, j], so |
michael@0 | 884 | we don't have to sort them at all. |
michael@0 | 885 | --*/ |
michael@0 | 886 | for (j = 0; j <= 255; j++) { |
michael@0 | 887 | if (j != ss) { |
michael@0 | 888 | sb = (ss << 8) + j; |
michael@0 | 889 | if ( ! (ftab[sb] & SETMASK) ) { |
michael@0 | 890 | Int32 lo = ftab[sb] & CLEARMASK; |
michael@0 | 891 | Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; |
michael@0 | 892 | if (hi > lo) { |
michael@0 | 893 | if (verb >= 4) |
michael@0 | 894 | VPrintf4 ( " qsort [0x%x, 0x%x] " |
michael@0 | 895 | "done %d this %d\n", |
michael@0 | 896 | ss, j, numQSorted, hi - lo + 1 ); |
michael@0 | 897 | mainQSort3 ( |
michael@0 | 898 | ptr, block, quadrant, nblock, |
michael@0 | 899 | lo, hi, BZ_N_RADIX, budget |
michael@0 | 900 | ); |
michael@0 | 901 | numQSorted += (hi - lo + 1); |
michael@0 | 902 | if (*budget < 0) return; |
michael@0 | 903 | } |
michael@0 | 904 | } |
michael@0 | 905 | ftab[sb] |= SETMASK; |
michael@0 | 906 | } |
michael@0 | 907 | } |
michael@0 | 908 | |
michael@0 | 909 | AssertH ( !bigDone[ss], 1006 ); |
michael@0 | 910 | |
michael@0 | 911 | /*-- |
michael@0 | 912 | Step 2: |
michael@0 | 913 | Now scan this big bucket [ss] so as to synthesise the |
michael@0 | 914 | sorted order for small buckets [t, ss] for all t, |
michael@0 | 915 | including, magically, the bucket [ss,ss] too. |
michael@0 | 916 | This will avoid doing Real Work in subsequent Step 1's. |
michael@0 | 917 | --*/ |
michael@0 | 918 | { |
michael@0 | 919 | for (j = 0; j <= 255; j++) { |
michael@0 | 920 | copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; |
michael@0 | 921 | copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; |
michael@0 | 922 | } |
michael@0 | 923 | for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { |
michael@0 | 924 | k = ptr[j]-1; if (k < 0) k += nblock; |
michael@0 | 925 | c1 = block[k]; |
michael@0 | 926 | if (!bigDone[c1]) |
michael@0 | 927 | ptr[ copyStart[c1]++ ] = k; |
michael@0 | 928 | } |
michael@0 | 929 | for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { |
michael@0 | 930 | k = ptr[j]-1; if (k < 0) k += nblock; |
michael@0 | 931 | c1 = block[k]; |
michael@0 | 932 | if (!bigDone[c1]) |
michael@0 | 933 | ptr[ copyEnd[c1]-- ] = k; |
michael@0 | 934 | } |
michael@0 | 935 | } |
michael@0 | 936 | |
michael@0 | 937 | AssertH ( (copyStart[ss]-1 == copyEnd[ss]) |
michael@0 | 938 | || |
michael@0 | 939 | /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. |
michael@0 | 940 | Necessity for this case is demonstrated by compressing |
michael@0 | 941 | a sequence of approximately 48.5 million of character |
michael@0 | 942 | 251; 1.0.0/1.0.1 will then die here. */ |
michael@0 | 943 | (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), |
michael@0 | 944 | 1007 ) |
michael@0 | 945 | |
michael@0 | 946 | for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; |
michael@0 | 947 | |
michael@0 | 948 | /*-- |
michael@0 | 949 | Step 3: |
michael@0 | 950 | The [ss] big bucket is now done. Record this fact, |
michael@0 | 951 | and update the quadrant descriptors. Remember to |
michael@0 | 952 | update quadrants in the overshoot area too, if |
michael@0 | 953 | necessary. The "if (i < 255)" test merely skips |
michael@0 | 954 | this updating for the last bucket processed, since |
michael@0 | 955 | updating for the last bucket is pointless. |
michael@0 | 956 | |
michael@0 | 957 | The quadrant array provides a way to incrementally |
michael@0 | 958 | cache sort orderings, as they appear, so as to |
michael@0 | 959 | make subsequent comparisons in fullGtU() complete |
michael@0 | 960 | faster. For repetitive blocks this makes a big |
michael@0 | 961 | difference (but not big enough to be able to avoid |
michael@0 | 962 | the fallback sorting mechanism, exponential radix sort). |
michael@0 | 963 | |
michael@0 | 964 | The precise meaning is: at all times: |
michael@0 | 965 | |
michael@0 | 966 | for 0 <= i < nblock and 0 <= j <= nblock |
michael@0 | 967 | |
michael@0 | 968 | if block[i] != block[j], |
michael@0 | 969 | |
michael@0 | 970 | then the relative values of quadrant[i] and |
michael@0 | 971 | quadrant[j] are meaningless. |
michael@0 | 972 | |
michael@0 | 973 | else { |
michael@0 | 974 | if quadrant[i] < quadrant[j] |
michael@0 | 975 | then the string starting at i lexicographically |
michael@0 | 976 | precedes the string starting at j |
michael@0 | 977 | |
michael@0 | 978 | else if quadrant[i] > quadrant[j] |
michael@0 | 979 | then the string starting at j lexicographically |
michael@0 | 980 | precedes the string starting at i |
michael@0 | 981 | |
michael@0 | 982 | else |
michael@0 | 983 | the relative ordering of the strings starting |
michael@0 | 984 | at i and j has not yet been determined. |
michael@0 | 985 | } |
michael@0 | 986 | --*/ |
michael@0 | 987 | bigDone[ss] = True; |
michael@0 | 988 | |
michael@0 | 989 | if (i < 255) { |
michael@0 | 990 | Int32 bbStart = ftab[ss << 8] & CLEARMASK; |
michael@0 | 991 | Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; |
michael@0 | 992 | Int32 shifts = 0; |
michael@0 | 993 | |
michael@0 | 994 | while ((bbSize >> shifts) > 65534) shifts++; |
michael@0 | 995 | |
michael@0 | 996 | for (j = bbSize-1; j >= 0; j--) { |
michael@0 | 997 | Int32 a2update = ptr[bbStart + j]; |
michael@0 | 998 | UInt16 qVal = (UInt16)(j >> shifts); |
michael@0 | 999 | quadrant[a2update] = qVal; |
michael@0 | 1000 | if (a2update < BZ_N_OVERSHOOT) |
michael@0 | 1001 | quadrant[a2update + nblock] = qVal; |
michael@0 | 1002 | } |
michael@0 | 1003 | AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); |
michael@0 | 1004 | } |
michael@0 | 1005 | |
michael@0 | 1006 | } |
michael@0 | 1007 | |
michael@0 | 1008 | if (verb >= 4) |
michael@0 | 1009 | VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", |
michael@0 | 1010 | nblock, numQSorted, nblock - numQSorted ); |
michael@0 | 1011 | } |
michael@0 | 1012 | |
michael@0 | 1013 | #undef BIGFREQ |
michael@0 | 1014 | #undef SETMASK |
michael@0 | 1015 | #undef CLEARMASK |
michael@0 | 1016 | |
michael@0 | 1017 | |
michael@0 | 1018 | /*---------------------------------------------*/ |
michael@0 | 1019 | /* Pre: |
michael@0 | 1020 | nblock > 0 |
michael@0 | 1021 | arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] |
michael@0 | 1022 | ((UChar*)arr2) [0 .. nblock-1] holds block |
michael@0 | 1023 | arr1 exists for [0 .. nblock-1] |
michael@0 | 1024 | |
michael@0 | 1025 | Post: |
michael@0 | 1026 | ((UChar*)arr2) [0 .. nblock-1] holds block |
michael@0 | 1027 | All other areas of block destroyed |
michael@0 | 1028 | ftab [ 0 .. 65536 ] destroyed |
michael@0 | 1029 | arr1 [0 .. nblock-1] holds sorted order |
michael@0 | 1030 | */ |
michael@0 | 1031 | void BZ2_blockSort ( EState* s ) |
michael@0 | 1032 | { |
michael@0 | 1033 | UInt32* ptr = s->ptr; |
michael@0 | 1034 | UChar* block = s->block; |
michael@0 | 1035 | UInt32* ftab = s->ftab; |
michael@0 | 1036 | Int32 nblock = s->nblock; |
michael@0 | 1037 | Int32 verb = s->verbosity; |
michael@0 | 1038 | Int32 wfact = s->workFactor; |
michael@0 | 1039 | UInt16* quadrant; |
michael@0 | 1040 | Int32 budget; |
michael@0 | 1041 | Int32 budgetInit; |
michael@0 | 1042 | Int32 i; |
michael@0 | 1043 | |
michael@0 | 1044 | if (nblock < 10000) { |
michael@0 | 1045 | fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); |
michael@0 | 1046 | } else { |
michael@0 | 1047 | /* Calculate the location for quadrant, remembering to get |
michael@0 | 1048 | the alignment right. Assumes that &(block[0]) is at least |
michael@0 | 1049 | 2-byte aligned -- this should be ok since block is really |
michael@0 | 1050 | the first section of arr2. |
michael@0 | 1051 | */ |
michael@0 | 1052 | i = nblock+BZ_N_OVERSHOOT; |
michael@0 | 1053 | if (i & 1) i++; |
michael@0 | 1054 | quadrant = (UInt16*)(&(block[i])); |
michael@0 | 1055 | |
michael@0 | 1056 | /* (wfact-1) / 3 puts the default-factor-30 |
michael@0 | 1057 | transition point at very roughly the same place as |
michael@0 | 1058 | with v0.1 and v0.9.0. |
michael@0 | 1059 | Not that it particularly matters any more, since the |
michael@0 | 1060 | resulting compressed stream is now the same regardless |
michael@0 | 1061 | of whether or not we use the main sort or fallback sort. |
michael@0 | 1062 | */ |
michael@0 | 1063 | if (wfact < 1 ) wfact = 1; |
michael@0 | 1064 | if (wfact > 100) wfact = 100; |
michael@0 | 1065 | budgetInit = nblock * ((wfact-1) / 3); |
michael@0 | 1066 | budget = budgetInit; |
michael@0 | 1067 | |
michael@0 | 1068 | mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); |
michael@0 | 1069 | if (verb >= 3) |
michael@0 | 1070 | VPrintf3 ( " %d work, %d block, ratio %5.2f\n", |
michael@0 | 1071 | budgetInit - budget, |
michael@0 | 1072 | nblock, |
michael@0 | 1073 | (float)(budgetInit - budget) / |
michael@0 | 1074 | (float)(nblock==0 ? 1 : nblock) ); |
michael@0 | 1075 | if (budget < 0) { |
michael@0 | 1076 | if (verb >= 2) |
michael@0 | 1077 | VPrintf0 ( " too repetitive; using fallback" |
michael@0 | 1078 | " sorting algorithm\n" ); |
michael@0 | 1079 | fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); |
michael@0 | 1080 | } |
michael@0 | 1081 | } |
michael@0 | 1082 | |
michael@0 | 1083 | s->origPtr = -1; |
michael@0 | 1084 | for (i = 0; i < s->nblock; i++) |
michael@0 | 1085 | if (ptr[i] == 0) |
michael@0 | 1086 | { s->origPtr = i; break; }; |
michael@0 | 1087 | |
michael@0 | 1088 | AssertH( s->origPtr != -1, 1003 ); |
michael@0 | 1089 | } |
michael@0 | 1090 | |
michael@0 | 1091 | |
michael@0 | 1092 | /*-------------------------------------------------------------*/ |
michael@0 | 1093 | /*--- end blocksort.c ---*/ |
michael@0 | 1094 | /*-------------------------------------------------------------*/ |