|
1 /* |
|
2 * jchuff.c |
|
3 * |
|
4 * This file was part of the Independent JPEG Group's software: |
|
5 * Copyright (C) 1991-1997, Thomas G. Lane. |
|
6 * libjpeg-turbo Modifications: |
|
7 * Copyright (C) 2009-2011, D. R. Commander. |
|
8 * For conditions of distribution and use, see the accompanying README file. |
|
9 * |
|
10 * This file contains Huffman entropy encoding routines. |
|
11 * |
|
12 * Much of the complexity here has to do with supporting output suspension. |
|
13 * If the data destination module demands suspension, we want to be able to |
|
14 * back up to the start of the current MCU. To do this, we copy state |
|
15 * variables into local working storage, and update them back to the |
|
16 * permanent JPEG objects only upon successful completion of an MCU. |
|
17 */ |
|
18 |
|
19 #define JPEG_INTERNALS |
|
20 #include "jinclude.h" |
|
21 #include "jpeglib.h" |
|
22 #include "jchuff.h" /* Declarations shared with jcphuff.c */ |
|
23 #include <limits.h> |
|
24 |
|
/* Bit-length lookup table: entry i holds the number of bits needed to
 * represent the magnitude i.  The table is declared with 65536 entries and
 * its initializers are pulled in from jpeg_nbits_table.h (not visible here).
 * The encoder indexes it with the absolute value of a coefficient or of a
 * DC difference.
 */
static const unsigned char jpeg_nbits_table[65536] = {
/* Number i needs jpeg_nbits_table[i] bits to be represented. */
#include "jpeg_nbits_table.h"
};
|
29 |
|
/* Two-argument minimum, supplied only when nothing else has defined one.
 * Either argument may be evaluated twice, so pass side-effect-free operands.
 */
#if !defined(min)
#define min(a,b)  (((a) < (b)) ? (a) : (b))
#endif
|
33 |
|
34 |
|
35 /* Expanded entropy encoder object for Huffman encoding. |
|
36 * |
|
37 * The savable_state subrecord contains fields that change within an MCU, |
|
38 * but must not be updated permanently until we complete the MCU. |
|
39 */ |
|
40 |
|
41 typedef struct { |
|
42 size_t put_buffer; /* current bit-accumulation buffer */ |
|
43 int put_bits; /* # of bits now in it */ |
|
44 int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ |
|
45 } savable_state; |
|
46 |
|
/* ASSIGN_STATE(dest,src) copies one savable_state record into another.
 *
 * Normally this is a plain structure assignment.  When NO_STRUCT_ASSIGN is
 * defined (compilers with missing or broken structure assignment), the
 * copy is spelled out field by field instead.  That expansion is written
 * for MAX_COMPS_IN_SCAN == 4 only, so it must be revised if the constant
 * is ever changed.
 */

#ifdef NO_STRUCT_ASSIGN
#if MAX_COMPS_IN_SCAN == 4
#define ASSIGN_STATE(dest,src)  \
        ((dest).put_buffer = (src).put_buffer, \
         (dest).put_bits = (src).put_bits, \
         (dest).last_dc_val[0] = (src).last_dc_val[0], \
         (dest).last_dc_val[1] = (src).last_dc_val[1], \
         (dest).last_dc_val[2] = (src).last_dc_val[2], \
         (dest).last_dc_val[3] = (src).last_dc_val[3])
#endif
#else
#define ASSIGN_STATE(dest,src)  ((dest) = (src))
#endif
|
65 |
|
66 |
|
67 typedef struct { |
|
68 struct jpeg_entropy_encoder pub; /* public fields */ |
|
69 |
|
70 savable_state saved; /* Bit buffer & DC state at start of MCU */ |
|
71 |
|
72 /* These fields are NOT loaded into local working state. */ |
|
73 unsigned int restarts_to_go; /* MCUs left in this restart interval */ |
|
74 int next_restart_num; /* next restart number to write (0-7) */ |
|
75 |
|
76 /* Pointers to derived tables (these workspaces have image lifespan) */ |
|
77 c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; |
|
78 c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; |
|
79 |
|
80 #ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ |
|
81 long * dc_count_ptrs[NUM_HUFF_TBLS]; |
|
82 long * ac_count_ptrs[NUM_HUFF_TBLS]; |
|
83 #endif |
|
84 } huff_entropy_encoder; |
|
85 |
|
86 typedef huff_entropy_encoder * huff_entropy_ptr; |
|
87 |
|
88 /* Working state while writing an MCU. |
|
89 * This struct contains all the fields that are needed by subroutines. |
|
90 */ |
|
91 |
|
92 typedef struct { |
|
93 JOCTET * next_output_byte; /* => next byte to write in buffer */ |
|
94 size_t free_in_buffer; /* # of byte spaces remaining in buffer */ |
|
95 savable_state cur; /* Current bit buffer & DC state */ |
|
96 j_compress_ptr cinfo; /* dump_buffer needs access to this */ |
|
97 } working_state; |
|
98 |
|
99 |
|
/* Forward declarations of the entropy-encoder methods that start_pass_huff
 * installs into the public method table before their definitions appear.
 */

/* Normal output pass: encode one MCU / finish the pass */
METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo,
                                        JBLOCKROW *MCU_data));
METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo));
#ifdef ENTROPY_OPT_SUPPORTED
/* Statistics-gathering pass (only built with ENTROPY_OPT_SUPPORTED) */
METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo,
                                          JBLOCKROW *MCU_data));
METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo));
#endif
|
109 |
|
110 |
|
111 /* |
|
112 * Initialize for a Huffman-compressed scan. |
|
113 * If gather_statistics is TRUE, we do not output anything during the scan, |
|
114 * just count the Huffman symbols used and generate Huffman code tables. |
|
115 */ |
|
116 |
|
117 METHODDEF(void) |
|
118 start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) |
|
119 { |
|
120 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
|
121 int ci, dctbl, actbl; |
|
122 jpeg_component_info * compptr; |
|
123 |
|
124 if (gather_statistics) { |
|
125 #ifdef ENTROPY_OPT_SUPPORTED |
|
126 entropy->pub.encode_mcu = encode_mcu_gather; |
|
127 entropy->pub.finish_pass = finish_pass_gather; |
|
128 #else |
|
129 ERREXIT(cinfo, JERR_NOT_COMPILED); |
|
130 #endif |
|
131 } else { |
|
132 entropy->pub.encode_mcu = encode_mcu_huff; |
|
133 entropy->pub.finish_pass = finish_pass_huff; |
|
134 } |
|
135 |
|
136 for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
|
137 compptr = cinfo->cur_comp_info[ci]; |
|
138 dctbl = compptr->dc_tbl_no; |
|
139 actbl = compptr->ac_tbl_no; |
|
140 if (gather_statistics) { |
|
141 #ifdef ENTROPY_OPT_SUPPORTED |
|
142 /* Check for invalid table indexes */ |
|
143 /* (make_c_derived_tbl does this in the other path) */ |
|
144 if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS) |
|
145 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); |
|
146 if (actbl < 0 || actbl >= NUM_HUFF_TBLS) |
|
147 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); |
|
148 /* Allocate and zero the statistics tables */ |
|
149 /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ |
|
150 if (entropy->dc_count_ptrs[dctbl] == NULL) |
|
151 entropy->dc_count_ptrs[dctbl] = (long *) |
|
152 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
|
153 257 * SIZEOF(long)); |
|
154 MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long)); |
|
155 if (entropy->ac_count_ptrs[actbl] == NULL) |
|
156 entropy->ac_count_ptrs[actbl] = (long *) |
|
157 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
|
158 257 * SIZEOF(long)); |
|
159 MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long)); |
|
160 #endif |
|
161 } else { |
|
162 /* Compute derived values for Huffman tables */ |
|
163 /* We may do this more than once for a table, but it's not expensive */ |
|
164 jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, |
|
165 & entropy->dc_derived_tbls[dctbl]); |
|
166 jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, |
|
167 & entropy->ac_derived_tbls[actbl]); |
|
168 } |
|
169 /* Initialize DC predictions to 0 */ |
|
170 entropy->saved.last_dc_val[ci] = 0; |
|
171 } |
|
172 |
|
173 /* Initialize bit buffer to empty */ |
|
174 entropy->saved.put_buffer = 0; |
|
175 entropy->saved.put_bits = 0; |
|
176 |
|
177 /* Initialize restart stuff */ |
|
178 entropy->restarts_to_go = cinfo->restart_interval; |
|
179 entropy->next_restart_num = 0; |
|
180 } |
|
181 |
|
182 |
|
183 /* |
|
184 * Compute the derived values for a Huffman table. |
|
185 * This routine also performs some validation checks on the table. |
|
186 * |
|
187 * Note this is also used by jcphuff.c. |
|
188 */ |
|
189 |
|
190 GLOBAL(void) |
|
191 jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, |
|
192 c_derived_tbl ** pdtbl) |
|
193 { |
|
194 JHUFF_TBL *htbl; |
|
195 c_derived_tbl *dtbl; |
|
196 int p, i, l, lastp, si, maxsymbol; |
|
197 char huffsize[257]; |
|
198 unsigned int huffcode[257]; |
|
199 unsigned int code; |
|
200 |
|
201 /* Note that huffsize[] and huffcode[] are filled in code-length order, |
|
202 * paralleling the order of the symbols themselves in htbl->huffval[]. |
|
203 */ |
|
204 |
|
205 /* Find the input Huffman table */ |
|
206 if (tblno < 0 || tblno >= NUM_HUFF_TBLS) |
|
207 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
|
208 htbl = |
|
209 isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; |
|
210 if (htbl == NULL) |
|
211 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
|
212 |
|
213 /* Allocate a workspace if we haven't already done so. */ |
|
214 if (*pdtbl == NULL) |
|
215 *pdtbl = (c_derived_tbl *) |
|
216 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
|
217 SIZEOF(c_derived_tbl)); |
|
218 dtbl = *pdtbl; |
|
219 |
|
220 /* Figure C.1: make table of Huffman code length for each symbol */ |
|
221 |
|
222 p = 0; |
|
223 for (l = 1; l <= 16; l++) { |
|
224 i = (int) htbl->bits[l]; |
|
225 if (i < 0 || p + i > 256) /* protect against table overrun */ |
|
226 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
|
227 while (i--) |
|
228 huffsize[p++] = (char) l; |
|
229 } |
|
230 huffsize[p] = 0; |
|
231 lastp = p; |
|
232 |
|
233 /* Figure C.2: generate the codes themselves */ |
|
234 /* We also validate that the counts represent a legal Huffman code tree. */ |
|
235 |
|
236 code = 0; |
|
237 si = huffsize[0]; |
|
238 p = 0; |
|
239 while (huffsize[p]) { |
|
240 while (((int) huffsize[p]) == si) { |
|
241 huffcode[p++] = code; |
|
242 code++; |
|
243 } |
|
244 /* code is now 1 more than the last code used for codelength si; but |
|
245 * it must still fit in si bits, since no code is allowed to be all ones. |
|
246 */ |
|
247 if (((INT32) code) >= (((INT32) 1) << si)) |
|
248 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
|
249 code <<= 1; |
|
250 si++; |
|
251 } |
|
252 |
|
253 /* Figure C.3: generate encoding tables */ |
|
254 /* These are code and size indexed by symbol value */ |
|
255 |
|
256 /* Set all codeless symbols to have code length 0; |
|
257 * this lets us detect duplicate VAL entries here, and later |
|
258 * allows emit_bits to detect any attempt to emit such symbols. |
|
259 */ |
|
260 MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi)); |
|
261 |
|
262 /* This is also a convenient place to check for out-of-range |
|
263 * and duplicated VAL entries. We allow 0..255 for AC symbols |
|
264 * but only 0..15 for DC. (We could constrain them further |
|
265 * based on data depth and mode, but this seems enough.) |
|
266 */ |
|
267 maxsymbol = isDC ? 15 : 255; |
|
268 |
|
269 for (p = 0; p < lastp; p++) { |
|
270 i = htbl->huffval[p]; |
|
271 if (i < 0 || i > maxsymbol || dtbl->ehufsi[i]) |
|
272 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
|
273 dtbl->ehufco[i] = huffcode[p]; |
|
274 dtbl->ehufsi[i] = huffsize[p]; |
|
275 } |
|
276 } |
|
277 |
|
278 |
|
/* Outputting bytes to the file */

/* Store one byte through the working state's output pointer.
 * When that fills the buffer, dump_buffer is called; if it reports that
 * the buffer could not be emptied, 'action' is executed (the callers in
 * this file pass "return FALSE" to suspend).
 * Note that 'state' is evaluated more than once.
 */
#define emit_byte(state,val,action)  \
        { *(state)->next_output_byte++ = (JOCTET) (val);  \
          if (--(state)->free_in_buffer == 0) {  \
            if (! dump_buffer(state))  \
              { action; }  \
          } }
|
287 |
|
288 |
|
289 LOCAL(boolean) |
|
290 dump_buffer (working_state * state) |
|
291 /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ |
|
292 { |
|
293 struct jpeg_destination_mgr * dest = state->cinfo->dest; |
|
294 |
|
295 if (! (*dest->empty_output_buffer) (state->cinfo)) |
|
296 return FALSE; |
|
297 /* After a successful buffer dump, must reset buffer pointers */ |
|
298 state->next_output_byte = dest->next_output_byte; |
|
299 state->free_in_buffer = dest->free_in_buffer; |
|
300 return TRUE; |
|
301 } |
|
302 |
|
303 |
|
/* Outputting bits to the file */

/* These macros perform the same task as the emit_bits() function in the
 * original libjpeg code.  In addition to reducing overhead by explicitly
 * inlining the code, additional performance is achieved by taking into
 * account the size of the bit buffer and waiting until it is almost full
 * before emptying it.  This mostly benefits 64-bit platforms, since 6
 * bytes can be stored in a 64-bit bit buffer before it has to be emptied.
 *
 * All of them operate on variables of the enclosing function rather than
 * on arguments: put_buffer (the bit accumulator), put_bits (number of bits
 * it holds) and buffer (output byte pointer).  Each macro expands to a
 * brace-enclosed block and is invoked without a trailing semicolon.
 */

/* Remove the oldest 8 bits from the accumulator and store them at *buffer.
 * A 0xFF data byte is followed by a stuffed zero byte, so one invocation
 * can advance buffer by two.
 */
#define EMIT_BYTE() { \
  JOCTET c; \
  put_bits -= 8; \
  c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
  *buffer++ = c; \
  if (c == 0xFF)  /* need to stuff a zero byte? */ \
    *buffer++ = 0; \
 }

/* Append 'size' bits holding 'code' to the low end of the accumulator.
 * No overflow check is made here: the caller must make sure that
 * put_bits + size does not exceed the width of put_buffer.
 * NOTE(review): 'code' is OR-ed in unmasked, so it is presumably expected
 * to have no bits set above 'size' -- confirm against callers.
 */
#define PUT_BITS(code, size) { \
  put_bits += size; \
  put_buffer = (put_buffer << size) | code; \
 }

/* If more than 15 bits are pending, write out two bytes. */
#define CHECKBUF15() { \
  if (put_bits > 15) { \
    EMIT_BYTE() \
    EMIT_BYTE() \
  } \
 }

/* If more than 31 bits are pending, write out four bytes. */
#define CHECKBUF31() { \
  if (put_bits > 31) { \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
  } \
 }

/* If more than 47 bits are pending, write out six bytes. */
#define CHECKBUF47() { \
  if (put_bits > 47) { \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
    EMIT_BYTE() \
  } \
 }

/* EMIT_BITS(code, size) appends one bit field.
 * EMIT_CODE(code, size) appends a Huffman code followed by the low 'nbits'
 * bits of 'temp2'; nbits and temp2 are taken from the enclosing function,
 * and temp2 is masked in place.
 */
#if __WORDSIZE==64 || defined(_WIN64)

/* 64-bit variant: make room first (at most 47 bits stay pending), then
 * append.
 */
#define EMIT_BITS(code, size) { \
  CHECKBUF47() \
  PUT_BITS(code, size) \
 }

/* 64-bit variant: at most 31 bits stay pending before the two fields are
 * appended back to back.
 */
#define EMIT_CODE(code, size) { \
  temp2 &= (((INT32) 1)<<nbits) - 1; \
  CHECKBUF31() \
  PUT_BITS(code, size) \
  PUT_BITS(temp2, nbits) \
 }

#else

/* Other platforms: append first, then drain two bytes whenever more than
 * 15 bits are pending.
 */
#define EMIT_BITS(code, size) { \
  PUT_BITS(code, size) \
  CHECKBUF15() \
 }

#define EMIT_CODE(code, size) { \
  temp2 &= (((INT32) 1)<<nbits) - 1; \
  PUT_BITS(code, size) \
  CHECKBUF15() \
  PUT_BITS(temp2, nbits) \
  CHECKBUF15() \
 }

#endif
|
385 |
|
386 |
|
/* Capacity, in bytes, reserved for the output of one encode_one_block or
 * flush_bits call.
 *
 * An encoded block can be larger than the 128-byte unencoded block.  For
 * each of the DCTSIZE2 coefficients the encoder appends a Huffman code and
 * a magnitude field, and each of those can be up to 16 bits long: that is
 * 64 * 2 * 16 bits = 256 bytes.  Every output byte that happens to be 0xFF
 * is followed by a stuffed zero byte, which can double the total to 512
 * bytes.  Anything smaller allows a write past the end of the local buffer
 * (or past the end of the destination buffer on the direct-write path).
 */
#define BUFSIZE (DCTSIZE2 * 8)

/* Choose where the enclosing function writes its bytes.
 * If the destination buffer has fewer than BUFSIZE bytes free, output goes
 * to the function's local array _buffer and localbuf is set; otherwise
 * bytes are written directly at state->next_output_byte.
 * Uses state, buffer, _buffer and localbuf from the enclosing function.
 */
#define LOAD_BUFFER() { \
  if (state->free_in_buffer < BUFSIZE) { \
    localbuf = 1; \
    buffer = _buffer; \
  } \
  else buffer = state->next_output_byte; \
 }

/* Commit the bytes produced since LOAD_BUFFER.
 * Local-buffer case: copy them into the destination buffer in pieces,
 * calling dump_buffer each time the destination fills up.  If dump_buffer
 * fails, this executes "return FALSE" in the enclosing function.
 * Direct case: just advance the working pointer and free count.
 * Uses state, buffer, _buffer, localbuf, bytes and bytestocopy from the
 * enclosing function.
 */
#define STORE_BUFFER() { \
  if (localbuf) { \
    bytes = buffer - _buffer; \
    buffer = _buffer; \
    while (bytes > 0) { \
      bytestocopy = min(bytes, state->free_in_buffer); \
      MEMCOPY(state->next_output_byte, buffer, bytestocopy); \
      state->next_output_byte += bytestocopy; \
      buffer += bytestocopy; \
      state->free_in_buffer -= bytestocopy; \
      if (state->free_in_buffer == 0) \
        if (! dump_buffer(state)) return FALSE; \
      bytes -= bytestocopy; \
    } \
  } \
  else { \
    state->free_in_buffer -= (buffer - state->next_output_byte); \
    state->next_output_byte = buffer; \
  } \
 }
|
417 |
|
418 |
|
419 LOCAL(boolean) |
|
420 flush_bits (working_state * state) |
|
421 { |
|
422 JOCTET _buffer[BUFSIZE], *buffer; |
|
423 size_t put_buffer; int put_bits; |
|
424 size_t bytes, bytestocopy; int localbuf = 0; |
|
425 |
|
426 put_buffer = state->cur.put_buffer; |
|
427 put_bits = state->cur.put_bits; |
|
428 LOAD_BUFFER() |
|
429 |
|
430 /* fill any partial byte with ones */ |
|
431 PUT_BITS(0x7F, 7) |
|
432 while (put_bits >= 8) EMIT_BYTE() |
|
433 |
|
434 state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ |
|
435 state->cur.put_bits = 0; |
|
436 STORE_BUFFER() |
|
437 |
|
438 return TRUE; |
|
439 } |
|
440 |
|
441 |
|
442 /* Encode a single block's worth of coefficients */ |
|
443 |
|
444 LOCAL(boolean) |
|
445 encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val, |
|
446 c_derived_tbl *dctbl, c_derived_tbl *actbl) |
|
447 { |
|
448 int temp, temp2, temp3; |
|
449 int nbits; |
|
450 int r, code, size; |
|
451 JOCTET _buffer[BUFSIZE], *buffer; |
|
452 size_t put_buffer; int put_bits; |
|
453 int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0]; |
|
454 size_t bytes, bytestocopy; int localbuf = 0; |
|
455 |
|
456 put_buffer = state->cur.put_buffer; |
|
457 put_bits = state->cur.put_bits; |
|
458 LOAD_BUFFER() |
|
459 |
|
460 /* Encode the DC coefficient difference per section F.1.2.1 */ |
|
461 |
|
462 temp = temp2 = block[0] - last_dc_val; |
|
463 |
|
464 /* This is a well-known technique for obtaining the absolute value without a |
|
465 * branch. It is derived from an assembly language technique presented in |
|
466 * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by |
|
467 * Agner Fog. |
|
468 */ |
|
469 temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); |
|
470 temp ^= temp3; |
|
471 temp -= temp3; |
|
472 |
|
473 /* For a negative input, want temp2 = bitwise complement of abs(input) */ |
|
474 /* This code assumes we are on a two's complement machine */ |
|
475 temp2 += temp3; |
|
476 |
|
477 /* Find the number of bits needed for the magnitude of the coefficient */ |
|
478 nbits = jpeg_nbits_table[temp]; |
|
479 |
|
480 /* Emit the Huffman-coded symbol for the number of bits */ |
|
481 code = dctbl->ehufco[nbits]; |
|
482 size = dctbl->ehufsi[nbits]; |
|
483 PUT_BITS(code, size) |
|
484 CHECKBUF15() |
|
485 |
|
486 /* Mask off any extra bits in code */ |
|
487 temp2 &= (((INT32) 1)<<nbits) - 1; |
|
488 |
|
489 /* Emit that number of bits of the value, if positive, */ |
|
490 /* or the complement of its magnitude, if negative. */ |
|
491 PUT_BITS(temp2, nbits) |
|
492 CHECKBUF15() |
|
493 |
|
494 /* Encode the AC coefficients per section F.1.2.2 */ |
|
495 |
|
496 r = 0; /* r = run length of zeros */ |
|
497 |
|
498 /* Manually unroll the k loop to eliminate the counter variable. This |
|
499 * improves performance greatly on systems with a limited number of |
|
500 * registers (such as x86.) |
|
501 */ |
|
502 #define kloop(jpeg_natural_order_of_k) { \ |
|
503 if ((temp = block[jpeg_natural_order_of_k]) == 0) { \ |
|
504 r++; \ |
|
505 } else { \ |
|
506 temp2 = temp; \ |
|
507 /* Branch-less absolute value, bitwise complement, etc., same as above */ \ |
|
508 temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \ |
|
509 temp ^= temp3; \ |
|
510 temp -= temp3; \ |
|
511 temp2 += temp3; \ |
|
512 nbits = jpeg_nbits_table[temp]; \ |
|
513 /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ |
|
514 while (r > 15) { \ |
|
515 EMIT_BITS(code_0xf0, size_0xf0) \ |
|
516 r -= 16; \ |
|
517 } \ |
|
518 /* Emit Huffman symbol for run length / number of bits */ \ |
|
519 temp3 = (r << 4) + nbits; \ |
|
520 code = actbl->ehufco[temp3]; \ |
|
521 size = actbl->ehufsi[temp3]; \ |
|
522 EMIT_CODE(code, size) \ |
|
523 r = 0; \ |
|
524 } \ |
|
525 } |
|
526 |
|
527 /* One iteration for each value in jpeg_natural_order[] */ |
|
528 kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3); |
|
529 kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18); |
|
530 kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26); |
|
531 kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27); |
|
532 kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21); |
|
533 kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57); |
|
534 kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15); |
|
535 kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58); |
|
536 kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39); |
|
537 kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47); |
|
538 kloop(55); kloop(62); kloop(63); |
|
539 |
|
540 /* If the last coef(s) were zero, emit an end-of-block code */ |
|
541 if (r > 0) { |
|
542 code = actbl->ehufco[0]; |
|
543 size = actbl->ehufsi[0]; |
|
544 EMIT_BITS(code, size) |
|
545 } |
|
546 |
|
547 state->cur.put_buffer = put_buffer; |
|
548 state->cur.put_bits = put_bits; |
|
549 STORE_BUFFER() |
|
550 |
|
551 return TRUE; |
|
552 } |
|
553 |
|
554 |
|
555 /* |
|
556 * Emit a restart marker & resynchronize predictions. |
|
557 */ |
|
558 |
|
559 LOCAL(boolean) |
|
560 emit_restart (working_state * state, int restart_num) |
|
561 { |
|
562 int ci; |
|
563 |
|
564 if (! flush_bits(state)) |
|
565 return FALSE; |
|
566 |
|
567 emit_byte(state, 0xFF, return FALSE); |
|
568 emit_byte(state, JPEG_RST0 + restart_num, return FALSE); |
|
569 |
|
570 /* Re-initialize DC predictions to 0 */ |
|
571 for (ci = 0; ci < state->cinfo->comps_in_scan; ci++) |
|
572 state->cur.last_dc_val[ci] = 0; |
|
573 |
|
574 /* The restart counter is not updated until we successfully write the MCU. */ |
|
575 |
|
576 return TRUE; |
|
577 } |
|
578 |
|
579 |
|
580 /* |
|
581 * Encode and output one MCU's worth of Huffman-compressed coefficients. |
|
582 */ |
|
583 |
|
584 METHODDEF(boolean) |
|
585 encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
|
586 { |
|
587 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
|
588 working_state state; |
|
589 int blkn, ci; |
|
590 jpeg_component_info * compptr; |
|
591 |
|
592 /* Load up working state */ |
|
593 state.next_output_byte = cinfo->dest->next_output_byte; |
|
594 state.free_in_buffer = cinfo->dest->free_in_buffer; |
|
595 ASSIGN_STATE(state.cur, entropy->saved); |
|
596 state.cinfo = cinfo; |
|
597 |
|
598 /* Emit restart marker if needed */ |
|
599 if (cinfo->restart_interval) { |
|
600 if (entropy->restarts_to_go == 0) |
|
601 if (! emit_restart(&state, entropy->next_restart_num)) |
|
602 return FALSE; |
|
603 } |
|
604 |
|
605 /* Encode the MCU data blocks */ |
|
606 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
|
607 ci = cinfo->MCU_membership[blkn]; |
|
608 compptr = cinfo->cur_comp_info[ci]; |
|
609 if (! encode_one_block(&state, |
|
610 MCU_data[blkn][0], state.cur.last_dc_val[ci], |
|
611 entropy->dc_derived_tbls[compptr->dc_tbl_no], |
|
612 entropy->ac_derived_tbls[compptr->ac_tbl_no])) |
|
613 return FALSE; |
|
614 /* Update last_dc_val */ |
|
615 state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; |
|
616 } |
|
617 |
|
618 /* Completed MCU, so update state */ |
|
619 cinfo->dest->next_output_byte = state.next_output_byte; |
|
620 cinfo->dest->free_in_buffer = state.free_in_buffer; |
|
621 ASSIGN_STATE(entropy->saved, state.cur); |
|
622 |
|
623 /* Update restart-interval state too */ |
|
624 if (cinfo->restart_interval) { |
|
625 if (entropy->restarts_to_go == 0) { |
|
626 entropy->restarts_to_go = cinfo->restart_interval; |
|
627 entropy->next_restart_num++; |
|
628 entropy->next_restart_num &= 7; |
|
629 } |
|
630 entropy->restarts_to_go--; |
|
631 } |
|
632 |
|
633 return TRUE; |
|
634 } |
|
635 |
|
636 |
|
637 /* |
|
638 * Finish up at the end of a Huffman-compressed scan. |
|
639 */ |
|
640 |
|
641 METHODDEF(void) |
|
642 finish_pass_huff (j_compress_ptr cinfo) |
|
643 { |
|
644 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
|
645 working_state state; |
|
646 |
|
647 /* Load up working state ... flush_bits needs it */ |
|
648 state.next_output_byte = cinfo->dest->next_output_byte; |
|
649 state.free_in_buffer = cinfo->dest->free_in_buffer; |
|
650 ASSIGN_STATE(state.cur, entropy->saved); |
|
651 state.cinfo = cinfo; |
|
652 |
|
653 /* Flush out the last data */ |
|
654 if (! flush_bits(&state)) |
|
655 ERREXIT(cinfo, JERR_CANT_SUSPEND); |
|
656 |
|
657 /* Update state */ |
|
658 cinfo->dest->next_output_byte = state.next_output_byte; |
|
659 cinfo->dest->free_in_buffer = state.free_in_buffer; |
|
660 ASSIGN_STATE(entropy->saved, state.cur); |
|
661 } |
|
662 |
|
663 |
|
664 /* |
|
665 * Huffman coding optimization. |
|
666 * |
|
667 * We first scan the supplied data and count the number of uses of each symbol |
|
668 * that is to be Huffman-coded. (This process MUST agree with the code above.) |
|
669 * Then we build a Huffman coding tree for the observed counts. |
|
670 * Symbols which are not needed at all for the particular image are not |
|
671 * assigned any code, which saves space in the DHT marker as well as in |
|
672 * the compressed data. |
|
673 */ |
|
674 |
|
675 #ifdef ENTROPY_OPT_SUPPORTED |
|
676 |
|
677 |
|
678 /* Process a single block's worth of coefficients */ |
|
679 |
|
680 LOCAL(void) |
|
681 htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, |
|
682 long dc_counts[], long ac_counts[]) |
|
683 { |
|
684 register int temp; |
|
685 register int nbits; |
|
686 register int k, r; |
|
687 |
|
688 /* Encode the DC coefficient difference per section F.1.2.1 */ |
|
689 |
|
690 temp = block[0] - last_dc_val; |
|
691 if (temp < 0) |
|
692 temp = -temp; |
|
693 |
|
694 /* Find the number of bits needed for the magnitude of the coefficient */ |
|
695 nbits = 0; |
|
696 while (temp) { |
|
697 nbits++; |
|
698 temp >>= 1; |
|
699 } |
|
700 /* Check for out-of-range coefficient values. |
|
701 * Since we're encoding a difference, the range limit is twice as much. |
|
702 */ |
|
703 if (nbits > MAX_COEF_BITS+1) |
|
704 ERREXIT(cinfo, JERR_BAD_DCT_COEF); |
|
705 |
|
706 /* Count the Huffman symbol for the number of bits */ |
|
707 dc_counts[nbits]++; |
|
708 |
|
709 /* Encode the AC coefficients per section F.1.2.2 */ |
|
710 |
|
711 r = 0; /* r = run length of zeros */ |
|
712 |
|
713 for (k = 1; k < DCTSIZE2; k++) { |
|
714 if ((temp = block[jpeg_natural_order[k]]) == 0) { |
|
715 r++; |
|
716 } else { |
|
717 /* if run length > 15, must emit special run-length-16 codes (0xF0) */ |
|
718 while (r > 15) { |
|
719 ac_counts[0xF0]++; |
|
720 r -= 16; |
|
721 } |
|
722 |
|
723 /* Find the number of bits needed for the magnitude of the coefficient */ |
|
724 if (temp < 0) |
|
725 temp = -temp; |
|
726 |
|
727 /* Find the number of bits needed for the magnitude of the coefficient */ |
|
728 nbits = 1; /* there must be at least one 1 bit */ |
|
729 while ((temp >>= 1)) |
|
730 nbits++; |
|
731 /* Check for out-of-range coefficient values */ |
|
732 if (nbits > MAX_COEF_BITS) |
|
733 ERREXIT(cinfo, JERR_BAD_DCT_COEF); |
|
734 |
|
735 /* Count Huffman symbol for run length / number of bits */ |
|
736 ac_counts[(r << 4) + nbits]++; |
|
737 |
|
738 r = 0; |
|
739 } |
|
740 } |
|
741 |
|
742 /* If the last coef(s) were zero, emit an end-of-block code */ |
|
743 if (r > 0) |
|
744 ac_counts[0]++; |
|
745 } |
|
746 |
|
747 |
|
748 /* |
|
749 * Trial-encode one MCU's worth of Huffman-compressed coefficients. |
|
750 * No data is actually output, so no suspension return is possible. |
|
751 */ |
|
752 |
|
753 METHODDEF(boolean) |
|
754 encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
|
755 { |
|
756 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
|
757 int blkn, ci; |
|
758 jpeg_component_info * compptr; |
|
759 |
|
760 /* Take care of restart intervals if needed */ |
|
761 if (cinfo->restart_interval) { |
|
762 if (entropy->restarts_to_go == 0) { |
|
763 /* Re-initialize DC predictions to 0 */ |
|
764 for (ci = 0; ci < cinfo->comps_in_scan; ci++) |
|
765 entropy->saved.last_dc_val[ci] = 0; |
|
766 /* Update restart state */ |
|
767 entropy->restarts_to_go = cinfo->restart_interval; |
|
768 } |
|
769 entropy->restarts_to_go--; |
|
770 } |
|
771 |
|
772 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
|
773 ci = cinfo->MCU_membership[blkn]; |
|
774 compptr = cinfo->cur_comp_info[ci]; |
|
775 htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci], |
|
776 entropy->dc_count_ptrs[compptr->dc_tbl_no], |
|
777 entropy->ac_count_ptrs[compptr->ac_tbl_no]); |
|
778 entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0]; |
|
779 } |
|
780 |
|
781 return TRUE; |
|
782 } |
|
783 |
|
784 |
|
785 /* |
|
786 * Generate the best Huffman code table for the given counts, fill htbl. |
|
787 * Note this is also used by jcphuff.c. |
|
788 * |
|
789 * The JPEG standard requires that no symbol be assigned a codeword of all |
|
790 * one bits (so that padding bits added at the end of a compressed segment |
|
791 * can't look like a valid code). Because of the canonical ordering of |
|
792 * codewords, this just means that there must be an unused slot in the |
|
793 * longest codeword length category. Section K.2 of the JPEG spec suggests |
|
794 * reserving such a slot by pretending that symbol 256 is a valid symbol |
|
795 * with count 1. In theory that's not optimal; giving it count zero but |
|
796 * including it in the symbol set anyway should give a better Huffman code. |
|
797 * But the theoretically better code actually seems to come out worse in |
|
798 * practice, because it produces more all-ones bytes (which incur stuffed |
|
799 * zero bytes in the final file). In any case the difference is tiny. |
|
800 * |
|
801 * The JPEG standard requires Huffman codes to be no more than 16 bits long. |
|
802 * If some symbols have a very small but nonzero probability, the Huffman tree |
|
803 * must be adjusted to meet the code length restriction. We currently use |
|
804 * the adjustment method suggested in JPEG section K.2. This method is *not* |
|
805 * optimal; it may not choose the best possible limited-length code. But |
|
806 * typically only very-low-frequency symbols will be given less-than-optimal |
|
807 * lengths, so the code is almost optimal. Experimental comparisons against |
|
808 * an optimal limited-length-code algorithm indicate that the difference is |
|
809 * microscopic --- usually less than a hundredth of a percent of total size. |
|
810 * So the extra complexity of an optimal algorithm doesn't seem worthwhile. |
|
811 */ |
|
812 |
|
813 GLOBAL(void) |
|
814 jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]) |
|
815 { |
|
816 #define MAX_CLEN 32 /* assumed maximum initial code length */ |
|
817 UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ |
|
818 int codesize[257]; /* codesize[k] = code length of symbol k */ |
|
819 int others[257]; /* next symbol in current branch of tree */ |
|
820 int c1, c2; |
|
821 int p, i, j; |
|
822 long v; |
|
823 |
|
824 /* This algorithm is explained in section K.2 of the JPEG standard */ |
|
825 |
|
826 MEMZERO(bits, SIZEOF(bits)); |
|
827 MEMZERO(codesize, SIZEOF(codesize)); |
|
828 for (i = 0; i < 257; i++) |
|
829 others[i] = -1; /* init links to empty */ |
|
830 |
|
831 freq[256] = 1; /* make sure 256 has a nonzero count */ |
|
832 /* Including the pseudo-symbol 256 in the Huffman procedure guarantees |
|
833 * that no real symbol is given code-value of all ones, because 256 |
|
834 * will be placed last in the largest codeword category. |
|
835 */ |
|
836 |
|
837 /* Huffman's basic algorithm to assign optimal code lengths to symbols */ |
|
838 |
|
839 for (;;) { |
|
840 /* Find the smallest nonzero frequency, set c1 = its symbol */ |
|
841 /* In case of ties, take the larger symbol number */ |
|
842 c1 = -1; |
|
843 v = 1000000000L; |
|
844 for (i = 0; i <= 256; i++) { |
|
845 if (freq[i] && freq[i] <= v) { |
|
846 v = freq[i]; |
|
847 c1 = i; |
|
848 } |
|
849 } |
|
850 |
|
851 /* Find the next smallest nonzero frequency, set c2 = its symbol */ |
|
852 /* In case of ties, take the larger symbol number */ |
|
853 c2 = -1; |
|
854 v = 1000000000L; |
|
855 for (i = 0; i <= 256; i++) { |
|
856 if (freq[i] && freq[i] <= v && i != c1) { |
|
857 v = freq[i]; |
|
858 c2 = i; |
|
859 } |
|
860 } |
|
861 |
|
862 /* Done if we've merged everything into one frequency */ |
|
863 if (c2 < 0) |
|
864 break; |
|
865 |
|
866 /* Else merge the two counts/trees */ |
|
867 freq[c1] += freq[c2]; |
|
868 freq[c2] = 0; |
|
869 |
|
870 /* Increment the codesize of everything in c1's tree branch */ |
|
871 codesize[c1]++; |
|
872 while (others[c1] >= 0) { |
|
873 c1 = others[c1]; |
|
874 codesize[c1]++; |
|
875 } |
|
876 |
|
877 others[c1] = c2; /* chain c2 onto c1's tree branch */ |
|
878 |
|
879 /* Increment the codesize of everything in c2's tree branch */ |
|
880 codesize[c2]++; |
|
881 while (others[c2] >= 0) { |
|
882 c2 = others[c2]; |
|
883 codesize[c2]++; |
|
884 } |
|
885 } |
|
886 |
|
887 /* Now count the number of symbols of each code length */ |
|
888 for (i = 0; i <= 256; i++) { |
|
889 if (codesize[i]) { |
|
890 /* The JPEG standard seems to think that this can't happen, */ |
|
891 /* but I'm paranoid... */ |
|
892 if (codesize[i] > MAX_CLEN) |
|
893 ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); |
|
894 |
|
895 bits[codesize[i]]++; |
|
896 } |
|
897 } |
|
898 |
|
899 /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure |
|
900 * Huffman procedure assigned any such lengths, we must adjust the coding. |
|
901 * Here is what the JPEG spec says about how this next bit works: |
|
902 * Since symbols are paired for the longest Huffman code, the symbols are |
|
903 * removed from this length category two at a time. The prefix for the pair |
|
904 * (which is one bit shorter) is allocated to one of the pair; then, |
|
905 * skipping the BITS entry for that prefix length, a code word from the next |
|
906 * shortest nonzero BITS entry is converted into a prefix for two code words |
|
907 * one bit longer. |
|
908 */ |
|
909 |
|
910 for (i = MAX_CLEN; i > 16; i--) { |
|
911 while (bits[i] > 0) { |
|
912 j = i - 2; /* find length of new prefix to be used */ |
|
913 while (bits[j] == 0) |
|
914 j--; |
|
915 |
|
916 bits[i] -= 2; /* remove two symbols */ |
|
917 bits[i-1]++; /* one goes in this length */ |
|
918 bits[j+1] += 2; /* two new symbols in this length */ |
|
919 bits[j]--; /* symbol of this length is now a prefix */ |
|
920 } |
|
921 } |
|
922 |
|
923 /* Remove the count for the pseudo-symbol 256 from the largest codelength */ |
|
924 while (bits[i] == 0) /* find largest codelength still in use */ |
|
925 i--; |
|
926 bits[i]--; |
|
927 |
|
928 /* Return final symbol counts (only for lengths 0..16) */ |
|
929 MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits)); |
|
930 |
|
931 /* Return a list of the symbols sorted by code length */ |
|
932 /* It's not real clear to me why we don't need to consider the codelength |
|
933 * changes made above, but the JPEG spec seems to think this works. |
|
934 */ |
|
935 p = 0; |
|
936 for (i = 1; i <= MAX_CLEN; i++) { |
|
937 for (j = 0; j <= 255; j++) { |
|
938 if (codesize[j] == i) { |
|
939 htbl->huffval[p] = (UINT8) j; |
|
940 p++; |
|
941 } |
|
942 } |
|
943 } |
|
944 |
|
945 /* Set sent_table FALSE so updated table will be written to JPEG file. */ |
|
946 htbl->sent_table = FALSE; |
|
947 } |
|
948 |
|
949 |
|
950 /* |
|
951 * Finish up a statistics-gathering pass and create the new Huffman tables. |
|
952 */ |
|
953 |
|
954 METHODDEF(void) |
|
955 finish_pass_gather (j_compress_ptr cinfo) |
|
956 { |
|
957 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
|
958 int ci, dctbl, actbl; |
|
959 jpeg_component_info * compptr; |
|
960 JHUFF_TBL **htblptr; |
|
961 boolean did_dc[NUM_HUFF_TBLS]; |
|
962 boolean did_ac[NUM_HUFF_TBLS]; |
|
963 |
|
964 /* It's important not to apply jpeg_gen_optimal_table more than once |
|
965 * per table, because it clobbers the input frequency counts! |
|
966 */ |
|
967 MEMZERO(did_dc, SIZEOF(did_dc)); |
|
968 MEMZERO(did_ac, SIZEOF(did_ac)); |
|
969 |
|
970 for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
|
971 compptr = cinfo->cur_comp_info[ci]; |
|
972 dctbl = compptr->dc_tbl_no; |
|
973 actbl = compptr->ac_tbl_no; |
|
974 if (! did_dc[dctbl]) { |
|
975 htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl]; |
|
976 if (*htblptr == NULL) |
|
977 *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
|
978 jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]); |
|
979 did_dc[dctbl] = TRUE; |
|
980 } |
|
981 if (! did_ac[actbl]) { |
|
982 htblptr = & cinfo->ac_huff_tbl_ptrs[actbl]; |
|
983 if (*htblptr == NULL) |
|
984 *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
|
985 jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]); |
|
986 did_ac[actbl] = TRUE; |
|
987 } |
|
988 } |
|
989 } |
|
990 |
|
991 |
|
992 #endif /* ENTROPY_OPT_SUPPORTED */ |
|
993 |
|
994 |
|
995 /* |
|
996 * Module initialization routine for Huffman entropy encoding. |
|
997 */ |
|
998 |
|
999 GLOBAL(void) |
|
1000 jinit_huff_encoder (j_compress_ptr cinfo) |
|
1001 { |
|
1002 huff_entropy_ptr entropy; |
|
1003 int i; |
|
1004 |
|
1005 entropy = (huff_entropy_ptr) |
|
1006 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
|
1007 SIZEOF(huff_entropy_encoder)); |
|
1008 cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; |
|
1009 entropy->pub.start_pass = start_pass_huff; |
|
1010 |
|
1011 /* Mark tables unallocated */ |
|
1012 for (i = 0; i < NUM_HUFF_TBLS; i++) { |
|
1013 entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL; |
|
1014 #ifdef ENTROPY_OPT_SUPPORTED |
|
1015 entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL; |
|
1016 #endif |
|
1017 } |
|
1018 } |