|
/*
 * jsimd_i386.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009-2011 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on a
 * 32-bit x86 architecture.
 */
|
15 |
|
16 #define JPEG_INTERNALS |
|
17 #include "../jinclude.h" |
|
18 #include "../jpeglib.h" |
|
19 #include "../jsimd.h" |
|
20 #include "../jdct.h" |
|
21 #include "../jsimddct.h" |
|
22 #include "jsimd.h" |
|
23 |
|
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address `ptr` is a multiple
 * of 2^order.  Every macro argument is parenthesised so that arbitrary
 * expressions may be passed, and the address is converted to size_t
 * (instead of unsigned int) so the cast does not narrow the pointer on
 * targets where unsigned int is smaller than a pointer.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
|
31 |
|
32 static unsigned int simd_support = ~0; |
|
33 |
|
34 /* |
|
35 * Check what SIMD accelerations are supported. |
|
36 * |
|
37 * FIXME: This code is racy under a multi-threaded environment. |
|
38 */ |
|
39 LOCAL(void) |
|
40 init_simd (void) |
|
41 { |
|
42 char *env = NULL; |
|
43 |
|
44 if (simd_support != ~0U) |
|
45 return; |
|
46 |
|
47 simd_support = jpeg_simd_cpu_support(); |
|
48 |
|
49 /* Force different settings through environment variables */ |
|
50 env = getenv("JSIMD_FORCEMMX"); |
|
51 if ((env != NULL) && (strcmp(env, "1") == 0)) |
|
52 simd_support &= JSIMD_MMX; |
|
53 env = getenv("JSIMD_FORCE3DNOW"); |
|
54 if ((env != NULL) && (strcmp(env, "1") == 0)) |
|
55 simd_support &= JSIMD_3DNOW|JSIMD_MMX; |
|
56 env = getenv("JSIMD_FORCESSE"); |
|
57 if ((env != NULL) && (strcmp(env, "1") == 0)) |
|
58 simd_support &= JSIMD_SSE|JSIMD_MMX; |
|
59 env = getenv("JSIMD_FORCESSE2"); |
|
60 if ((env != NULL) && (strcmp(env, "1") == 0)) |
|
61 simd_support &= JSIMD_SSE2; |
|
62 } |
|
63 |
|
64 GLOBAL(int) |
|
65 jsimd_can_rgb_ycc (void) |
|
66 { |
|
67 init_simd(); |
|
68 |
|
69 /* The code is optimised for these values only */ |
|
70 if (BITS_IN_JSAMPLE != 8) |
|
71 return 0; |
|
72 if (sizeof(JDIMENSION) != 4) |
|
73 return 0; |
|
74 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
|
75 return 0; |
|
76 |
|
77 if ((simd_support & JSIMD_SSE2) && |
|
78 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
|
79 return 1; |
|
80 if (simd_support & JSIMD_MMX) |
|
81 return 1; |
|
82 |
|
83 return 0; |
|
84 } |
|
85 |
|
86 GLOBAL(int) |
|
87 jsimd_can_rgb_gray (void) |
|
88 { |
|
89 init_simd(); |
|
90 |
|
91 /* The code is optimised for these values only */ |
|
92 if (BITS_IN_JSAMPLE != 8) |
|
93 return 0; |
|
94 if (sizeof(JDIMENSION) != 4) |
|
95 return 0; |
|
96 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
|
97 return 0; |
|
98 |
|
99 if ((simd_support & JSIMD_SSE2) && |
|
100 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
|
101 return 1; |
|
102 if (simd_support & JSIMD_MMX) |
|
103 return 1; |
|
104 |
|
105 return 0; |
|
106 } |
|
107 |
|
108 GLOBAL(int) |
|
109 jsimd_can_ycc_rgb (void) |
|
110 { |
|
111 init_simd(); |
|
112 |
|
113 /* The code is optimised for these values only */ |
|
114 if (BITS_IN_JSAMPLE != 8) |
|
115 return 0; |
|
116 if (sizeof(JDIMENSION) != 4) |
|
117 return 0; |
|
118 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
|
119 return 0; |
|
120 |
|
121 if ((simd_support & JSIMD_SSE2) && |
|
122 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
|
123 return 1; |
|
124 if (simd_support & JSIMD_MMX) |
|
125 return 1; |
|
126 |
|
127 return 0; |
|
128 } |
|
129 |
|
130 GLOBAL(void) |
|
131 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
|
132 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
|
133 JDIMENSION output_row, int num_rows) |
|
134 { |
|
135 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
136 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
137 |
|
138 switch(cinfo->in_color_space) |
|
139 { |
|
140 case JCS_EXT_RGB: |
|
141 sse2fct=jsimd_extrgb_ycc_convert_sse2; |
|
142 mmxfct=jsimd_extrgb_ycc_convert_mmx; |
|
143 break; |
|
144 case JCS_EXT_RGBX: |
|
145 case JCS_EXT_RGBA: |
|
146 sse2fct=jsimd_extrgbx_ycc_convert_sse2; |
|
147 mmxfct=jsimd_extrgbx_ycc_convert_mmx; |
|
148 break; |
|
149 case JCS_EXT_BGR: |
|
150 sse2fct=jsimd_extbgr_ycc_convert_sse2; |
|
151 mmxfct=jsimd_extbgr_ycc_convert_mmx; |
|
152 break; |
|
153 case JCS_EXT_BGRX: |
|
154 case JCS_EXT_BGRA: |
|
155 sse2fct=jsimd_extbgrx_ycc_convert_sse2; |
|
156 mmxfct=jsimd_extbgrx_ycc_convert_mmx; |
|
157 break; |
|
158 case JCS_EXT_XBGR: |
|
159 case JCS_EXT_ABGR: |
|
160 sse2fct=jsimd_extxbgr_ycc_convert_sse2; |
|
161 mmxfct=jsimd_extxbgr_ycc_convert_mmx; |
|
162 break; |
|
163 case JCS_EXT_XRGB: |
|
164 case JCS_EXT_ARGB: |
|
165 sse2fct=jsimd_extxrgb_ycc_convert_sse2; |
|
166 mmxfct=jsimd_extxrgb_ycc_convert_mmx; |
|
167 break; |
|
168 default: |
|
169 sse2fct=jsimd_rgb_ycc_convert_sse2; |
|
170 mmxfct=jsimd_rgb_ycc_convert_mmx; |
|
171 break; |
|
172 } |
|
173 |
|
174 if ((simd_support & JSIMD_SSE2) && |
|
175 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
|
176 sse2fct(cinfo->image_width, input_buf, |
|
177 output_buf, output_row, num_rows); |
|
178 else if (simd_support & JSIMD_MMX) |
|
179 mmxfct(cinfo->image_width, input_buf, |
|
180 output_buf, output_row, num_rows); |
|
181 } |
|
182 |
|
183 GLOBAL(void) |
|
184 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
|
185 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
|
186 JDIMENSION output_row, int num_rows) |
|
187 { |
|
188 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
189 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
190 |
|
191 switch(cinfo->in_color_space) |
|
192 { |
|
193 case JCS_EXT_RGB: |
|
194 sse2fct=jsimd_extrgb_gray_convert_sse2; |
|
195 mmxfct=jsimd_extrgb_gray_convert_mmx; |
|
196 break; |
|
197 case JCS_EXT_RGBX: |
|
198 case JCS_EXT_RGBA: |
|
199 sse2fct=jsimd_extrgbx_gray_convert_sse2; |
|
200 mmxfct=jsimd_extrgbx_gray_convert_mmx; |
|
201 break; |
|
202 case JCS_EXT_BGR: |
|
203 sse2fct=jsimd_extbgr_gray_convert_sse2; |
|
204 mmxfct=jsimd_extbgr_gray_convert_mmx; |
|
205 break; |
|
206 case JCS_EXT_BGRX: |
|
207 case JCS_EXT_BGRA: |
|
208 sse2fct=jsimd_extbgrx_gray_convert_sse2; |
|
209 mmxfct=jsimd_extbgrx_gray_convert_mmx; |
|
210 break; |
|
211 case JCS_EXT_XBGR: |
|
212 case JCS_EXT_ABGR: |
|
213 sse2fct=jsimd_extxbgr_gray_convert_sse2; |
|
214 mmxfct=jsimd_extxbgr_gray_convert_mmx; |
|
215 break; |
|
216 case JCS_EXT_XRGB: |
|
217 case JCS_EXT_ARGB: |
|
218 sse2fct=jsimd_extxrgb_gray_convert_sse2; |
|
219 mmxfct=jsimd_extxrgb_gray_convert_mmx; |
|
220 break; |
|
221 default: |
|
222 sse2fct=jsimd_rgb_gray_convert_sse2; |
|
223 mmxfct=jsimd_rgb_gray_convert_mmx; |
|
224 break; |
|
225 } |
|
226 |
|
227 if ((simd_support & JSIMD_SSE2) && |
|
228 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
|
229 sse2fct(cinfo->image_width, input_buf, |
|
230 output_buf, output_row, num_rows); |
|
231 else if (simd_support & JSIMD_MMX) |
|
232 mmxfct(cinfo->image_width, input_buf, |
|
233 output_buf, output_row, num_rows); |
|
234 } |
|
235 |
|
236 GLOBAL(void) |
|
237 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
|
238 JSAMPIMAGE input_buf, JDIMENSION input_row, |
|
239 JSAMPARRAY output_buf, int num_rows) |
|
240 { |
|
241 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
|
242 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
|
243 |
|
244 switch(cinfo->out_color_space) |
|
245 { |
|
246 case JCS_EXT_RGB: |
|
247 sse2fct=jsimd_ycc_extrgb_convert_sse2; |
|
248 mmxfct=jsimd_ycc_extrgb_convert_mmx; |
|
249 break; |
|
250 case JCS_EXT_RGBX: |
|
251 case JCS_EXT_RGBA: |
|
252 sse2fct=jsimd_ycc_extrgbx_convert_sse2; |
|
253 mmxfct=jsimd_ycc_extrgbx_convert_mmx; |
|
254 break; |
|
255 case JCS_EXT_BGR: |
|
256 sse2fct=jsimd_ycc_extbgr_convert_sse2; |
|
257 mmxfct=jsimd_ycc_extbgr_convert_mmx; |
|
258 break; |
|
259 case JCS_EXT_BGRX: |
|
260 case JCS_EXT_BGRA: |
|
261 sse2fct=jsimd_ycc_extbgrx_convert_sse2; |
|
262 mmxfct=jsimd_ycc_extbgrx_convert_mmx; |
|
263 break; |
|
264 case JCS_EXT_XBGR: |
|
265 case JCS_EXT_ABGR: |
|
266 sse2fct=jsimd_ycc_extxbgr_convert_sse2; |
|
267 mmxfct=jsimd_ycc_extxbgr_convert_mmx; |
|
268 break; |
|
269 case JCS_EXT_XRGB: |
|
270 case JCS_EXT_ARGB: |
|
271 sse2fct=jsimd_ycc_extxrgb_convert_sse2; |
|
272 mmxfct=jsimd_ycc_extxrgb_convert_mmx; |
|
273 break; |
|
274 default: |
|
275 sse2fct=jsimd_ycc_rgb_convert_sse2; |
|
276 mmxfct=jsimd_ycc_rgb_convert_mmx; |
|
277 break; |
|
278 } |
|
279 |
|
280 if ((simd_support & JSIMD_SSE2) && |
|
281 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
|
282 sse2fct(cinfo->output_width, input_buf, |
|
283 input_row, output_buf, num_rows); |
|
284 else if (simd_support & JSIMD_MMX) |
|
285 mmxfct(cinfo->output_width, input_buf, |
|
286 input_row, output_buf, num_rows); |
|
287 } |
|
288 |
|
289 GLOBAL(int) |
|
290 jsimd_can_h2v2_downsample (void) |
|
291 { |
|
292 init_simd(); |
|
293 |
|
294 /* The code is optimised for these values only */ |
|
295 if (BITS_IN_JSAMPLE != 8) |
|
296 return 0; |
|
297 if (sizeof(JDIMENSION) != 4) |
|
298 return 0; |
|
299 |
|
300 if (simd_support & JSIMD_SSE2) |
|
301 return 1; |
|
302 if (simd_support & JSIMD_MMX) |
|
303 return 1; |
|
304 |
|
305 return 0; |
|
306 } |
|
307 |
|
308 GLOBAL(int) |
|
309 jsimd_can_h2v1_downsample (void) |
|
310 { |
|
311 init_simd(); |
|
312 |
|
313 /* The code is optimised for these values only */ |
|
314 if (BITS_IN_JSAMPLE != 8) |
|
315 return 0; |
|
316 if (sizeof(JDIMENSION) != 4) |
|
317 return 0; |
|
318 |
|
319 if (simd_support & JSIMD_SSE2) |
|
320 return 1; |
|
321 if (simd_support & JSIMD_MMX) |
|
322 return 1; |
|
323 |
|
324 return 0; |
|
325 } |
|
326 |
|
327 GLOBAL(void) |
|
328 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
|
329 JSAMPARRAY input_data, JSAMPARRAY output_data) |
|
330 { |
|
331 if (simd_support & JSIMD_SSE2) |
|
332 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
|
333 compptr->v_samp_factor, compptr->width_in_blocks, |
|
334 input_data, output_data); |
|
335 else if (simd_support & JSIMD_MMX) |
|
336 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
|
337 compptr->v_samp_factor, compptr->width_in_blocks, |
|
338 input_data, output_data); |
|
339 } |
|
340 |
|
341 GLOBAL(void) |
|
342 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
|
343 JSAMPARRAY input_data, JSAMPARRAY output_data) |
|
344 { |
|
345 if (simd_support & JSIMD_SSE2) |
|
346 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
|
347 compptr->v_samp_factor, compptr->width_in_blocks, |
|
348 input_data, output_data); |
|
349 else if (simd_support & JSIMD_MMX) |
|
350 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
|
351 compptr->v_samp_factor, compptr->width_in_blocks, |
|
352 input_data, output_data); |
|
353 } |
|
354 |
|
355 GLOBAL(int) |
|
356 jsimd_can_h2v2_upsample (void) |
|
357 { |
|
358 init_simd(); |
|
359 |
|
360 /* The code is optimised for these values only */ |
|
361 if (BITS_IN_JSAMPLE != 8) |
|
362 return 0; |
|
363 if (sizeof(JDIMENSION) != 4) |
|
364 return 0; |
|
365 |
|
366 if (simd_support & JSIMD_SSE2) |
|
367 return 1; |
|
368 if (simd_support & JSIMD_MMX) |
|
369 return 1; |
|
370 |
|
371 return 0; |
|
372 } |
|
373 |
|
374 GLOBAL(int) |
|
375 jsimd_can_h2v1_upsample (void) |
|
376 { |
|
377 init_simd(); |
|
378 |
|
379 /* The code is optimised for these values only */ |
|
380 if (BITS_IN_JSAMPLE != 8) |
|
381 return 0; |
|
382 if (sizeof(JDIMENSION) != 4) |
|
383 return 0; |
|
384 |
|
385 if (simd_support & JSIMD_SSE2) |
|
386 return 1; |
|
387 if (simd_support & JSIMD_MMX) |
|
388 return 1; |
|
389 |
|
390 return 0; |
|
391 } |
|
392 |
|
393 GLOBAL(void) |
|
394 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
|
395 jpeg_component_info * compptr, |
|
396 JSAMPARRAY input_data, |
|
397 JSAMPARRAY * output_data_ptr) |
|
398 { |
|
399 if (simd_support & JSIMD_SSE2) |
|
400 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, |
|
401 cinfo->output_width, input_data, output_data_ptr); |
|
402 else if (simd_support & JSIMD_MMX) |
|
403 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, |
|
404 cinfo->output_width, input_data, output_data_ptr); |
|
405 } |
|
406 |
|
407 GLOBAL(void) |
|
408 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
|
409 jpeg_component_info * compptr, |
|
410 JSAMPARRAY input_data, |
|
411 JSAMPARRAY * output_data_ptr) |
|
412 { |
|
413 if (simd_support & JSIMD_SSE2) |
|
414 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, |
|
415 cinfo->output_width, input_data, output_data_ptr); |
|
416 else if (simd_support & JSIMD_MMX) |
|
417 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, |
|
418 cinfo->output_width, input_data, output_data_ptr); |
|
419 } |
|
420 |
|
421 GLOBAL(int) |
|
422 jsimd_can_h2v2_fancy_upsample (void) |
|
423 { |
|
424 init_simd(); |
|
425 |
|
426 /* The code is optimised for these values only */ |
|
427 if (BITS_IN_JSAMPLE != 8) |
|
428 return 0; |
|
429 if (sizeof(JDIMENSION) != 4) |
|
430 return 0; |
|
431 |
|
432 if ((simd_support & JSIMD_SSE2) && |
|
433 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
|
434 return 1; |
|
435 if (simd_support & JSIMD_MMX) |
|
436 return 1; |
|
437 |
|
438 return 0; |
|
439 } |
|
440 |
|
441 GLOBAL(int) |
|
442 jsimd_can_h2v1_fancy_upsample (void) |
|
443 { |
|
444 init_simd(); |
|
445 |
|
446 /* The code is optimised for these values only */ |
|
447 if (BITS_IN_JSAMPLE != 8) |
|
448 return 0; |
|
449 if (sizeof(JDIMENSION) != 4) |
|
450 return 0; |
|
451 |
|
452 if ((simd_support & JSIMD_SSE2) && |
|
453 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
|
454 return 1; |
|
455 if (simd_support & JSIMD_MMX) |
|
456 return 1; |
|
457 |
|
458 return 0; |
|
459 } |
|
460 |
|
461 GLOBAL(void) |
|
462 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
|
463 jpeg_component_info * compptr, |
|
464 JSAMPARRAY input_data, |
|
465 JSAMPARRAY * output_data_ptr) |
|
466 { |
|
467 if ((simd_support & JSIMD_SSE2) && |
|
468 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
|
469 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
|
470 compptr->downsampled_width, input_data, output_data_ptr); |
|
471 else if (simd_support & JSIMD_MMX) |
|
472 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
|
473 compptr->downsampled_width, input_data, output_data_ptr); |
|
474 } |
|
475 |
|
476 GLOBAL(void) |
|
477 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
|
478 jpeg_component_info * compptr, |
|
479 JSAMPARRAY input_data, |
|
480 JSAMPARRAY * output_data_ptr) |
|
481 { |
|
482 if ((simd_support & JSIMD_SSE2) && |
|
483 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
|
484 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
|
485 compptr->downsampled_width, input_data, output_data_ptr); |
|
486 else if (simd_support & JSIMD_MMX) |
|
487 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
|
488 compptr->downsampled_width, input_data, output_data_ptr); |
|
489 } |
|
490 |
|
491 GLOBAL(int) |
|
492 jsimd_can_h2v2_merged_upsample (void) |
|
493 { |
|
494 init_simd(); |
|
495 |
|
496 /* The code is optimised for these values only */ |
|
497 if (BITS_IN_JSAMPLE != 8) |
|
498 return 0; |
|
499 if (sizeof(JDIMENSION) != 4) |
|
500 return 0; |
|
501 |
|
502 if ((simd_support & JSIMD_SSE2) && |
|
503 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
|
504 return 1; |
|
505 if (simd_support & JSIMD_MMX) |
|
506 return 1; |
|
507 |
|
508 return 0; |
|
509 } |
|
510 |
|
511 GLOBAL(int) |
|
512 jsimd_can_h2v1_merged_upsample (void) |
|
513 { |
|
514 init_simd(); |
|
515 |
|
516 /* The code is optimised for these values only */ |
|
517 if (BITS_IN_JSAMPLE != 8) |
|
518 return 0; |
|
519 if (sizeof(JDIMENSION) != 4) |
|
520 return 0; |
|
521 |
|
522 if ((simd_support & JSIMD_SSE2) && |
|
523 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
|
524 return 1; |
|
525 if (simd_support & JSIMD_MMX) |
|
526 return 1; |
|
527 |
|
528 return 0; |
|
529 } |
|
530 |
|
531 GLOBAL(void) |
|
532 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
|
533 JSAMPIMAGE input_buf, |
|
534 JDIMENSION in_row_group_ctr, |
|
535 JSAMPARRAY output_buf) |
|
536 { |
|
537 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
538 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
539 |
|
540 switch(cinfo->out_color_space) |
|
541 { |
|
542 case JCS_EXT_RGB: |
|
543 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; |
|
544 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; |
|
545 break; |
|
546 case JCS_EXT_RGBX: |
|
547 case JCS_EXT_RGBA: |
|
548 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; |
|
549 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; |
|
550 break; |
|
551 case JCS_EXT_BGR: |
|
552 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; |
|
553 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; |
|
554 break; |
|
555 case JCS_EXT_BGRX: |
|
556 case JCS_EXT_BGRA: |
|
557 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; |
|
558 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; |
|
559 break; |
|
560 case JCS_EXT_XBGR: |
|
561 case JCS_EXT_ABGR: |
|
562 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; |
|
563 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; |
|
564 break; |
|
565 case JCS_EXT_XRGB: |
|
566 case JCS_EXT_ARGB: |
|
567 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; |
|
568 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; |
|
569 break; |
|
570 default: |
|
571 sse2fct=jsimd_h2v2_merged_upsample_sse2; |
|
572 mmxfct=jsimd_h2v2_merged_upsample_mmx; |
|
573 break; |
|
574 } |
|
575 |
|
576 if ((simd_support & JSIMD_SSE2) && |
|
577 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
|
578 sse2fct(cinfo->output_width, input_buf, |
|
579 in_row_group_ctr, output_buf); |
|
580 else if (simd_support & JSIMD_MMX) |
|
581 mmxfct(cinfo->output_width, input_buf, |
|
582 in_row_group_ctr, output_buf); |
|
583 } |
|
584 |
|
585 GLOBAL(void) |
|
586 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
|
587 JSAMPIMAGE input_buf, |
|
588 JDIMENSION in_row_group_ctr, |
|
589 JSAMPARRAY output_buf) |
|
590 { |
|
591 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
592 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
593 |
|
594 switch(cinfo->out_color_space) |
|
595 { |
|
596 case JCS_EXT_RGB: |
|
597 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; |
|
598 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; |
|
599 break; |
|
600 case JCS_EXT_RGBX: |
|
601 case JCS_EXT_RGBA: |
|
602 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; |
|
603 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; |
|
604 break; |
|
605 case JCS_EXT_BGR: |
|
606 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; |
|
607 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; |
|
608 break; |
|
609 case JCS_EXT_BGRX: |
|
610 case JCS_EXT_BGRA: |
|
611 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; |
|
612 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; |
|
613 break; |
|
614 case JCS_EXT_XBGR: |
|
615 case JCS_EXT_ABGR: |
|
616 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; |
|
617 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; |
|
618 break; |
|
619 case JCS_EXT_XRGB: |
|
620 case JCS_EXT_ARGB: |
|
621 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; |
|
622 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; |
|
623 break; |
|
624 default: |
|
625 sse2fct=jsimd_h2v1_merged_upsample_sse2; |
|
626 mmxfct=jsimd_h2v1_merged_upsample_mmx; |
|
627 break; |
|
628 } |
|
629 |
|
630 if ((simd_support & JSIMD_SSE2) && |
|
631 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
|
632 sse2fct(cinfo->output_width, input_buf, |
|
633 in_row_group_ctr, output_buf); |
|
634 else if (simd_support & JSIMD_MMX) |
|
635 mmxfct(cinfo->output_width, input_buf, |
|
636 in_row_group_ctr, output_buf); |
|
637 } |
|
638 |
|
639 GLOBAL(int) |
|
640 jsimd_can_convsamp (void) |
|
641 { |
|
642 init_simd(); |
|
643 |
|
644 /* The code is optimised for these values only */ |
|
645 if (DCTSIZE != 8) |
|
646 return 0; |
|
647 if (BITS_IN_JSAMPLE != 8) |
|
648 return 0; |
|
649 if (sizeof(JDIMENSION) != 4) |
|
650 return 0; |
|
651 if (sizeof(DCTELEM) != 2) |
|
652 return 0; |
|
653 |
|
654 if (simd_support & JSIMD_SSE2) |
|
655 return 1; |
|
656 if (simd_support & JSIMD_MMX) |
|
657 return 1; |
|
658 |
|
659 return 0; |
|
660 } |
|
661 |
|
662 GLOBAL(int) |
|
663 jsimd_can_convsamp_float (void) |
|
664 { |
|
665 init_simd(); |
|
666 |
|
667 /* The code is optimised for these values only */ |
|
668 if (DCTSIZE != 8) |
|
669 return 0; |
|
670 if (BITS_IN_JSAMPLE != 8) |
|
671 return 0; |
|
672 if (sizeof(JDIMENSION) != 4) |
|
673 return 0; |
|
674 if (sizeof(FAST_FLOAT) != 4) |
|
675 return 0; |
|
676 |
|
677 if (simd_support & JSIMD_SSE2) |
|
678 return 1; |
|
679 if (simd_support & JSIMD_SSE) |
|
680 return 1; |
|
681 if (simd_support & JSIMD_3DNOW) |
|
682 return 1; |
|
683 |
|
684 return 0; |
|
685 } |
|
686 |
|
687 GLOBAL(void) |
|
688 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
|
689 DCTELEM * workspace) |
|
690 { |
|
691 if (simd_support & JSIMD_SSE2) |
|
692 jsimd_convsamp_sse2(sample_data, start_col, workspace); |
|
693 else if (simd_support & JSIMD_MMX) |
|
694 jsimd_convsamp_mmx(sample_data, start_col, workspace); |
|
695 } |
|
696 |
|
697 GLOBAL(void) |
|
698 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
|
699 FAST_FLOAT * workspace) |
|
700 { |
|
701 if (simd_support & JSIMD_SSE2) |
|
702 jsimd_convsamp_float_sse2(sample_data, start_col, workspace); |
|
703 else if (simd_support & JSIMD_SSE) |
|
704 jsimd_convsamp_float_sse(sample_data, start_col, workspace); |
|
705 else if (simd_support & JSIMD_3DNOW) |
|
706 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); |
|
707 } |
|
708 |
|
709 GLOBAL(int) |
|
710 jsimd_can_fdct_islow (void) |
|
711 { |
|
712 init_simd(); |
|
713 |
|
714 /* The code is optimised for these values only */ |
|
715 if (DCTSIZE != 8) |
|
716 return 0; |
|
717 if (sizeof(DCTELEM) != 2) |
|
718 return 0; |
|
719 |
|
720 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
|
721 return 1; |
|
722 if (simd_support & JSIMD_MMX) |
|
723 return 1; |
|
724 |
|
725 return 0; |
|
726 } |
|
727 |
|
728 GLOBAL(int) |
|
729 jsimd_can_fdct_ifast (void) |
|
730 { |
|
731 init_simd(); |
|
732 |
|
733 /* The code is optimised for these values only */ |
|
734 if (DCTSIZE != 8) |
|
735 return 0; |
|
736 if (sizeof(DCTELEM) != 2) |
|
737 return 0; |
|
738 |
|
739 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) |
|
740 return 1; |
|
741 if (simd_support & JSIMD_MMX) |
|
742 return 1; |
|
743 |
|
744 return 0; |
|
745 } |
|
746 |
|
747 GLOBAL(int) |
|
748 jsimd_can_fdct_float (void) |
|
749 { |
|
750 init_simd(); |
|
751 |
|
752 /* The code is optimised for these values only */ |
|
753 if (DCTSIZE != 8) |
|
754 return 0; |
|
755 if (sizeof(FAST_FLOAT) != 4) |
|
756 return 0; |
|
757 |
|
758 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
|
759 return 1; |
|
760 if (simd_support & JSIMD_3DNOW) |
|
761 return 1; |
|
762 |
|
763 return 0; |
|
764 } |
|
765 |
|
766 GLOBAL(void) |
|
767 jsimd_fdct_islow (DCTELEM * data) |
|
768 { |
|
769 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
|
770 jsimd_fdct_islow_sse2(data); |
|
771 else if (simd_support & JSIMD_MMX) |
|
772 jsimd_fdct_islow_mmx(data); |
|
773 } |
|
774 |
|
775 GLOBAL(void) |
|
776 jsimd_fdct_ifast (DCTELEM * data) |
|
777 { |
|
778 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
|
779 jsimd_fdct_ifast_sse2(data); |
|
780 else if (simd_support & JSIMD_MMX) |
|
781 jsimd_fdct_ifast_mmx(data); |
|
782 } |
|
783 |
|
784 GLOBAL(void) |
|
785 jsimd_fdct_float (FAST_FLOAT * data) |
|
786 { |
|
787 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
|
788 jsimd_fdct_float_sse(data); |
|
789 else if (simd_support & JSIMD_3DNOW) |
|
790 jsimd_fdct_float_3dnow(data); |
|
791 } |
|
792 |
|
793 GLOBAL(int) |
|
794 jsimd_can_quantize (void) |
|
795 { |
|
796 init_simd(); |
|
797 |
|
798 /* The code is optimised for these values only */ |
|
799 if (DCTSIZE != 8) |
|
800 return 0; |
|
801 if (sizeof(JCOEF) != 2) |
|
802 return 0; |
|
803 if (sizeof(DCTELEM) != 2) |
|
804 return 0; |
|
805 |
|
806 if (simd_support & JSIMD_SSE2) |
|
807 return 1; |
|
808 if (simd_support & JSIMD_MMX) |
|
809 return 1; |
|
810 |
|
811 return 0; |
|
812 } |
|
813 |
|
814 GLOBAL(int) |
|
815 jsimd_can_quantize_float (void) |
|
816 { |
|
817 init_simd(); |
|
818 |
|
819 /* The code is optimised for these values only */ |
|
820 if (DCTSIZE != 8) |
|
821 return 0; |
|
822 if (sizeof(JCOEF) != 2) |
|
823 return 0; |
|
824 if (sizeof(FAST_FLOAT) != 4) |
|
825 return 0; |
|
826 |
|
827 if (simd_support & JSIMD_SSE2) |
|
828 return 1; |
|
829 if (simd_support & JSIMD_SSE) |
|
830 return 1; |
|
831 if (simd_support & JSIMD_3DNOW) |
|
832 return 1; |
|
833 |
|
834 return 0; |
|
835 } |
|
836 |
|
837 GLOBAL(void) |
|
838 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, |
|
839 DCTELEM * workspace) |
|
840 { |
|
841 if (simd_support & JSIMD_SSE2) |
|
842 jsimd_quantize_sse2(coef_block, divisors, workspace); |
|
843 else if (simd_support & JSIMD_MMX) |
|
844 jsimd_quantize_mmx(coef_block, divisors, workspace); |
|
845 } |
|
846 |
|
847 GLOBAL(void) |
|
848 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
|
849 FAST_FLOAT * workspace) |
|
850 { |
|
851 if (simd_support & JSIMD_SSE2) |
|
852 jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
|
853 else if (simd_support & JSIMD_SSE) |
|
854 jsimd_quantize_float_sse(coef_block, divisors, workspace); |
|
855 else if (simd_support & JSIMD_3DNOW) |
|
856 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); |
|
857 } |
|
858 |
|
859 GLOBAL(int) |
|
860 jsimd_can_idct_2x2 (void) |
|
861 { |
|
862 init_simd(); |
|
863 |
|
864 /* The code is optimised for these values only */ |
|
865 if (DCTSIZE != 8) |
|
866 return 0; |
|
867 if (sizeof(JCOEF) != 2) |
|
868 return 0; |
|
869 if (BITS_IN_JSAMPLE != 8) |
|
870 return 0; |
|
871 if (sizeof(JDIMENSION) != 4) |
|
872 return 0; |
|
873 if (sizeof(ISLOW_MULT_TYPE) != 2) |
|
874 return 0; |
|
875 |
|
876 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
|
877 return 1; |
|
878 if (simd_support & JSIMD_MMX) |
|
879 return 1; |
|
880 |
|
881 return 0; |
|
882 } |
|
883 |
|
884 GLOBAL(int) |
|
885 jsimd_can_idct_4x4 (void) |
|
886 { |
|
887 init_simd(); |
|
888 |
|
889 /* The code is optimised for these values only */ |
|
890 if (DCTSIZE != 8) |
|
891 return 0; |
|
892 if (sizeof(JCOEF) != 2) |
|
893 return 0; |
|
894 if (BITS_IN_JSAMPLE != 8) |
|
895 return 0; |
|
896 if (sizeof(JDIMENSION) != 4) |
|
897 return 0; |
|
898 if (sizeof(ISLOW_MULT_TYPE) != 2) |
|
899 return 0; |
|
900 |
|
901 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
|
902 return 1; |
|
903 if (simd_support & JSIMD_MMX) |
|
904 return 1; |
|
905 |
|
906 return 0; |
|
907 } |
|
908 |
|
909 GLOBAL(void) |
|
910 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
911 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
912 JDIMENSION output_col) |
|
913 { |
|
914 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
|
915 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
|
916 else if (simd_support & JSIMD_MMX) |
|
917 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
|
918 } |
|
919 |
|
920 GLOBAL(void) |
|
921 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
922 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
923 JDIMENSION output_col) |
|
924 { |
|
925 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
|
926 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
|
927 else if (simd_support & JSIMD_MMX) |
|
928 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
|
929 } |
|
930 |
|
931 GLOBAL(int) |
|
932 jsimd_can_idct_islow (void) |
|
933 { |
|
934 init_simd(); |
|
935 |
|
936 /* The code is optimised for these values only */ |
|
937 if (DCTSIZE != 8) |
|
938 return 0; |
|
939 if (sizeof(JCOEF) != 2) |
|
940 return 0; |
|
941 if (BITS_IN_JSAMPLE != 8) |
|
942 return 0; |
|
943 if (sizeof(JDIMENSION) != 4) |
|
944 return 0; |
|
945 if (sizeof(ISLOW_MULT_TYPE) != 2) |
|
946 return 0; |
|
947 |
|
948 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
|
949 return 1; |
|
950 if (simd_support & JSIMD_MMX) |
|
951 return 1; |
|
952 |
|
953 return 0; |
|
954 } |
|
955 |
|
956 GLOBAL(int) |
|
957 jsimd_can_idct_ifast (void) |
|
958 { |
|
959 init_simd(); |
|
960 |
|
961 /* The code is optimised for these values only */ |
|
962 if (DCTSIZE != 8) |
|
963 return 0; |
|
964 if (sizeof(JCOEF) != 2) |
|
965 return 0; |
|
966 if (BITS_IN_JSAMPLE != 8) |
|
967 return 0; |
|
968 if (sizeof(JDIMENSION) != 4) |
|
969 return 0; |
|
970 if (sizeof(IFAST_MULT_TYPE) != 2) |
|
971 return 0; |
|
972 if (IFAST_SCALE_BITS != 2) |
|
973 return 0; |
|
974 |
|
975 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
|
976 return 1; |
|
977 if (simd_support & JSIMD_MMX) |
|
978 return 1; |
|
979 |
|
980 return 0; |
|
981 } |
|
982 |
|
983 GLOBAL(int) |
|
984 jsimd_can_idct_float (void) |
|
985 { |
|
986 init_simd(); |
|
987 |
|
988 if (DCTSIZE != 8) |
|
989 return 0; |
|
990 if (sizeof(JCOEF) != 2) |
|
991 return 0; |
|
992 if (BITS_IN_JSAMPLE != 8) |
|
993 return 0; |
|
994 if (sizeof(JDIMENSION) != 4) |
|
995 return 0; |
|
996 if (sizeof(FAST_FLOAT) != 4) |
|
997 return 0; |
|
998 if (sizeof(FLOAT_MULT_TYPE) != 4) |
|
999 return 0; |
|
1000 |
|
1001 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
|
1002 return 1; |
|
1003 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
|
1004 return 1; |
|
1005 if (simd_support & JSIMD_3DNOW) |
|
1006 return 1; |
|
1007 |
|
1008 return 0; |
|
1009 } |
|
1010 |
|
1011 GLOBAL(void) |
|
1012 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1013 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
1014 JDIMENSION output_col) |
|
1015 { |
|
1016 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
|
1017 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
|
1018 else if (simd_support & JSIMD_MMX) |
|
1019 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
|
1020 } |
|
1021 |
|
1022 GLOBAL(void) |
|
1023 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1024 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
1025 JDIMENSION output_col) |
|
1026 { |
|
1027 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
|
1028 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
|
1029 else if (simd_support & JSIMD_MMX) |
|
1030 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
|
1031 } |
|
1032 |
|
1033 GLOBAL(void) |
|
1034 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
|
1035 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
1036 JDIMENSION output_col) |
|
1037 { |
|
1038 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
|
1039 jsimd_idct_float_sse2(compptr->dct_table, coef_block, |
|
1040 output_buf, output_col); |
|
1041 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
|
1042 jsimd_idct_float_sse(compptr->dct_table, coef_block, |
|
1043 output_buf, output_col); |
|
1044 else if (simd_support & JSIMD_3DNOW) |
|
1045 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, |
|
1046 output_buf, output_col); |
|
1047 } |
|
1048 |