Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /*
2 * jsimd_i386.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011 D. R. Commander
6 *
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc
10 *
11 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on a
13 * 32-bit x86 architecture.
14 */
16 #define JPEG_INTERNALS
17 #include "../jinclude.h"
18 #include "../jpeglib.h"
19 #include "../jsimd.h"
20 #include "../jdct.h"
21 #include "../jsimddct.h"
22 #include "jsimd.h"
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address held in ptr is a
 * multiple of 2^order bytes.  Both arguments are fully parenthesized so
 * that arbitrary expressions may be passed, and the address is converted
 * to size_t (not unsigned) so that no address bits are discarded where
 * pointers are wider than unsigned int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
32 static unsigned int simd_support = ~0;
34 /*
35 * Check what SIMD accelerations are supported.
36 *
37 * FIXME: This code is racy under a multi-threaded environment.
38 */
39 LOCAL(void)
40 init_simd (void)
41 {
42 char *env = NULL;
44 if (simd_support != ~0U)
45 return;
47 simd_support = jpeg_simd_cpu_support();
49 /* Force different settings through environment variables */
50 env = getenv("JSIMD_FORCEMMX");
51 if ((env != NULL) && (strcmp(env, "1") == 0))
52 simd_support &= JSIMD_MMX;
53 env = getenv("JSIMD_FORCE3DNOW");
54 if ((env != NULL) && (strcmp(env, "1") == 0))
55 simd_support &= JSIMD_3DNOW|JSIMD_MMX;
56 env = getenv("JSIMD_FORCESSE");
57 if ((env != NULL) && (strcmp(env, "1") == 0))
58 simd_support &= JSIMD_SSE|JSIMD_MMX;
59 env = getenv("JSIMD_FORCESSE2");
60 if ((env != NULL) && (strcmp(env, "1") == 0))
61 simd_support &= JSIMD_SSE2;
62 }
64 GLOBAL(int)
65 jsimd_can_rgb_ycc (void)
66 {
67 init_simd();
69 /* The code is optimised for these values only */
70 if (BITS_IN_JSAMPLE != 8)
71 return 0;
72 if (sizeof(JDIMENSION) != 4)
73 return 0;
74 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
75 return 0;
77 if ((simd_support & JSIMD_SSE2) &&
78 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
79 return 1;
80 if (simd_support & JSIMD_MMX)
81 return 1;
83 return 0;
84 }
86 GLOBAL(int)
87 jsimd_can_rgb_gray (void)
88 {
89 init_simd();
91 /* The code is optimised for these values only */
92 if (BITS_IN_JSAMPLE != 8)
93 return 0;
94 if (sizeof(JDIMENSION) != 4)
95 return 0;
96 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
97 return 0;
99 if ((simd_support & JSIMD_SSE2) &&
100 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
101 return 1;
102 if (simd_support & JSIMD_MMX)
103 return 1;
105 return 0;
106 }
108 GLOBAL(int)
109 jsimd_can_ycc_rgb (void)
110 {
111 init_simd();
113 /* The code is optimised for these values only */
114 if (BITS_IN_JSAMPLE != 8)
115 return 0;
116 if (sizeof(JDIMENSION) != 4)
117 return 0;
118 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
119 return 0;
121 if ((simd_support & JSIMD_SSE2) &&
122 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
123 return 1;
124 if (simd_support & JSIMD_MMX)
125 return 1;
127 return 0;
128 }
130 GLOBAL(void)
131 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
132 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
133 JDIMENSION output_row, int num_rows)
134 {
135 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
136 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
138 switch(cinfo->in_color_space)
139 {
140 case JCS_EXT_RGB:
141 sse2fct=jsimd_extrgb_ycc_convert_sse2;
142 mmxfct=jsimd_extrgb_ycc_convert_mmx;
143 break;
144 case JCS_EXT_RGBX:
145 case JCS_EXT_RGBA:
146 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
147 mmxfct=jsimd_extrgbx_ycc_convert_mmx;
148 break;
149 case JCS_EXT_BGR:
150 sse2fct=jsimd_extbgr_ycc_convert_sse2;
151 mmxfct=jsimd_extbgr_ycc_convert_mmx;
152 break;
153 case JCS_EXT_BGRX:
154 case JCS_EXT_BGRA:
155 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
156 mmxfct=jsimd_extbgrx_ycc_convert_mmx;
157 break;
158 case JCS_EXT_XBGR:
159 case JCS_EXT_ABGR:
160 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
161 mmxfct=jsimd_extxbgr_ycc_convert_mmx;
162 break;
163 case JCS_EXT_XRGB:
164 case JCS_EXT_ARGB:
165 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
166 mmxfct=jsimd_extxrgb_ycc_convert_mmx;
167 break;
168 default:
169 sse2fct=jsimd_rgb_ycc_convert_sse2;
170 mmxfct=jsimd_rgb_ycc_convert_mmx;
171 break;
172 }
174 if ((simd_support & JSIMD_SSE2) &&
175 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
176 sse2fct(cinfo->image_width, input_buf,
177 output_buf, output_row, num_rows);
178 else if (simd_support & JSIMD_MMX)
179 mmxfct(cinfo->image_width, input_buf,
180 output_buf, output_row, num_rows);
181 }
183 GLOBAL(void)
184 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
185 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
186 JDIMENSION output_row, int num_rows)
187 {
188 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
189 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
191 switch(cinfo->in_color_space)
192 {
193 case JCS_EXT_RGB:
194 sse2fct=jsimd_extrgb_gray_convert_sse2;
195 mmxfct=jsimd_extrgb_gray_convert_mmx;
196 break;
197 case JCS_EXT_RGBX:
198 case JCS_EXT_RGBA:
199 sse2fct=jsimd_extrgbx_gray_convert_sse2;
200 mmxfct=jsimd_extrgbx_gray_convert_mmx;
201 break;
202 case JCS_EXT_BGR:
203 sse2fct=jsimd_extbgr_gray_convert_sse2;
204 mmxfct=jsimd_extbgr_gray_convert_mmx;
205 break;
206 case JCS_EXT_BGRX:
207 case JCS_EXT_BGRA:
208 sse2fct=jsimd_extbgrx_gray_convert_sse2;
209 mmxfct=jsimd_extbgrx_gray_convert_mmx;
210 break;
211 case JCS_EXT_XBGR:
212 case JCS_EXT_ABGR:
213 sse2fct=jsimd_extxbgr_gray_convert_sse2;
214 mmxfct=jsimd_extxbgr_gray_convert_mmx;
215 break;
216 case JCS_EXT_XRGB:
217 case JCS_EXT_ARGB:
218 sse2fct=jsimd_extxrgb_gray_convert_sse2;
219 mmxfct=jsimd_extxrgb_gray_convert_mmx;
220 break;
221 default:
222 sse2fct=jsimd_rgb_gray_convert_sse2;
223 mmxfct=jsimd_rgb_gray_convert_mmx;
224 break;
225 }
227 if ((simd_support & JSIMD_SSE2) &&
228 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
229 sse2fct(cinfo->image_width, input_buf,
230 output_buf, output_row, num_rows);
231 else if (simd_support & JSIMD_MMX)
232 mmxfct(cinfo->image_width, input_buf,
233 output_buf, output_row, num_rows);
234 }
236 GLOBAL(void)
237 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
238 JSAMPIMAGE input_buf, JDIMENSION input_row,
239 JSAMPARRAY output_buf, int num_rows)
240 {
241 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
242 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
244 switch(cinfo->out_color_space)
245 {
246 case JCS_EXT_RGB:
247 sse2fct=jsimd_ycc_extrgb_convert_sse2;
248 mmxfct=jsimd_ycc_extrgb_convert_mmx;
249 break;
250 case JCS_EXT_RGBX:
251 case JCS_EXT_RGBA:
252 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
253 mmxfct=jsimd_ycc_extrgbx_convert_mmx;
254 break;
255 case JCS_EXT_BGR:
256 sse2fct=jsimd_ycc_extbgr_convert_sse2;
257 mmxfct=jsimd_ycc_extbgr_convert_mmx;
258 break;
259 case JCS_EXT_BGRX:
260 case JCS_EXT_BGRA:
261 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
262 mmxfct=jsimd_ycc_extbgrx_convert_mmx;
263 break;
264 case JCS_EXT_XBGR:
265 case JCS_EXT_ABGR:
266 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
267 mmxfct=jsimd_ycc_extxbgr_convert_mmx;
268 break;
269 case JCS_EXT_XRGB:
270 case JCS_EXT_ARGB:
271 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
272 mmxfct=jsimd_ycc_extxrgb_convert_mmx;
273 break;
274 default:
275 sse2fct=jsimd_ycc_rgb_convert_sse2;
276 mmxfct=jsimd_ycc_rgb_convert_mmx;
277 break;
278 }
280 if ((simd_support & JSIMD_SSE2) &&
281 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
282 sse2fct(cinfo->output_width, input_buf,
283 input_row, output_buf, num_rows);
284 else if (simd_support & JSIMD_MMX)
285 mmxfct(cinfo->output_width, input_buf,
286 input_row, output_buf, num_rows);
287 }
289 GLOBAL(int)
290 jsimd_can_h2v2_downsample (void)
291 {
292 init_simd();
294 /* The code is optimised for these values only */
295 if (BITS_IN_JSAMPLE != 8)
296 return 0;
297 if (sizeof(JDIMENSION) != 4)
298 return 0;
300 if (simd_support & JSIMD_SSE2)
301 return 1;
302 if (simd_support & JSIMD_MMX)
303 return 1;
305 return 0;
306 }
308 GLOBAL(int)
309 jsimd_can_h2v1_downsample (void)
310 {
311 init_simd();
313 /* The code is optimised for these values only */
314 if (BITS_IN_JSAMPLE != 8)
315 return 0;
316 if (sizeof(JDIMENSION) != 4)
317 return 0;
319 if (simd_support & JSIMD_SSE2)
320 return 1;
321 if (simd_support & JSIMD_MMX)
322 return 1;
324 return 0;
325 }
327 GLOBAL(void)
328 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
329 JSAMPARRAY input_data, JSAMPARRAY output_data)
330 {
331 if (simd_support & JSIMD_SSE2)
332 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
333 compptr->v_samp_factor, compptr->width_in_blocks,
334 input_data, output_data);
335 else if (simd_support & JSIMD_MMX)
336 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
337 compptr->v_samp_factor, compptr->width_in_blocks,
338 input_data, output_data);
339 }
341 GLOBAL(void)
342 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
343 JSAMPARRAY input_data, JSAMPARRAY output_data)
344 {
345 if (simd_support & JSIMD_SSE2)
346 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
347 compptr->v_samp_factor, compptr->width_in_blocks,
348 input_data, output_data);
349 else if (simd_support & JSIMD_MMX)
350 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
351 compptr->v_samp_factor, compptr->width_in_blocks,
352 input_data, output_data);
353 }
355 GLOBAL(int)
356 jsimd_can_h2v2_upsample (void)
357 {
358 init_simd();
360 /* The code is optimised for these values only */
361 if (BITS_IN_JSAMPLE != 8)
362 return 0;
363 if (sizeof(JDIMENSION) != 4)
364 return 0;
366 if (simd_support & JSIMD_SSE2)
367 return 1;
368 if (simd_support & JSIMD_MMX)
369 return 1;
371 return 0;
372 }
374 GLOBAL(int)
375 jsimd_can_h2v1_upsample (void)
376 {
377 init_simd();
379 /* The code is optimised for these values only */
380 if (BITS_IN_JSAMPLE != 8)
381 return 0;
382 if (sizeof(JDIMENSION) != 4)
383 return 0;
385 if (simd_support & JSIMD_SSE2)
386 return 1;
387 if (simd_support & JSIMD_MMX)
388 return 1;
390 return 0;
391 }
393 GLOBAL(void)
394 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
395 jpeg_component_info * compptr,
396 JSAMPARRAY input_data,
397 JSAMPARRAY * output_data_ptr)
398 {
399 if (simd_support & JSIMD_SSE2)
400 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
401 cinfo->output_width, input_data, output_data_ptr);
402 else if (simd_support & JSIMD_MMX)
403 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
404 cinfo->output_width, input_data, output_data_ptr);
405 }
407 GLOBAL(void)
408 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
409 jpeg_component_info * compptr,
410 JSAMPARRAY input_data,
411 JSAMPARRAY * output_data_ptr)
412 {
413 if (simd_support & JSIMD_SSE2)
414 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
415 cinfo->output_width, input_data, output_data_ptr);
416 else if (simd_support & JSIMD_MMX)
417 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
418 cinfo->output_width, input_data, output_data_ptr);
419 }
421 GLOBAL(int)
422 jsimd_can_h2v2_fancy_upsample (void)
423 {
424 init_simd();
426 /* The code is optimised for these values only */
427 if (BITS_IN_JSAMPLE != 8)
428 return 0;
429 if (sizeof(JDIMENSION) != 4)
430 return 0;
432 if ((simd_support & JSIMD_SSE2) &&
433 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
434 return 1;
435 if (simd_support & JSIMD_MMX)
436 return 1;
438 return 0;
439 }
441 GLOBAL(int)
442 jsimd_can_h2v1_fancy_upsample (void)
443 {
444 init_simd();
446 /* The code is optimised for these values only */
447 if (BITS_IN_JSAMPLE != 8)
448 return 0;
449 if (sizeof(JDIMENSION) != 4)
450 return 0;
452 if ((simd_support & JSIMD_SSE2) &&
453 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
454 return 1;
455 if (simd_support & JSIMD_MMX)
456 return 1;
458 return 0;
459 }
461 GLOBAL(void)
462 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
463 jpeg_component_info * compptr,
464 JSAMPARRAY input_data,
465 JSAMPARRAY * output_data_ptr)
466 {
467 if ((simd_support & JSIMD_SSE2) &&
468 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
469 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
470 compptr->downsampled_width, input_data, output_data_ptr);
471 else if (simd_support & JSIMD_MMX)
472 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
473 compptr->downsampled_width, input_data, output_data_ptr);
474 }
476 GLOBAL(void)
477 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
478 jpeg_component_info * compptr,
479 JSAMPARRAY input_data,
480 JSAMPARRAY * output_data_ptr)
481 {
482 if ((simd_support & JSIMD_SSE2) &&
483 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
484 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
485 compptr->downsampled_width, input_data, output_data_ptr);
486 else if (simd_support & JSIMD_MMX)
487 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
488 compptr->downsampled_width, input_data, output_data_ptr);
489 }
491 GLOBAL(int)
492 jsimd_can_h2v2_merged_upsample (void)
493 {
494 init_simd();
496 /* The code is optimised for these values only */
497 if (BITS_IN_JSAMPLE != 8)
498 return 0;
499 if (sizeof(JDIMENSION) != 4)
500 return 0;
502 if ((simd_support & JSIMD_SSE2) &&
503 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
504 return 1;
505 if (simd_support & JSIMD_MMX)
506 return 1;
508 return 0;
509 }
511 GLOBAL(int)
512 jsimd_can_h2v1_merged_upsample (void)
513 {
514 init_simd();
516 /* The code is optimised for these values only */
517 if (BITS_IN_JSAMPLE != 8)
518 return 0;
519 if (sizeof(JDIMENSION) != 4)
520 return 0;
522 if ((simd_support & JSIMD_SSE2) &&
523 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
524 return 1;
525 if (simd_support & JSIMD_MMX)
526 return 1;
528 return 0;
529 }
531 GLOBAL(void)
532 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
533 JSAMPIMAGE input_buf,
534 JDIMENSION in_row_group_ctr,
535 JSAMPARRAY output_buf)
536 {
537 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
538 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
540 switch(cinfo->out_color_space)
541 {
542 case JCS_EXT_RGB:
543 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
544 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
545 break;
546 case JCS_EXT_RGBX:
547 case JCS_EXT_RGBA:
548 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
549 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
550 break;
551 case JCS_EXT_BGR:
552 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
553 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
554 break;
555 case JCS_EXT_BGRX:
556 case JCS_EXT_BGRA:
557 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
558 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
559 break;
560 case JCS_EXT_XBGR:
561 case JCS_EXT_ABGR:
562 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
563 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
564 break;
565 case JCS_EXT_XRGB:
566 case JCS_EXT_ARGB:
567 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
568 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
569 break;
570 default:
571 sse2fct=jsimd_h2v2_merged_upsample_sse2;
572 mmxfct=jsimd_h2v2_merged_upsample_mmx;
573 break;
574 }
576 if ((simd_support & JSIMD_SSE2) &&
577 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
578 sse2fct(cinfo->output_width, input_buf,
579 in_row_group_ctr, output_buf);
580 else if (simd_support & JSIMD_MMX)
581 mmxfct(cinfo->output_width, input_buf,
582 in_row_group_ctr, output_buf);
583 }
585 GLOBAL(void)
586 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
587 JSAMPIMAGE input_buf,
588 JDIMENSION in_row_group_ctr,
589 JSAMPARRAY output_buf)
590 {
591 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
592 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
594 switch(cinfo->out_color_space)
595 {
596 case JCS_EXT_RGB:
597 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
598 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
599 break;
600 case JCS_EXT_RGBX:
601 case JCS_EXT_RGBA:
602 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
603 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
604 break;
605 case JCS_EXT_BGR:
606 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
607 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
608 break;
609 case JCS_EXT_BGRX:
610 case JCS_EXT_BGRA:
611 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
612 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
613 break;
614 case JCS_EXT_XBGR:
615 case JCS_EXT_ABGR:
616 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
617 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
618 break;
619 case JCS_EXT_XRGB:
620 case JCS_EXT_ARGB:
621 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
622 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
623 break;
624 default:
625 sse2fct=jsimd_h2v1_merged_upsample_sse2;
626 mmxfct=jsimd_h2v1_merged_upsample_mmx;
627 break;
628 }
630 if ((simd_support & JSIMD_SSE2) &&
631 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
632 sse2fct(cinfo->output_width, input_buf,
633 in_row_group_ctr, output_buf);
634 else if (simd_support & JSIMD_MMX)
635 mmxfct(cinfo->output_width, input_buf,
636 in_row_group_ctr, output_buf);
637 }
639 GLOBAL(int)
640 jsimd_can_convsamp (void)
641 {
642 init_simd();
644 /* The code is optimised for these values only */
645 if (DCTSIZE != 8)
646 return 0;
647 if (BITS_IN_JSAMPLE != 8)
648 return 0;
649 if (sizeof(JDIMENSION) != 4)
650 return 0;
651 if (sizeof(DCTELEM) != 2)
652 return 0;
654 if (simd_support & JSIMD_SSE2)
655 return 1;
656 if (simd_support & JSIMD_MMX)
657 return 1;
659 return 0;
660 }
662 GLOBAL(int)
663 jsimd_can_convsamp_float (void)
664 {
665 init_simd();
667 /* The code is optimised for these values only */
668 if (DCTSIZE != 8)
669 return 0;
670 if (BITS_IN_JSAMPLE != 8)
671 return 0;
672 if (sizeof(JDIMENSION) != 4)
673 return 0;
674 if (sizeof(FAST_FLOAT) != 4)
675 return 0;
677 if (simd_support & JSIMD_SSE2)
678 return 1;
679 if (simd_support & JSIMD_SSE)
680 return 1;
681 if (simd_support & JSIMD_3DNOW)
682 return 1;
684 return 0;
685 }
687 GLOBAL(void)
688 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
689 DCTELEM * workspace)
690 {
691 if (simd_support & JSIMD_SSE2)
692 jsimd_convsamp_sse2(sample_data, start_col, workspace);
693 else if (simd_support & JSIMD_MMX)
694 jsimd_convsamp_mmx(sample_data, start_col, workspace);
695 }
697 GLOBAL(void)
698 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
699 FAST_FLOAT * workspace)
700 {
701 if (simd_support & JSIMD_SSE2)
702 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
703 else if (simd_support & JSIMD_SSE)
704 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
705 else if (simd_support & JSIMD_3DNOW)
706 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
707 }
709 GLOBAL(int)
710 jsimd_can_fdct_islow (void)
711 {
712 init_simd();
714 /* The code is optimised for these values only */
715 if (DCTSIZE != 8)
716 return 0;
717 if (sizeof(DCTELEM) != 2)
718 return 0;
720 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
721 return 1;
722 if (simd_support & JSIMD_MMX)
723 return 1;
725 return 0;
726 }
728 GLOBAL(int)
729 jsimd_can_fdct_ifast (void)
730 {
731 init_simd();
733 /* The code is optimised for these values only */
734 if (DCTSIZE != 8)
735 return 0;
736 if (sizeof(DCTELEM) != 2)
737 return 0;
739 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
740 return 1;
741 if (simd_support & JSIMD_MMX)
742 return 1;
744 return 0;
745 }
747 GLOBAL(int)
748 jsimd_can_fdct_float (void)
749 {
750 init_simd();
752 /* The code is optimised for these values only */
753 if (DCTSIZE != 8)
754 return 0;
755 if (sizeof(FAST_FLOAT) != 4)
756 return 0;
758 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
759 return 1;
760 if (simd_support & JSIMD_3DNOW)
761 return 1;
763 return 0;
764 }
766 GLOBAL(void)
767 jsimd_fdct_islow (DCTELEM * data)
768 {
769 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
770 jsimd_fdct_islow_sse2(data);
771 else if (simd_support & JSIMD_MMX)
772 jsimd_fdct_islow_mmx(data);
773 }
775 GLOBAL(void)
776 jsimd_fdct_ifast (DCTELEM * data)
777 {
778 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
779 jsimd_fdct_ifast_sse2(data);
780 else if (simd_support & JSIMD_MMX)
781 jsimd_fdct_ifast_mmx(data);
782 }
784 GLOBAL(void)
785 jsimd_fdct_float (FAST_FLOAT * data)
786 {
787 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
788 jsimd_fdct_float_sse(data);
789 else if (simd_support & JSIMD_3DNOW)
790 jsimd_fdct_float_3dnow(data);
791 }
793 GLOBAL(int)
794 jsimd_can_quantize (void)
795 {
796 init_simd();
798 /* The code is optimised for these values only */
799 if (DCTSIZE != 8)
800 return 0;
801 if (sizeof(JCOEF) != 2)
802 return 0;
803 if (sizeof(DCTELEM) != 2)
804 return 0;
806 if (simd_support & JSIMD_SSE2)
807 return 1;
808 if (simd_support & JSIMD_MMX)
809 return 1;
811 return 0;
812 }
814 GLOBAL(int)
815 jsimd_can_quantize_float (void)
816 {
817 init_simd();
819 /* The code is optimised for these values only */
820 if (DCTSIZE != 8)
821 return 0;
822 if (sizeof(JCOEF) != 2)
823 return 0;
824 if (sizeof(FAST_FLOAT) != 4)
825 return 0;
827 if (simd_support & JSIMD_SSE2)
828 return 1;
829 if (simd_support & JSIMD_SSE)
830 return 1;
831 if (simd_support & JSIMD_3DNOW)
832 return 1;
834 return 0;
835 }
837 GLOBAL(void)
838 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
839 DCTELEM * workspace)
840 {
841 if (simd_support & JSIMD_SSE2)
842 jsimd_quantize_sse2(coef_block, divisors, workspace);
843 else if (simd_support & JSIMD_MMX)
844 jsimd_quantize_mmx(coef_block, divisors, workspace);
845 }
847 GLOBAL(void)
848 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
849 FAST_FLOAT * workspace)
850 {
851 if (simd_support & JSIMD_SSE2)
852 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
853 else if (simd_support & JSIMD_SSE)
854 jsimd_quantize_float_sse(coef_block, divisors, workspace);
855 else if (simd_support & JSIMD_3DNOW)
856 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
857 }
859 GLOBAL(int)
860 jsimd_can_idct_2x2 (void)
861 {
862 init_simd();
864 /* The code is optimised for these values only */
865 if (DCTSIZE != 8)
866 return 0;
867 if (sizeof(JCOEF) != 2)
868 return 0;
869 if (BITS_IN_JSAMPLE != 8)
870 return 0;
871 if (sizeof(JDIMENSION) != 4)
872 return 0;
873 if (sizeof(ISLOW_MULT_TYPE) != 2)
874 return 0;
876 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
877 return 1;
878 if (simd_support & JSIMD_MMX)
879 return 1;
881 return 0;
882 }
884 GLOBAL(int)
885 jsimd_can_idct_4x4 (void)
886 {
887 init_simd();
889 /* The code is optimised for these values only */
890 if (DCTSIZE != 8)
891 return 0;
892 if (sizeof(JCOEF) != 2)
893 return 0;
894 if (BITS_IN_JSAMPLE != 8)
895 return 0;
896 if (sizeof(JDIMENSION) != 4)
897 return 0;
898 if (sizeof(ISLOW_MULT_TYPE) != 2)
899 return 0;
901 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
902 return 1;
903 if (simd_support & JSIMD_MMX)
904 return 1;
906 return 0;
907 }
909 GLOBAL(void)
910 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
911 JCOEFPTR coef_block, JSAMPARRAY output_buf,
912 JDIMENSION output_col)
913 {
914 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
915 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
916 else if (simd_support & JSIMD_MMX)
917 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
918 }
920 GLOBAL(void)
921 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
922 JCOEFPTR coef_block, JSAMPARRAY output_buf,
923 JDIMENSION output_col)
924 {
925 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
926 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
927 else if (simd_support & JSIMD_MMX)
928 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
929 }
931 GLOBAL(int)
932 jsimd_can_idct_islow (void)
933 {
934 init_simd();
936 /* The code is optimised for these values only */
937 if (DCTSIZE != 8)
938 return 0;
939 if (sizeof(JCOEF) != 2)
940 return 0;
941 if (BITS_IN_JSAMPLE != 8)
942 return 0;
943 if (sizeof(JDIMENSION) != 4)
944 return 0;
945 if (sizeof(ISLOW_MULT_TYPE) != 2)
946 return 0;
948 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
949 return 1;
950 if (simd_support & JSIMD_MMX)
951 return 1;
953 return 0;
954 }
956 GLOBAL(int)
957 jsimd_can_idct_ifast (void)
958 {
959 init_simd();
961 /* The code is optimised for these values only */
962 if (DCTSIZE != 8)
963 return 0;
964 if (sizeof(JCOEF) != 2)
965 return 0;
966 if (BITS_IN_JSAMPLE != 8)
967 return 0;
968 if (sizeof(JDIMENSION) != 4)
969 return 0;
970 if (sizeof(IFAST_MULT_TYPE) != 2)
971 return 0;
972 if (IFAST_SCALE_BITS != 2)
973 return 0;
975 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
976 return 1;
977 if (simd_support & JSIMD_MMX)
978 return 1;
980 return 0;
981 }
983 GLOBAL(int)
984 jsimd_can_idct_float (void)
985 {
986 init_simd();
988 if (DCTSIZE != 8)
989 return 0;
990 if (sizeof(JCOEF) != 2)
991 return 0;
992 if (BITS_IN_JSAMPLE != 8)
993 return 0;
994 if (sizeof(JDIMENSION) != 4)
995 return 0;
996 if (sizeof(FAST_FLOAT) != 4)
997 return 0;
998 if (sizeof(FLOAT_MULT_TYPE) != 4)
999 return 0;
1001 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1002 return 1;
1003 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1004 return 1;
1005 if (simd_support & JSIMD_3DNOW)
1006 return 1;
1008 return 0;
1009 }
1011 GLOBAL(void)
1012 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1013 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1014 JDIMENSION output_col)
1015 {
1016 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1017 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
1018 else if (simd_support & JSIMD_MMX)
1019 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1020 }
1022 GLOBAL(void)
1023 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1024 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1025 JDIMENSION output_col)
1026 {
1027 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1028 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
1029 else if (simd_support & JSIMD_MMX)
1030 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1031 }
1033 GLOBAL(void)
1034 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1035 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1036 JDIMENSION output_col)
1037 {
1038 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1039 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
1040 output_buf, output_col);
1041 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1042 jsimd_idct_float_sse(compptr->dct_table, coef_block,
1043 output_buf, output_col);
1044 else if (simd_support & JSIMD_3DNOW)
1045 jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
1046 output_buf, output_col);
1047 }