Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | * jsimd_i386.c |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
michael@0 | 5 | * Copyright 2009-2011 D. R. Commander |
michael@0 | 6 | * |
michael@0 | 7 | * Based on the x86 SIMD extension for IJG JPEG library, |
michael@0 | 8 | * Copyright (C) 1999-2006, MIYASAKA Masaru. |
michael@0 | 9 | * For conditions of distribution and use, see copyright notice in jsimdext.inc |
michael@0 | 10 | * |
michael@0 | 11 | * This file contains the interface between the "normal" portions |
michael@0 | 12 | * of the library and the SIMD implementations when running on a |
michael@0 | 13 | * 32-bit x86 architecture. |
michael@0 | 14 | */ |
michael@0 | 15 | |
michael@0 | 16 | #define JPEG_INTERNALS |
michael@0 | 17 | #include "../jinclude.h" |
michael@0 | 18 | #include "../jpeglib.h" |
michael@0 | 19 | #include "../jsimd.h" |
michael@0 | 20 | #include "../jdct.h" |
michael@0 | 21 | #include "../jsimddct.h" |
michael@0 | 22 | #include "jsimd.h" |
michael@0 | 23 | |
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address ptr is a multiple of
 * 2^order bytes.  The address goes through size_t (not unsigned) so the
 * cast does not narrow where pointers are wider than unsigned int, and
 * every macro argument is parenthesized so that arbitrary expressions can
 * be passed without precedence surprises.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/* Cached SIMD capability bits; ~0 means "not probed yet" (see init_simd). */
static unsigned int simd_support = ~0;
michael@0 | 33 | |
michael@0 | 34 | /* |
michael@0 | 35 | * Check what SIMD accelerations are supported. |
michael@0 | 36 | * |
michael@0 | 37 | * FIXME: This code is racy under a multi-threaded environment. |
michael@0 | 38 | */ |
michael@0 | 39 | LOCAL(void) |
michael@0 | 40 | init_simd (void) |
michael@0 | 41 | { |
michael@0 | 42 | char *env = NULL; |
michael@0 | 43 | |
michael@0 | 44 | if (simd_support != ~0U) |
michael@0 | 45 | return; |
michael@0 | 46 | |
michael@0 | 47 | simd_support = jpeg_simd_cpu_support(); |
michael@0 | 48 | |
michael@0 | 49 | /* Force different settings through environment variables */ |
michael@0 | 50 | env = getenv("JSIMD_FORCEMMX"); |
michael@0 | 51 | if ((env != NULL) && (strcmp(env, "1") == 0)) |
michael@0 | 52 | simd_support &= JSIMD_MMX; |
michael@0 | 53 | env = getenv("JSIMD_FORCE3DNOW"); |
michael@0 | 54 | if ((env != NULL) && (strcmp(env, "1") == 0)) |
michael@0 | 55 | simd_support &= JSIMD_3DNOW|JSIMD_MMX; |
michael@0 | 56 | env = getenv("JSIMD_FORCESSE"); |
michael@0 | 57 | if ((env != NULL) && (strcmp(env, "1") == 0)) |
michael@0 | 58 | simd_support &= JSIMD_SSE|JSIMD_MMX; |
michael@0 | 59 | env = getenv("JSIMD_FORCESSE2"); |
michael@0 | 60 | if ((env != NULL) && (strcmp(env, "1") == 0)) |
michael@0 | 61 | simd_support &= JSIMD_SSE2; |
michael@0 | 62 | } |
michael@0 | 63 | |
michael@0 | 64 | GLOBAL(int) |
michael@0 | 65 | jsimd_can_rgb_ycc (void) |
michael@0 | 66 | { |
michael@0 | 67 | init_simd(); |
michael@0 | 68 | |
michael@0 | 69 | /* The code is optimised for these values only */ |
michael@0 | 70 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 71 | return 0; |
michael@0 | 72 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 73 | return 0; |
michael@0 | 74 | if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
michael@0 | 75 | return 0; |
michael@0 | 76 | |
michael@0 | 77 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 78 | IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
michael@0 | 79 | return 1; |
michael@0 | 80 | if (simd_support & JSIMD_MMX) |
michael@0 | 81 | return 1; |
michael@0 | 82 | |
michael@0 | 83 | return 0; |
michael@0 | 84 | } |
michael@0 | 85 | |
michael@0 | 86 | GLOBAL(int) |
michael@0 | 87 | jsimd_can_rgb_gray (void) |
michael@0 | 88 | { |
michael@0 | 89 | init_simd(); |
michael@0 | 90 | |
michael@0 | 91 | /* The code is optimised for these values only */ |
michael@0 | 92 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 93 | return 0; |
michael@0 | 94 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 95 | return 0; |
michael@0 | 96 | if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
michael@0 | 97 | return 0; |
michael@0 | 98 | |
michael@0 | 99 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 100 | IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
michael@0 | 101 | return 1; |
michael@0 | 102 | if (simd_support & JSIMD_MMX) |
michael@0 | 103 | return 1; |
michael@0 | 104 | |
michael@0 | 105 | return 0; |
michael@0 | 106 | } |
michael@0 | 107 | |
michael@0 | 108 | GLOBAL(int) |
michael@0 | 109 | jsimd_can_ycc_rgb (void) |
michael@0 | 110 | { |
michael@0 | 111 | init_simd(); |
michael@0 | 112 | |
michael@0 | 113 | /* The code is optimised for these values only */ |
michael@0 | 114 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 115 | return 0; |
michael@0 | 116 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 117 | return 0; |
michael@0 | 118 | if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
michael@0 | 119 | return 0; |
michael@0 | 120 | |
michael@0 | 121 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 122 | IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
michael@0 | 123 | return 1; |
michael@0 | 124 | if (simd_support & JSIMD_MMX) |
michael@0 | 125 | return 1; |
michael@0 | 126 | |
michael@0 | 127 | return 0; |
michael@0 | 128 | } |
michael@0 | 129 | |
michael@0 | 130 | GLOBAL(void) |
michael@0 | 131 | jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
michael@0 | 132 | JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
michael@0 | 133 | JDIMENSION output_row, int num_rows) |
michael@0 | 134 | { |
michael@0 | 135 | void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
michael@0 | 136 | void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
michael@0 | 137 | |
michael@0 | 138 | switch(cinfo->in_color_space) |
michael@0 | 139 | { |
michael@0 | 140 | case JCS_EXT_RGB: |
michael@0 | 141 | sse2fct=jsimd_extrgb_ycc_convert_sse2; |
michael@0 | 142 | mmxfct=jsimd_extrgb_ycc_convert_mmx; |
michael@0 | 143 | break; |
michael@0 | 144 | case JCS_EXT_RGBX: |
michael@0 | 145 | case JCS_EXT_RGBA: |
michael@0 | 146 | sse2fct=jsimd_extrgbx_ycc_convert_sse2; |
michael@0 | 147 | mmxfct=jsimd_extrgbx_ycc_convert_mmx; |
michael@0 | 148 | break; |
michael@0 | 149 | case JCS_EXT_BGR: |
michael@0 | 150 | sse2fct=jsimd_extbgr_ycc_convert_sse2; |
michael@0 | 151 | mmxfct=jsimd_extbgr_ycc_convert_mmx; |
michael@0 | 152 | break; |
michael@0 | 153 | case JCS_EXT_BGRX: |
michael@0 | 154 | case JCS_EXT_BGRA: |
michael@0 | 155 | sse2fct=jsimd_extbgrx_ycc_convert_sse2; |
michael@0 | 156 | mmxfct=jsimd_extbgrx_ycc_convert_mmx; |
michael@0 | 157 | break; |
michael@0 | 158 | case JCS_EXT_XBGR: |
michael@0 | 159 | case JCS_EXT_ABGR: |
michael@0 | 160 | sse2fct=jsimd_extxbgr_ycc_convert_sse2; |
michael@0 | 161 | mmxfct=jsimd_extxbgr_ycc_convert_mmx; |
michael@0 | 162 | break; |
michael@0 | 163 | case JCS_EXT_XRGB: |
michael@0 | 164 | case JCS_EXT_ARGB: |
michael@0 | 165 | sse2fct=jsimd_extxrgb_ycc_convert_sse2; |
michael@0 | 166 | mmxfct=jsimd_extxrgb_ycc_convert_mmx; |
michael@0 | 167 | break; |
michael@0 | 168 | default: |
michael@0 | 169 | sse2fct=jsimd_rgb_ycc_convert_sse2; |
michael@0 | 170 | mmxfct=jsimd_rgb_ycc_convert_mmx; |
michael@0 | 171 | break; |
michael@0 | 172 | } |
michael@0 | 173 | |
michael@0 | 174 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 175 | IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
michael@0 | 176 | sse2fct(cinfo->image_width, input_buf, |
michael@0 | 177 | output_buf, output_row, num_rows); |
michael@0 | 178 | else if (simd_support & JSIMD_MMX) |
michael@0 | 179 | mmxfct(cinfo->image_width, input_buf, |
michael@0 | 180 | output_buf, output_row, num_rows); |
michael@0 | 181 | } |
michael@0 | 182 | |
michael@0 | 183 | GLOBAL(void) |
michael@0 | 184 | jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
michael@0 | 185 | JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
michael@0 | 186 | JDIMENSION output_row, int num_rows) |
michael@0 | 187 | { |
michael@0 | 188 | void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
michael@0 | 189 | void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
michael@0 | 190 | |
michael@0 | 191 | switch(cinfo->in_color_space) |
michael@0 | 192 | { |
michael@0 | 193 | case JCS_EXT_RGB: |
michael@0 | 194 | sse2fct=jsimd_extrgb_gray_convert_sse2; |
michael@0 | 195 | mmxfct=jsimd_extrgb_gray_convert_mmx; |
michael@0 | 196 | break; |
michael@0 | 197 | case JCS_EXT_RGBX: |
michael@0 | 198 | case JCS_EXT_RGBA: |
michael@0 | 199 | sse2fct=jsimd_extrgbx_gray_convert_sse2; |
michael@0 | 200 | mmxfct=jsimd_extrgbx_gray_convert_mmx; |
michael@0 | 201 | break; |
michael@0 | 202 | case JCS_EXT_BGR: |
michael@0 | 203 | sse2fct=jsimd_extbgr_gray_convert_sse2; |
michael@0 | 204 | mmxfct=jsimd_extbgr_gray_convert_mmx; |
michael@0 | 205 | break; |
michael@0 | 206 | case JCS_EXT_BGRX: |
michael@0 | 207 | case JCS_EXT_BGRA: |
michael@0 | 208 | sse2fct=jsimd_extbgrx_gray_convert_sse2; |
michael@0 | 209 | mmxfct=jsimd_extbgrx_gray_convert_mmx; |
michael@0 | 210 | break; |
michael@0 | 211 | case JCS_EXT_XBGR: |
michael@0 | 212 | case JCS_EXT_ABGR: |
michael@0 | 213 | sse2fct=jsimd_extxbgr_gray_convert_sse2; |
michael@0 | 214 | mmxfct=jsimd_extxbgr_gray_convert_mmx; |
michael@0 | 215 | break; |
michael@0 | 216 | case JCS_EXT_XRGB: |
michael@0 | 217 | case JCS_EXT_ARGB: |
michael@0 | 218 | sse2fct=jsimd_extxrgb_gray_convert_sse2; |
michael@0 | 219 | mmxfct=jsimd_extxrgb_gray_convert_mmx; |
michael@0 | 220 | break; |
michael@0 | 221 | default: |
michael@0 | 222 | sse2fct=jsimd_rgb_gray_convert_sse2; |
michael@0 | 223 | mmxfct=jsimd_rgb_gray_convert_mmx; |
michael@0 | 224 | break; |
michael@0 | 225 | } |
michael@0 | 226 | |
michael@0 | 227 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 228 | IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
michael@0 | 229 | sse2fct(cinfo->image_width, input_buf, |
michael@0 | 230 | output_buf, output_row, num_rows); |
michael@0 | 231 | else if (simd_support & JSIMD_MMX) |
michael@0 | 232 | mmxfct(cinfo->image_width, input_buf, |
michael@0 | 233 | output_buf, output_row, num_rows); |
michael@0 | 234 | } |
michael@0 | 235 | |
michael@0 | 236 | GLOBAL(void) |
michael@0 | 237 | jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
michael@0 | 238 | JSAMPIMAGE input_buf, JDIMENSION input_row, |
michael@0 | 239 | JSAMPARRAY output_buf, int num_rows) |
michael@0 | 240 | { |
michael@0 | 241 | void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
michael@0 | 242 | void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
michael@0 | 243 | |
michael@0 | 244 | switch(cinfo->out_color_space) |
michael@0 | 245 | { |
michael@0 | 246 | case JCS_EXT_RGB: |
michael@0 | 247 | sse2fct=jsimd_ycc_extrgb_convert_sse2; |
michael@0 | 248 | mmxfct=jsimd_ycc_extrgb_convert_mmx; |
michael@0 | 249 | break; |
michael@0 | 250 | case JCS_EXT_RGBX: |
michael@0 | 251 | case JCS_EXT_RGBA: |
michael@0 | 252 | sse2fct=jsimd_ycc_extrgbx_convert_sse2; |
michael@0 | 253 | mmxfct=jsimd_ycc_extrgbx_convert_mmx; |
michael@0 | 254 | break; |
michael@0 | 255 | case JCS_EXT_BGR: |
michael@0 | 256 | sse2fct=jsimd_ycc_extbgr_convert_sse2; |
michael@0 | 257 | mmxfct=jsimd_ycc_extbgr_convert_mmx; |
michael@0 | 258 | break; |
michael@0 | 259 | case JCS_EXT_BGRX: |
michael@0 | 260 | case JCS_EXT_BGRA: |
michael@0 | 261 | sse2fct=jsimd_ycc_extbgrx_convert_sse2; |
michael@0 | 262 | mmxfct=jsimd_ycc_extbgrx_convert_mmx; |
michael@0 | 263 | break; |
michael@0 | 264 | case JCS_EXT_XBGR: |
michael@0 | 265 | case JCS_EXT_ABGR: |
michael@0 | 266 | sse2fct=jsimd_ycc_extxbgr_convert_sse2; |
michael@0 | 267 | mmxfct=jsimd_ycc_extxbgr_convert_mmx; |
michael@0 | 268 | break; |
michael@0 | 269 | case JCS_EXT_XRGB: |
michael@0 | 270 | case JCS_EXT_ARGB: |
michael@0 | 271 | sse2fct=jsimd_ycc_extxrgb_convert_sse2; |
michael@0 | 272 | mmxfct=jsimd_ycc_extxrgb_convert_mmx; |
michael@0 | 273 | break; |
michael@0 | 274 | default: |
michael@0 | 275 | sse2fct=jsimd_ycc_rgb_convert_sse2; |
michael@0 | 276 | mmxfct=jsimd_ycc_rgb_convert_mmx; |
michael@0 | 277 | break; |
michael@0 | 278 | } |
michael@0 | 279 | |
michael@0 | 280 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 281 | IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
michael@0 | 282 | sse2fct(cinfo->output_width, input_buf, |
michael@0 | 283 | input_row, output_buf, num_rows); |
michael@0 | 284 | else if (simd_support & JSIMD_MMX) |
michael@0 | 285 | mmxfct(cinfo->output_width, input_buf, |
michael@0 | 286 | input_row, output_buf, num_rows); |
michael@0 | 287 | } |
michael@0 | 288 | |
michael@0 | 289 | GLOBAL(int) |
michael@0 | 290 | jsimd_can_h2v2_downsample (void) |
michael@0 | 291 | { |
michael@0 | 292 | init_simd(); |
michael@0 | 293 | |
michael@0 | 294 | /* The code is optimised for these values only */ |
michael@0 | 295 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 296 | return 0; |
michael@0 | 297 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 298 | return 0; |
michael@0 | 299 | |
michael@0 | 300 | if (simd_support & JSIMD_SSE2) |
michael@0 | 301 | return 1; |
michael@0 | 302 | if (simd_support & JSIMD_MMX) |
michael@0 | 303 | return 1; |
michael@0 | 304 | |
michael@0 | 305 | return 0; |
michael@0 | 306 | } |
michael@0 | 307 | |
michael@0 | 308 | GLOBAL(int) |
michael@0 | 309 | jsimd_can_h2v1_downsample (void) |
michael@0 | 310 | { |
michael@0 | 311 | init_simd(); |
michael@0 | 312 | |
michael@0 | 313 | /* The code is optimised for these values only */ |
michael@0 | 314 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 315 | return 0; |
michael@0 | 316 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 317 | return 0; |
michael@0 | 318 | |
michael@0 | 319 | if (simd_support & JSIMD_SSE2) |
michael@0 | 320 | return 1; |
michael@0 | 321 | if (simd_support & JSIMD_MMX) |
michael@0 | 322 | return 1; |
michael@0 | 323 | |
michael@0 | 324 | return 0; |
michael@0 | 325 | } |
michael@0 | 326 | |
michael@0 | 327 | GLOBAL(void) |
michael@0 | 328 | jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
michael@0 | 329 | JSAMPARRAY input_data, JSAMPARRAY output_data) |
michael@0 | 330 | { |
michael@0 | 331 | if (simd_support & JSIMD_SSE2) |
michael@0 | 332 | jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
michael@0 | 333 | compptr->v_samp_factor, compptr->width_in_blocks, |
michael@0 | 334 | input_data, output_data); |
michael@0 | 335 | else if (simd_support & JSIMD_MMX) |
michael@0 | 336 | jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
michael@0 | 337 | compptr->v_samp_factor, compptr->width_in_blocks, |
michael@0 | 338 | input_data, output_data); |
michael@0 | 339 | } |
michael@0 | 340 | |
michael@0 | 341 | GLOBAL(void) |
michael@0 | 342 | jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
michael@0 | 343 | JSAMPARRAY input_data, JSAMPARRAY output_data) |
michael@0 | 344 | { |
michael@0 | 345 | if (simd_support & JSIMD_SSE2) |
michael@0 | 346 | jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
michael@0 | 347 | compptr->v_samp_factor, compptr->width_in_blocks, |
michael@0 | 348 | input_data, output_data); |
michael@0 | 349 | else if (simd_support & JSIMD_MMX) |
michael@0 | 350 | jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
michael@0 | 351 | compptr->v_samp_factor, compptr->width_in_blocks, |
michael@0 | 352 | input_data, output_data); |
michael@0 | 353 | } |
michael@0 | 354 | |
michael@0 | 355 | GLOBAL(int) |
michael@0 | 356 | jsimd_can_h2v2_upsample (void) |
michael@0 | 357 | { |
michael@0 | 358 | init_simd(); |
michael@0 | 359 | |
michael@0 | 360 | /* The code is optimised for these values only */ |
michael@0 | 361 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 362 | return 0; |
michael@0 | 363 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 364 | return 0; |
michael@0 | 365 | |
michael@0 | 366 | if (simd_support & JSIMD_SSE2) |
michael@0 | 367 | return 1; |
michael@0 | 368 | if (simd_support & JSIMD_MMX) |
michael@0 | 369 | return 1; |
michael@0 | 370 | |
michael@0 | 371 | return 0; |
michael@0 | 372 | } |
michael@0 | 373 | |
michael@0 | 374 | GLOBAL(int) |
michael@0 | 375 | jsimd_can_h2v1_upsample (void) |
michael@0 | 376 | { |
michael@0 | 377 | init_simd(); |
michael@0 | 378 | |
michael@0 | 379 | /* The code is optimised for these values only */ |
michael@0 | 380 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 381 | return 0; |
michael@0 | 382 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 383 | return 0; |
michael@0 | 384 | |
michael@0 | 385 | if (simd_support & JSIMD_SSE2) |
michael@0 | 386 | return 1; |
michael@0 | 387 | if (simd_support & JSIMD_MMX) |
michael@0 | 388 | return 1; |
michael@0 | 389 | |
michael@0 | 390 | return 0; |
michael@0 | 391 | } |
michael@0 | 392 | |
michael@0 | 393 | GLOBAL(void) |
michael@0 | 394 | jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
michael@0 | 395 | jpeg_component_info * compptr, |
michael@0 | 396 | JSAMPARRAY input_data, |
michael@0 | 397 | JSAMPARRAY * output_data_ptr) |
michael@0 | 398 | { |
michael@0 | 399 | if (simd_support & JSIMD_SSE2) |
michael@0 | 400 | jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, |
michael@0 | 401 | cinfo->output_width, input_data, output_data_ptr); |
michael@0 | 402 | else if (simd_support & JSIMD_MMX) |
michael@0 | 403 | jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, |
michael@0 | 404 | cinfo->output_width, input_data, output_data_ptr); |
michael@0 | 405 | } |
michael@0 | 406 | |
michael@0 | 407 | GLOBAL(void) |
michael@0 | 408 | jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
michael@0 | 409 | jpeg_component_info * compptr, |
michael@0 | 410 | JSAMPARRAY input_data, |
michael@0 | 411 | JSAMPARRAY * output_data_ptr) |
michael@0 | 412 | { |
michael@0 | 413 | if (simd_support & JSIMD_SSE2) |
michael@0 | 414 | jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, |
michael@0 | 415 | cinfo->output_width, input_data, output_data_ptr); |
michael@0 | 416 | else if (simd_support & JSIMD_MMX) |
michael@0 | 417 | jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, |
michael@0 | 418 | cinfo->output_width, input_data, output_data_ptr); |
michael@0 | 419 | } |
michael@0 | 420 | |
michael@0 | 421 | GLOBAL(int) |
michael@0 | 422 | jsimd_can_h2v2_fancy_upsample (void) |
michael@0 | 423 | { |
michael@0 | 424 | init_simd(); |
michael@0 | 425 | |
michael@0 | 426 | /* The code is optimised for these values only */ |
michael@0 | 427 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 428 | return 0; |
michael@0 | 429 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 430 | return 0; |
michael@0 | 431 | |
michael@0 | 432 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 433 | IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
michael@0 | 434 | return 1; |
michael@0 | 435 | if (simd_support & JSIMD_MMX) |
michael@0 | 436 | return 1; |
michael@0 | 437 | |
michael@0 | 438 | return 0; |
michael@0 | 439 | } |
michael@0 | 440 | |
michael@0 | 441 | GLOBAL(int) |
michael@0 | 442 | jsimd_can_h2v1_fancy_upsample (void) |
michael@0 | 443 | { |
michael@0 | 444 | init_simd(); |
michael@0 | 445 | |
michael@0 | 446 | /* The code is optimised for these values only */ |
michael@0 | 447 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 448 | return 0; |
michael@0 | 449 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 450 | return 0; |
michael@0 | 451 | |
michael@0 | 452 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 453 | IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
michael@0 | 454 | return 1; |
michael@0 | 455 | if (simd_support & JSIMD_MMX) |
michael@0 | 456 | return 1; |
michael@0 | 457 | |
michael@0 | 458 | return 0; |
michael@0 | 459 | } |
michael@0 | 460 | |
michael@0 | 461 | GLOBAL(void) |
michael@0 | 462 | jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
michael@0 | 463 | jpeg_component_info * compptr, |
michael@0 | 464 | JSAMPARRAY input_data, |
michael@0 | 465 | JSAMPARRAY * output_data_ptr) |
michael@0 | 466 | { |
michael@0 | 467 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 468 | IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
michael@0 | 469 | jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
michael@0 | 470 | compptr->downsampled_width, input_data, output_data_ptr); |
michael@0 | 471 | else if (simd_support & JSIMD_MMX) |
michael@0 | 472 | jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
michael@0 | 473 | compptr->downsampled_width, input_data, output_data_ptr); |
michael@0 | 474 | } |
michael@0 | 475 | |
michael@0 | 476 | GLOBAL(void) |
michael@0 | 477 | jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
michael@0 | 478 | jpeg_component_info * compptr, |
michael@0 | 479 | JSAMPARRAY input_data, |
michael@0 | 480 | JSAMPARRAY * output_data_ptr) |
michael@0 | 481 | { |
michael@0 | 482 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 483 | IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
michael@0 | 484 | jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
michael@0 | 485 | compptr->downsampled_width, input_data, output_data_ptr); |
michael@0 | 486 | else if (simd_support & JSIMD_MMX) |
michael@0 | 487 | jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
michael@0 | 488 | compptr->downsampled_width, input_data, output_data_ptr); |
michael@0 | 489 | } |
michael@0 | 490 | |
michael@0 | 491 | GLOBAL(int) |
michael@0 | 492 | jsimd_can_h2v2_merged_upsample (void) |
michael@0 | 493 | { |
michael@0 | 494 | init_simd(); |
michael@0 | 495 | |
michael@0 | 496 | /* The code is optimised for these values only */ |
michael@0 | 497 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 498 | return 0; |
michael@0 | 499 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 500 | return 0; |
michael@0 | 501 | |
michael@0 | 502 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 503 | IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
michael@0 | 504 | return 1; |
michael@0 | 505 | if (simd_support & JSIMD_MMX) |
michael@0 | 506 | return 1; |
michael@0 | 507 | |
michael@0 | 508 | return 0; |
michael@0 | 509 | } |
michael@0 | 510 | |
michael@0 | 511 | GLOBAL(int) |
michael@0 | 512 | jsimd_can_h2v1_merged_upsample (void) |
michael@0 | 513 | { |
michael@0 | 514 | init_simd(); |
michael@0 | 515 | |
michael@0 | 516 | /* The code is optimised for these values only */ |
michael@0 | 517 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 518 | return 0; |
michael@0 | 519 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 520 | return 0; |
michael@0 | 521 | |
michael@0 | 522 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 523 | IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
michael@0 | 524 | return 1; |
michael@0 | 525 | if (simd_support & JSIMD_MMX) |
michael@0 | 526 | return 1; |
michael@0 | 527 | |
michael@0 | 528 | return 0; |
michael@0 | 529 | } |
michael@0 | 530 | |
michael@0 | 531 | GLOBAL(void) |
michael@0 | 532 | jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
michael@0 | 533 | JSAMPIMAGE input_buf, |
michael@0 | 534 | JDIMENSION in_row_group_ctr, |
michael@0 | 535 | JSAMPARRAY output_buf) |
michael@0 | 536 | { |
michael@0 | 537 | void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
michael@0 | 538 | void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
michael@0 | 539 | |
michael@0 | 540 | switch(cinfo->out_color_space) |
michael@0 | 541 | { |
michael@0 | 542 | case JCS_EXT_RGB: |
michael@0 | 543 | sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; |
michael@0 | 544 | mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; |
michael@0 | 545 | break; |
michael@0 | 546 | case JCS_EXT_RGBX: |
michael@0 | 547 | case JCS_EXT_RGBA: |
michael@0 | 548 | sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; |
michael@0 | 549 | mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; |
michael@0 | 550 | break; |
michael@0 | 551 | case JCS_EXT_BGR: |
michael@0 | 552 | sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; |
michael@0 | 553 | mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; |
michael@0 | 554 | break; |
michael@0 | 555 | case JCS_EXT_BGRX: |
michael@0 | 556 | case JCS_EXT_BGRA: |
michael@0 | 557 | sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; |
michael@0 | 558 | mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; |
michael@0 | 559 | break; |
michael@0 | 560 | case JCS_EXT_XBGR: |
michael@0 | 561 | case JCS_EXT_ABGR: |
michael@0 | 562 | sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; |
michael@0 | 563 | mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; |
michael@0 | 564 | break; |
michael@0 | 565 | case JCS_EXT_XRGB: |
michael@0 | 566 | case JCS_EXT_ARGB: |
michael@0 | 567 | sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; |
michael@0 | 568 | mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; |
michael@0 | 569 | break; |
michael@0 | 570 | default: |
michael@0 | 571 | sse2fct=jsimd_h2v2_merged_upsample_sse2; |
michael@0 | 572 | mmxfct=jsimd_h2v2_merged_upsample_mmx; |
michael@0 | 573 | break; |
michael@0 | 574 | } |
michael@0 | 575 | |
michael@0 | 576 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 577 | IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
michael@0 | 578 | sse2fct(cinfo->output_width, input_buf, |
michael@0 | 579 | in_row_group_ctr, output_buf); |
michael@0 | 580 | else if (simd_support & JSIMD_MMX) |
michael@0 | 581 | mmxfct(cinfo->output_width, input_buf, |
michael@0 | 582 | in_row_group_ctr, output_buf); |
michael@0 | 583 | } |
michael@0 | 584 | |
michael@0 | 585 | GLOBAL(void) |
michael@0 | 586 | jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
michael@0 | 587 | JSAMPIMAGE input_buf, |
michael@0 | 588 | JDIMENSION in_row_group_ctr, |
michael@0 | 589 | JSAMPARRAY output_buf) |
michael@0 | 590 | { |
michael@0 | 591 | void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
michael@0 | 592 | void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
michael@0 | 593 | |
michael@0 | 594 | switch(cinfo->out_color_space) |
michael@0 | 595 | { |
michael@0 | 596 | case JCS_EXT_RGB: |
michael@0 | 597 | sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; |
michael@0 | 598 | mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; |
michael@0 | 599 | break; |
michael@0 | 600 | case JCS_EXT_RGBX: |
michael@0 | 601 | case JCS_EXT_RGBA: |
michael@0 | 602 | sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; |
michael@0 | 603 | mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; |
michael@0 | 604 | break; |
michael@0 | 605 | case JCS_EXT_BGR: |
michael@0 | 606 | sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; |
michael@0 | 607 | mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; |
michael@0 | 608 | break; |
michael@0 | 609 | case JCS_EXT_BGRX: |
michael@0 | 610 | case JCS_EXT_BGRA: |
michael@0 | 611 | sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; |
michael@0 | 612 | mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; |
michael@0 | 613 | break; |
michael@0 | 614 | case JCS_EXT_XBGR: |
michael@0 | 615 | case JCS_EXT_ABGR: |
michael@0 | 616 | sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; |
michael@0 | 617 | mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; |
michael@0 | 618 | break; |
michael@0 | 619 | case JCS_EXT_XRGB: |
michael@0 | 620 | case JCS_EXT_ARGB: |
michael@0 | 621 | sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; |
michael@0 | 622 | mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; |
michael@0 | 623 | break; |
michael@0 | 624 | default: |
michael@0 | 625 | sse2fct=jsimd_h2v1_merged_upsample_sse2; |
michael@0 | 626 | mmxfct=jsimd_h2v1_merged_upsample_mmx; |
michael@0 | 627 | break; |
michael@0 | 628 | } |
michael@0 | 629 | |
michael@0 | 630 | if ((simd_support & JSIMD_SSE2) && |
michael@0 | 631 | IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
michael@0 | 632 | sse2fct(cinfo->output_width, input_buf, |
michael@0 | 633 | in_row_group_ctr, output_buf); |
michael@0 | 634 | else if (simd_support & JSIMD_MMX) |
michael@0 | 635 | mmxfct(cinfo->output_width, input_buf, |
michael@0 | 636 | in_row_group_ctr, output_buf); |
michael@0 | 637 | } |
michael@0 | 638 | |
michael@0 | 639 | GLOBAL(int) |
michael@0 | 640 | jsimd_can_convsamp (void) |
michael@0 | 641 | { |
michael@0 | 642 | init_simd(); |
michael@0 | 643 | |
michael@0 | 644 | /* The code is optimised for these values only */ |
michael@0 | 645 | if (DCTSIZE != 8) |
michael@0 | 646 | return 0; |
michael@0 | 647 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 648 | return 0; |
michael@0 | 649 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 650 | return 0; |
michael@0 | 651 | if (sizeof(DCTELEM) != 2) |
michael@0 | 652 | return 0; |
michael@0 | 653 | |
michael@0 | 654 | if (simd_support & JSIMD_SSE2) |
michael@0 | 655 | return 1; |
michael@0 | 656 | if (simd_support & JSIMD_MMX) |
michael@0 | 657 | return 1; |
michael@0 | 658 | |
michael@0 | 659 | return 0; |
michael@0 | 660 | } |
michael@0 | 661 | |
michael@0 | 662 | GLOBAL(int) |
michael@0 | 663 | jsimd_can_convsamp_float (void) |
michael@0 | 664 | { |
michael@0 | 665 | init_simd(); |
michael@0 | 666 | |
michael@0 | 667 | /* The code is optimised for these values only */ |
michael@0 | 668 | if (DCTSIZE != 8) |
michael@0 | 669 | return 0; |
michael@0 | 670 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 671 | return 0; |
michael@0 | 672 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 673 | return 0; |
michael@0 | 674 | if (sizeof(FAST_FLOAT) != 4) |
michael@0 | 675 | return 0; |
michael@0 | 676 | |
michael@0 | 677 | if (simd_support & JSIMD_SSE2) |
michael@0 | 678 | return 1; |
michael@0 | 679 | if (simd_support & JSIMD_SSE) |
michael@0 | 680 | return 1; |
michael@0 | 681 | if (simd_support & JSIMD_3DNOW) |
michael@0 | 682 | return 1; |
michael@0 | 683 | |
michael@0 | 684 | return 0; |
michael@0 | 685 | } |
michael@0 | 686 | |
michael@0 | 687 | GLOBAL(void) |
michael@0 | 688 | jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
michael@0 | 689 | DCTELEM * workspace) |
michael@0 | 690 | { |
michael@0 | 691 | if (simd_support & JSIMD_SSE2) |
michael@0 | 692 | jsimd_convsamp_sse2(sample_data, start_col, workspace); |
michael@0 | 693 | else if (simd_support & JSIMD_MMX) |
michael@0 | 694 | jsimd_convsamp_mmx(sample_data, start_col, workspace); |
michael@0 | 695 | } |
michael@0 | 696 | |
michael@0 | 697 | GLOBAL(void) |
michael@0 | 698 | jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
michael@0 | 699 | FAST_FLOAT * workspace) |
michael@0 | 700 | { |
michael@0 | 701 | if (simd_support & JSIMD_SSE2) |
michael@0 | 702 | jsimd_convsamp_float_sse2(sample_data, start_col, workspace); |
michael@0 | 703 | else if (simd_support & JSIMD_SSE) |
michael@0 | 704 | jsimd_convsamp_float_sse(sample_data, start_col, workspace); |
michael@0 | 705 | else if (simd_support & JSIMD_3DNOW) |
michael@0 | 706 | jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); |
michael@0 | 707 | } |
michael@0 | 708 | |
michael@0 | 709 | GLOBAL(int) |
michael@0 | 710 | jsimd_can_fdct_islow (void) |
michael@0 | 711 | { |
michael@0 | 712 | init_simd(); |
michael@0 | 713 | |
michael@0 | 714 | /* The code is optimised for these values only */ |
michael@0 | 715 | if (DCTSIZE != 8) |
michael@0 | 716 | return 0; |
michael@0 | 717 | if (sizeof(DCTELEM) != 2) |
michael@0 | 718 | return 0; |
michael@0 | 719 | |
michael@0 | 720 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
michael@0 | 721 | return 1; |
michael@0 | 722 | if (simd_support & JSIMD_MMX) |
michael@0 | 723 | return 1; |
michael@0 | 724 | |
michael@0 | 725 | return 0; |
michael@0 | 726 | } |
michael@0 | 727 | |
michael@0 | 728 | GLOBAL(int) |
michael@0 | 729 | jsimd_can_fdct_ifast (void) |
michael@0 | 730 | { |
michael@0 | 731 | init_simd(); |
michael@0 | 732 | |
michael@0 | 733 | /* The code is optimised for these values only */ |
michael@0 | 734 | if (DCTSIZE != 8) |
michael@0 | 735 | return 0; |
michael@0 | 736 | if (sizeof(DCTELEM) != 2) |
michael@0 | 737 | return 0; |
michael@0 | 738 | |
michael@0 | 739 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) |
michael@0 | 740 | return 1; |
michael@0 | 741 | if (simd_support & JSIMD_MMX) |
michael@0 | 742 | return 1; |
michael@0 | 743 | |
michael@0 | 744 | return 0; |
michael@0 | 745 | } |
michael@0 | 746 | |
michael@0 | 747 | GLOBAL(int) |
michael@0 | 748 | jsimd_can_fdct_float (void) |
michael@0 | 749 | { |
michael@0 | 750 | init_simd(); |
michael@0 | 751 | |
michael@0 | 752 | /* The code is optimised for these values only */ |
michael@0 | 753 | if (DCTSIZE != 8) |
michael@0 | 754 | return 0; |
michael@0 | 755 | if (sizeof(FAST_FLOAT) != 4) |
michael@0 | 756 | return 0; |
michael@0 | 757 | |
michael@0 | 758 | if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
michael@0 | 759 | return 1; |
michael@0 | 760 | if (simd_support & JSIMD_3DNOW) |
michael@0 | 761 | return 1; |
michael@0 | 762 | |
michael@0 | 763 | return 0; |
michael@0 | 764 | } |
michael@0 | 765 | |
michael@0 | 766 | GLOBAL(void) |
michael@0 | 767 | jsimd_fdct_islow (DCTELEM * data) |
michael@0 | 768 | { |
michael@0 | 769 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
michael@0 | 770 | jsimd_fdct_islow_sse2(data); |
michael@0 | 771 | else if (simd_support & JSIMD_MMX) |
michael@0 | 772 | jsimd_fdct_islow_mmx(data); |
michael@0 | 773 | } |
michael@0 | 774 | |
michael@0 | 775 | GLOBAL(void) |
michael@0 | 776 | jsimd_fdct_ifast (DCTELEM * data) |
michael@0 | 777 | { |
michael@0 | 778 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
michael@0 | 779 | jsimd_fdct_ifast_sse2(data); |
michael@0 | 780 | else if (simd_support & JSIMD_MMX) |
michael@0 | 781 | jsimd_fdct_ifast_mmx(data); |
michael@0 | 782 | } |
michael@0 | 783 | |
michael@0 | 784 | GLOBAL(void) |
michael@0 | 785 | jsimd_fdct_float (FAST_FLOAT * data) |
michael@0 | 786 | { |
michael@0 | 787 | if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
michael@0 | 788 | jsimd_fdct_float_sse(data); |
michael@0 | 789 | else if (simd_support & JSIMD_3DNOW) |
michael@0 | 790 | jsimd_fdct_float_3dnow(data); |
michael@0 | 791 | } |
michael@0 | 792 | |
michael@0 | 793 | GLOBAL(int) |
michael@0 | 794 | jsimd_can_quantize (void) |
michael@0 | 795 | { |
michael@0 | 796 | init_simd(); |
michael@0 | 797 | |
michael@0 | 798 | /* The code is optimised for these values only */ |
michael@0 | 799 | if (DCTSIZE != 8) |
michael@0 | 800 | return 0; |
michael@0 | 801 | if (sizeof(JCOEF) != 2) |
michael@0 | 802 | return 0; |
michael@0 | 803 | if (sizeof(DCTELEM) != 2) |
michael@0 | 804 | return 0; |
michael@0 | 805 | |
michael@0 | 806 | if (simd_support & JSIMD_SSE2) |
michael@0 | 807 | return 1; |
michael@0 | 808 | if (simd_support & JSIMD_MMX) |
michael@0 | 809 | return 1; |
michael@0 | 810 | |
michael@0 | 811 | return 0; |
michael@0 | 812 | } |
michael@0 | 813 | |
michael@0 | 814 | GLOBAL(int) |
michael@0 | 815 | jsimd_can_quantize_float (void) |
michael@0 | 816 | { |
michael@0 | 817 | init_simd(); |
michael@0 | 818 | |
michael@0 | 819 | /* The code is optimised for these values only */ |
michael@0 | 820 | if (DCTSIZE != 8) |
michael@0 | 821 | return 0; |
michael@0 | 822 | if (sizeof(JCOEF) != 2) |
michael@0 | 823 | return 0; |
michael@0 | 824 | if (sizeof(FAST_FLOAT) != 4) |
michael@0 | 825 | return 0; |
michael@0 | 826 | |
michael@0 | 827 | if (simd_support & JSIMD_SSE2) |
michael@0 | 828 | return 1; |
michael@0 | 829 | if (simd_support & JSIMD_SSE) |
michael@0 | 830 | return 1; |
michael@0 | 831 | if (simd_support & JSIMD_3DNOW) |
michael@0 | 832 | return 1; |
michael@0 | 833 | |
michael@0 | 834 | return 0; |
michael@0 | 835 | } |
michael@0 | 836 | |
michael@0 | 837 | GLOBAL(void) |
michael@0 | 838 | jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, |
michael@0 | 839 | DCTELEM * workspace) |
michael@0 | 840 | { |
michael@0 | 841 | if (simd_support & JSIMD_SSE2) |
michael@0 | 842 | jsimd_quantize_sse2(coef_block, divisors, workspace); |
michael@0 | 843 | else if (simd_support & JSIMD_MMX) |
michael@0 | 844 | jsimd_quantize_mmx(coef_block, divisors, workspace); |
michael@0 | 845 | } |
michael@0 | 846 | |
michael@0 | 847 | GLOBAL(void) |
michael@0 | 848 | jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
michael@0 | 849 | FAST_FLOAT * workspace) |
michael@0 | 850 | { |
michael@0 | 851 | if (simd_support & JSIMD_SSE2) |
michael@0 | 852 | jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
michael@0 | 853 | else if (simd_support & JSIMD_SSE) |
michael@0 | 854 | jsimd_quantize_float_sse(coef_block, divisors, workspace); |
michael@0 | 855 | else if (simd_support & JSIMD_3DNOW) |
michael@0 | 856 | jsimd_quantize_float_3dnow(coef_block, divisors, workspace); |
michael@0 | 857 | } |
michael@0 | 858 | |
michael@0 | 859 | GLOBAL(int) |
michael@0 | 860 | jsimd_can_idct_2x2 (void) |
michael@0 | 861 | { |
michael@0 | 862 | init_simd(); |
michael@0 | 863 | |
michael@0 | 864 | /* The code is optimised for these values only */ |
michael@0 | 865 | if (DCTSIZE != 8) |
michael@0 | 866 | return 0; |
michael@0 | 867 | if (sizeof(JCOEF) != 2) |
michael@0 | 868 | return 0; |
michael@0 | 869 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 870 | return 0; |
michael@0 | 871 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 872 | return 0; |
michael@0 | 873 | if (sizeof(ISLOW_MULT_TYPE) != 2) |
michael@0 | 874 | return 0; |
michael@0 | 875 | |
michael@0 | 876 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
michael@0 | 877 | return 1; |
michael@0 | 878 | if (simd_support & JSIMD_MMX) |
michael@0 | 879 | return 1; |
michael@0 | 880 | |
michael@0 | 881 | return 0; |
michael@0 | 882 | } |
michael@0 | 883 | |
michael@0 | 884 | GLOBAL(int) |
michael@0 | 885 | jsimd_can_idct_4x4 (void) |
michael@0 | 886 | { |
michael@0 | 887 | init_simd(); |
michael@0 | 888 | |
michael@0 | 889 | /* The code is optimised for these values only */ |
michael@0 | 890 | if (DCTSIZE != 8) |
michael@0 | 891 | return 0; |
michael@0 | 892 | if (sizeof(JCOEF) != 2) |
michael@0 | 893 | return 0; |
michael@0 | 894 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 895 | return 0; |
michael@0 | 896 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 897 | return 0; |
michael@0 | 898 | if (sizeof(ISLOW_MULT_TYPE) != 2) |
michael@0 | 899 | return 0; |
michael@0 | 900 | |
michael@0 | 901 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
michael@0 | 902 | return 1; |
michael@0 | 903 | if (simd_support & JSIMD_MMX) |
michael@0 | 904 | return 1; |
michael@0 | 905 | |
michael@0 | 906 | return 0; |
michael@0 | 907 | } |
michael@0 | 908 | |
michael@0 | 909 | GLOBAL(void) |
michael@0 | 910 | jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
michael@0 | 911 | JCOEFPTR coef_block, JSAMPARRAY output_buf, |
michael@0 | 912 | JDIMENSION output_col) |
michael@0 | 913 | { |
michael@0 | 914 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
michael@0 | 915 | jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
michael@0 | 916 | else if (simd_support & JSIMD_MMX) |
michael@0 | 917 | jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
michael@0 | 918 | } |
michael@0 | 919 | |
michael@0 | 920 | GLOBAL(void) |
michael@0 | 921 | jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
michael@0 | 922 | JCOEFPTR coef_block, JSAMPARRAY output_buf, |
michael@0 | 923 | JDIMENSION output_col) |
michael@0 | 924 | { |
michael@0 | 925 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
michael@0 | 926 | jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
michael@0 | 927 | else if (simd_support & JSIMD_MMX) |
michael@0 | 928 | jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
michael@0 | 929 | } |
michael@0 | 930 | |
michael@0 | 931 | GLOBAL(int) |
michael@0 | 932 | jsimd_can_idct_islow (void) |
michael@0 | 933 | { |
michael@0 | 934 | init_simd(); |
michael@0 | 935 | |
michael@0 | 936 | /* The code is optimised for these values only */ |
michael@0 | 937 | if (DCTSIZE != 8) |
michael@0 | 938 | return 0; |
michael@0 | 939 | if (sizeof(JCOEF) != 2) |
michael@0 | 940 | return 0; |
michael@0 | 941 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 942 | return 0; |
michael@0 | 943 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 944 | return 0; |
michael@0 | 945 | if (sizeof(ISLOW_MULT_TYPE) != 2) |
michael@0 | 946 | return 0; |
michael@0 | 947 | |
michael@0 | 948 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
michael@0 | 949 | return 1; |
michael@0 | 950 | if (simd_support & JSIMD_MMX) |
michael@0 | 951 | return 1; |
michael@0 | 952 | |
michael@0 | 953 | return 0; |
michael@0 | 954 | } |
michael@0 | 955 | |
michael@0 | 956 | GLOBAL(int) |
michael@0 | 957 | jsimd_can_idct_ifast (void) |
michael@0 | 958 | { |
michael@0 | 959 | init_simd(); |
michael@0 | 960 | |
michael@0 | 961 | /* The code is optimised for these values only */ |
michael@0 | 962 | if (DCTSIZE != 8) |
michael@0 | 963 | return 0; |
michael@0 | 964 | if (sizeof(JCOEF) != 2) |
michael@0 | 965 | return 0; |
michael@0 | 966 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 967 | return 0; |
michael@0 | 968 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 969 | return 0; |
michael@0 | 970 | if (sizeof(IFAST_MULT_TYPE) != 2) |
michael@0 | 971 | return 0; |
michael@0 | 972 | if (IFAST_SCALE_BITS != 2) |
michael@0 | 973 | return 0; |
michael@0 | 974 | |
michael@0 | 975 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
michael@0 | 976 | return 1; |
michael@0 | 977 | if (simd_support & JSIMD_MMX) |
michael@0 | 978 | return 1; |
michael@0 | 979 | |
michael@0 | 980 | return 0; |
michael@0 | 981 | } |
michael@0 | 982 | |
michael@0 | 983 | GLOBAL(int) |
michael@0 | 984 | jsimd_can_idct_float (void) |
michael@0 | 985 | { |
michael@0 | 986 | init_simd(); |
michael@0 | 987 | |
michael@0 | 988 | if (DCTSIZE != 8) |
michael@0 | 989 | return 0; |
michael@0 | 990 | if (sizeof(JCOEF) != 2) |
michael@0 | 991 | return 0; |
michael@0 | 992 | if (BITS_IN_JSAMPLE != 8) |
michael@0 | 993 | return 0; |
michael@0 | 994 | if (sizeof(JDIMENSION) != 4) |
michael@0 | 995 | return 0; |
michael@0 | 996 | if (sizeof(FAST_FLOAT) != 4) |
michael@0 | 997 | return 0; |
michael@0 | 998 | if (sizeof(FLOAT_MULT_TYPE) != 4) |
michael@0 | 999 | return 0; |
michael@0 | 1000 | |
michael@0 | 1001 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
michael@0 | 1002 | return 1; |
michael@0 | 1003 | if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
michael@0 | 1004 | return 1; |
michael@0 | 1005 | if (simd_support & JSIMD_3DNOW) |
michael@0 | 1006 | return 1; |
michael@0 | 1007 | |
michael@0 | 1008 | return 0; |
michael@0 | 1009 | } |
michael@0 | 1010 | |
michael@0 | 1011 | GLOBAL(void) |
michael@0 | 1012 | jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
michael@0 | 1013 | JCOEFPTR coef_block, JSAMPARRAY output_buf, |
michael@0 | 1014 | JDIMENSION output_col) |
michael@0 | 1015 | { |
michael@0 | 1016 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) |
michael@0 | 1017 | jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
michael@0 | 1018 | else if (simd_support & JSIMD_MMX) |
michael@0 | 1019 | jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
michael@0 | 1020 | } |
michael@0 | 1021 | |
michael@0 | 1022 | GLOBAL(void) |
michael@0 | 1023 | jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
michael@0 | 1024 | JCOEFPTR coef_block, JSAMPARRAY output_buf, |
michael@0 | 1025 | JDIMENSION output_col) |
michael@0 | 1026 | { |
michael@0 | 1027 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
michael@0 | 1028 | jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col); |
michael@0 | 1029 | else if (simd_support & JSIMD_MMX) |
michael@0 | 1030 | jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col); |
michael@0 | 1031 | } |
michael@0 | 1032 | |
michael@0 | 1033 | GLOBAL(void) |
michael@0 | 1034 | jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
michael@0 | 1035 | JCOEFPTR coef_block, JSAMPARRAY output_buf, |
michael@0 | 1036 | JDIMENSION output_col) |
michael@0 | 1037 | { |
michael@0 | 1038 | if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
michael@0 | 1039 | jsimd_idct_float_sse2(compptr->dct_table, coef_block, |
michael@0 | 1040 | output_buf, output_col); |
michael@0 | 1041 | else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
michael@0 | 1042 | jsimd_idct_float_sse(compptr->dct_table, coef_block, |
michael@0 | 1043 | output_buf, output_col); |
michael@0 | 1044 | else if (simd_support & JSIMD_3DNOW) |
michael@0 | 1045 | jsimd_idct_float_3dnow(compptr->dct_table, coef_block, |
michael@0 | 1046 | output_buf, output_col); |
michael@0 | 1047 | } |
michael@0 | 1048 |