1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libjpeg/simd/jsimd_i386.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1048 @@ 1.4 +/* 1.5 + * jsimd_i386.c 1.6 + * 1.7 + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 1.8 + * Copyright 2009-2011 D. R. Commander 1.9 + * 1.10 + * Based on the x86 SIMD extension for IJG JPEG library, 1.11 + * Copyright (C) 1999-2006, MIYASAKA Masaru. 1.12 + * For conditions of distribution and use, see copyright notice in jsimdext.inc 1.13 + * 1.14 + * This file contains the interface between the "normal" portions 1.15 + * of the library and the SIMD implementations when running on a 1.16 + * 32-bit x86 architecture. 1.17 + */ 1.18 + 1.19 +#define JPEG_INTERNALS 1.20 +#include "../jinclude.h" 1.21 +#include "../jpeglib.h" 1.22 +#include "../jsimd.h" 1.23 +#include "../jdct.h" 1.24 +#include "../jsimddct.h" 1.25 +#include "jsimd.h" 1.26 + 1.27 +/* 1.28 + * In the PIC cases, we have no guarantee that constants will keep 1.29 + * their alignment. This macro allows us to verify it at runtime. 1.30 + */ 1.31 +#define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) 1.32 + 1.33 +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ 1.34 + 1.35 +static unsigned int simd_support = ~0; 1.36 + 1.37 +/* 1.38 + * Check what SIMD accelerations are supported. 1.39 + * 1.40 + * FIXME: This code is racy under a multi-threaded environment. 1.41 + */ 1.42 +LOCAL(void) 1.43 +init_simd (void) 1.44 +{ 1.45 + char *env = NULL; 1.46 + 1.47 + if (simd_support != ~0U) 1.48 + return; 1.49 + 1.50 + simd_support = jpeg_simd_cpu_support(); 1.51 + 1.52 + /* Force different settings through environment variables */ 1.53 + env = getenv("JSIMD_FORCEMMX"); 1.54 + if ((env != NULL) && (strcmp(env, "1") == 0)) 1.55 + simd_support &= JSIMD_MMX; 1.56 + env = getenv("JSIMD_FORCE3DNOW"); 1.57 + if ((env != NULL) && (strcmp(env, "1") == 0)) 1.58 + simd_support &= JSIMD_3DNOW|JSIMD_MMX; 1.59 + env = getenv("JSIMD_FORCESSE"); 1.60 + if ((env != NULL) && (strcmp(env, "1") == 0)) 1.61 + simd_support &= JSIMD_SSE|JSIMD_MMX; 1.62 + env = getenv("JSIMD_FORCESSE2"); 1.63 + if ((env != NULL) && (strcmp(env, "1") == 0)) 1.64 + simd_support &= JSIMD_SSE2; 1.65 +} 1.66 + 1.67 +GLOBAL(int) 1.68 +jsimd_can_rgb_ycc (void) 1.69 +{ 1.70 + init_simd(); 1.71 + 1.72 + /* The code is optimised for these values only */ 1.73 + if (BITS_IN_JSAMPLE != 8) 1.74 + return 0; 1.75 + if (sizeof(JDIMENSION) != 4) 1.76 + return 0; 1.77 + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 1.78 + return 0; 1.79 + 1.80 + if ((simd_support & JSIMD_SSE2) && 1.81 + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 1.82 + return 1; 1.83 + if (simd_support & JSIMD_MMX) 1.84 + return 1; 1.85 + 1.86 + return 0; 1.87 +} 1.88 + 1.89 +GLOBAL(int) 1.90 +jsimd_can_rgb_gray (void) 1.91 +{ 1.92 + init_simd(); 1.93 + 1.94 + /* The code is optimised for these values only */ 1.95 + if (BITS_IN_JSAMPLE != 8) 1.96 + return 0; 1.97 + if (sizeof(JDIMENSION) != 4) 1.98 + return 0; 1.99 + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 1.100 + return 0; 1.101 + 1.102 + if ((simd_support & JSIMD_SSE2) && 1.103 + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 1.104 + return 1; 1.105 + if (simd_support & JSIMD_MMX) 1.106 + return 1; 1.107 + 1.108 + return 0; 1.109 +} 1.110 + 1.111 +GLOBAL(int) 1.112 +jsimd_can_ycc_rgb (void) 1.113 +{ 1.114 + init_simd(); 1.115 + 1.116 + /* The code is optimised for these values only */ 1.117 + if (BITS_IN_JSAMPLE != 8) 1.118 + return 0; 1.119 + if (sizeof(JDIMENSION) != 4) 1.120 + return 0; 1.121 + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 1.122 + return 0; 1.123 + 1.124 + if ((simd_support & JSIMD_SSE2) && 1.125 + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 1.126 + return 1; 1.127 + if (simd_support & JSIMD_MMX) 1.128 + return 1; 1.129 + 1.130 + return 0; 1.131 +} 1.132 + 1.133 +GLOBAL(void) 1.134 +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 1.135 + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 1.136 + JDIMENSION output_row, int num_rows) 1.137 +{ 1.138 + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 1.139 + void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 1.140 + 1.141 + switch(cinfo->in_color_space) 1.142 + { 1.143 + case JCS_EXT_RGB: 1.144 + sse2fct=jsimd_extrgb_ycc_convert_sse2; 1.145 + mmxfct=jsimd_extrgb_ycc_convert_mmx; 1.146 + break; 1.147 + case JCS_EXT_RGBX: 1.148 + case JCS_EXT_RGBA: 1.149 + sse2fct=jsimd_extrgbx_ycc_convert_sse2; 1.150 + mmxfct=jsimd_extrgbx_ycc_convert_mmx; 1.151 + break; 1.152 + case JCS_EXT_BGR: 1.153 + sse2fct=jsimd_extbgr_ycc_convert_sse2; 1.154 + mmxfct=jsimd_extbgr_ycc_convert_mmx; 1.155 + break; 1.156 + case JCS_EXT_BGRX: 1.157 + case JCS_EXT_BGRA: 1.158 + sse2fct=jsimd_extbgrx_ycc_convert_sse2; 1.159 + mmxfct=jsimd_extbgrx_ycc_convert_mmx; 1.160 + break; 1.161 + case JCS_EXT_XBGR: 1.162 + case JCS_EXT_ABGR: 1.163 + sse2fct=jsimd_extxbgr_ycc_convert_sse2; 1.164 + mmxfct=jsimd_extxbgr_ycc_convert_mmx; 1.165 + break; 1.166 + case JCS_EXT_XRGB: 1.167 + case JCS_EXT_ARGB: 1.168 + sse2fct=jsimd_extxrgb_ycc_convert_sse2; 1.169 + mmxfct=jsimd_extxrgb_ycc_convert_mmx; 1.170 + break; 1.171 + default: 1.172 + sse2fct=jsimd_rgb_ycc_convert_sse2; 1.173 + mmxfct=jsimd_rgb_ycc_convert_mmx; 1.174 + break; 1.175 + } 1.176 + 1.177 + if ((simd_support & JSIMD_SSE2) && 1.178 + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) 1.179 + sse2fct(cinfo->image_width, input_buf, 1.180 + output_buf, output_row, num_rows); 1.181 + else if (simd_support & JSIMD_MMX) 1.182 + mmxfct(cinfo->image_width, input_buf, 1.183 + output_buf, output_row, num_rows); 1.184 +} 1.185 + 1.186 +GLOBAL(void) 1.187 +jsimd_rgb_gray_convert (j_compress_ptr cinfo, 1.188 + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 1.189 + JDIMENSION output_row, int num_rows) 1.190 +{ 1.191 + void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 1.192 + void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 1.193 + 1.194 + switch(cinfo->in_color_space) 1.195 + { 1.196 + case JCS_EXT_RGB: 1.197 + sse2fct=jsimd_extrgb_gray_convert_sse2; 1.198 + mmxfct=jsimd_extrgb_gray_convert_mmx; 1.199 + break; 1.200 + case JCS_EXT_RGBX: 1.201 + case JCS_EXT_RGBA: 1.202 + sse2fct=jsimd_extrgbx_gray_convert_sse2; 1.203 + mmxfct=jsimd_extrgbx_gray_convert_mmx; 1.204 + break; 1.205 + case JCS_EXT_BGR: 1.206 + sse2fct=jsimd_extbgr_gray_convert_sse2; 1.207 + mmxfct=jsimd_extbgr_gray_convert_mmx; 1.208 + break; 1.209 + case JCS_EXT_BGRX: 1.210 + case JCS_EXT_BGRA: 1.211 + sse2fct=jsimd_extbgrx_gray_convert_sse2; 1.212 + mmxfct=jsimd_extbgrx_gray_convert_mmx; 1.213 + break; 1.214 + case JCS_EXT_XBGR: 1.215 + case JCS_EXT_ABGR: 1.216 + sse2fct=jsimd_extxbgr_gray_convert_sse2; 1.217 + mmxfct=jsimd_extxbgr_gray_convert_mmx; 1.218 + break; 1.219 + case JCS_EXT_XRGB: 1.220 + case JCS_EXT_ARGB: 1.221 + sse2fct=jsimd_extxrgb_gray_convert_sse2; 1.222 + mmxfct=jsimd_extxrgb_gray_convert_mmx; 1.223 + break; 1.224 + default: 1.225 + sse2fct=jsimd_rgb_gray_convert_sse2; 1.226 + mmxfct=jsimd_rgb_gray_convert_mmx; 1.227 + break; 1.228 + } 1.229 + 1.230 + if ((simd_support & JSIMD_SSE2) && 1.231 + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) 1.232 + sse2fct(cinfo->image_width, input_buf, 1.233 + output_buf, output_row, num_rows); 1.234 + else if (simd_support & JSIMD_MMX) 1.235 + mmxfct(cinfo->image_width, input_buf, 1.236 + output_buf, output_row, num_rows); 1.237 +} 1.238 + 1.239 +GLOBAL(void) 1.240 +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 1.241 + JSAMPIMAGE input_buf, JDIMENSION input_row, 1.242 + JSAMPARRAY output_buf, int num_rows) 1.243 +{ 1.244 + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 1.245 + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 1.246 + 1.247 + switch(cinfo->out_color_space) 1.248 + { 1.249 + case JCS_EXT_RGB: 1.250 + sse2fct=jsimd_ycc_extrgb_convert_sse2; 1.251 + mmxfct=jsimd_ycc_extrgb_convert_mmx; 1.252 + break; 1.253 + case JCS_EXT_RGBX: 1.254 + case JCS_EXT_RGBA: 1.255 + sse2fct=jsimd_ycc_extrgbx_convert_sse2; 1.256 + mmxfct=jsimd_ycc_extrgbx_convert_mmx; 1.257 + break; 1.258 + case JCS_EXT_BGR: 1.259 + sse2fct=jsimd_ycc_extbgr_convert_sse2; 1.260 + mmxfct=jsimd_ycc_extbgr_convert_mmx; 1.261 + break; 1.262 + case JCS_EXT_BGRX: 1.263 + case JCS_EXT_BGRA: 1.264 + sse2fct=jsimd_ycc_extbgrx_convert_sse2; 1.265 + mmxfct=jsimd_ycc_extbgrx_convert_mmx; 1.266 + break; 1.267 + case JCS_EXT_XBGR: 1.268 + case JCS_EXT_ABGR: 1.269 + sse2fct=jsimd_ycc_extxbgr_convert_sse2; 1.270 + mmxfct=jsimd_ycc_extxbgr_convert_mmx; 1.271 + break; 1.272 + case JCS_EXT_XRGB: 1.273 + case JCS_EXT_ARGB: 1.274 + sse2fct=jsimd_ycc_extxrgb_convert_sse2; 1.275 + mmxfct=jsimd_ycc_extxrgb_convert_mmx; 1.276 + break; 1.277 + default: 1.278 + sse2fct=jsimd_ycc_rgb_convert_sse2; 1.279 + mmxfct=jsimd_ycc_rgb_convert_mmx; 1.280 + break; 1.281 + } 1.282 + 1.283 + if ((simd_support & JSIMD_SSE2) && 1.284 + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) 1.285 + sse2fct(cinfo->output_width, input_buf, 1.286 + input_row, output_buf, num_rows); 1.287 + else if (simd_support & JSIMD_MMX) 1.288 + mmxfct(cinfo->output_width, input_buf, 1.289 + input_row, output_buf, num_rows); 1.290 +} 1.291 + 1.292 +GLOBAL(int) 1.293 +jsimd_can_h2v2_downsample (void) 1.294 +{ 1.295 + init_simd(); 1.296 + 1.297 + /* The code is optimised for these values only */ 1.298 + if (BITS_IN_JSAMPLE != 8) 1.299 + return 0; 1.300 + if (sizeof(JDIMENSION) != 4) 1.301 + return 0; 1.302 + 1.303 + if (simd_support & JSIMD_SSE2) 1.304 + return 1; 1.305 + if (simd_support & JSIMD_MMX) 1.306 + return 1; 1.307 + 1.308 + return 0; 1.309 +} 1.310 + 1.311 +GLOBAL(int) 1.312 +jsimd_can_h2v1_downsample (void) 1.313 +{ 1.314 + init_simd(); 1.315 + 1.316 + /* The code is optimised for these values only */ 1.317 + if (BITS_IN_JSAMPLE != 8) 1.318 + return 0; 1.319 + if (sizeof(JDIMENSION) != 4) 1.320 + return 0; 1.321 + 1.322 + if (simd_support & JSIMD_SSE2) 1.323 + return 1; 1.324 + if (simd_support & JSIMD_MMX) 1.325 + return 1; 1.326 + 1.327 + return 0; 1.328 +} 1.329 + 1.330 +GLOBAL(void) 1.331 +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 1.332 + JSAMPARRAY input_data, JSAMPARRAY output_data) 1.333 +{ 1.334 + if (simd_support & JSIMD_SSE2) 1.335 + jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 1.336 + compptr->v_samp_factor, compptr->width_in_blocks, 1.337 + input_data, output_data); 1.338 + else if (simd_support & JSIMD_MMX) 1.339 + jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, 1.340 + compptr->v_samp_factor, compptr->width_in_blocks, 1.341 + input_data, output_data); 1.342 +} 1.343 + 1.344 +GLOBAL(void) 1.345 +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 1.346 + JSAMPARRAY input_data, JSAMPARRAY output_data) 1.347 +{ 1.348 + if (simd_support & JSIMD_SSE2) 1.349 + jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, 1.350 + compptr->v_samp_factor, compptr->width_in_blocks, 1.351 + input_data, output_data); 1.352 + else if (simd_support & JSIMD_MMX) 1.353 + jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, 1.354 + compptr->v_samp_factor, compptr->width_in_blocks, 1.355 + input_data, output_data); 1.356 +} 1.357 + 1.358 +GLOBAL(int) 1.359 +jsimd_can_h2v2_upsample (void) 1.360 +{ 1.361 + init_simd(); 1.362 + 1.363 + /* The code is optimised for these values only */ 1.364 + if (BITS_IN_JSAMPLE != 8) 1.365 + return 0; 1.366 + if (sizeof(JDIMENSION) != 4) 1.367 + return 0; 1.368 + 1.369 + if (simd_support & JSIMD_SSE2) 1.370 + return 1; 1.371 + if (simd_support & JSIMD_MMX) 1.372 + return 1; 1.373 + 1.374 + return 0; 1.375 +} 1.376 + 1.377 +GLOBAL(int) 1.378 +jsimd_can_h2v1_upsample (void) 1.379 +{ 1.380 + init_simd(); 1.381 + 1.382 + /* The code is optimised for these values only */ 1.383 + if (BITS_IN_JSAMPLE != 8) 1.384 + return 0; 1.385 + if (sizeof(JDIMENSION) != 4) 1.386 + return 0; 1.387 + 1.388 + if (simd_support & JSIMD_SSE2) 1.389 + return 1; 1.390 + if (simd_support & JSIMD_MMX) 1.391 + return 1; 1.392 + 1.393 + return 0; 1.394 +} 1.395 + 1.396 +GLOBAL(void) 1.397 +jsimd_h2v2_upsample (j_decompress_ptr cinfo, 1.398 + jpeg_component_info * compptr, 1.399 + JSAMPARRAY input_data, 1.400 + JSAMPARRAY * output_data_ptr) 1.401 +{ 1.402 + if (simd_support & JSIMD_SSE2) 1.403 + jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, 1.404 + cinfo->output_width, input_data, output_data_ptr); 1.405 + else if (simd_support & JSIMD_MMX) 1.406 + jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, 1.407 + cinfo->output_width, input_data, output_data_ptr); 1.408 +} 1.409 + 1.410 +GLOBAL(void) 1.411 +jsimd_h2v1_upsample (j_decompress_ptr cinfo, 1.412 + jpeg_component_info * compptr, 1.413 + JSAMPARRAY input_data, 1.414 + JSAMPARRAY * output_data_ptr) 1.415 +{ 1.416 + if (simd_support & JSIMD_SSE2) 1.417 + jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, 1.418 + cinfo->output_width, input_data, output_data_ptr); 1.419 + else if (simd_support & JSIMD_MMX) 1.420 + jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, 1.421 + cinfo->output_width, input_data, output_data_ptr); 1.422 +} 1.423 + 1.424 +GLOBAL(int) 1.425 +jsimd_can_h2v2_fancy_upsample (void) 1.426 +{ 1.427 + init_simd(); 1.428 + 1.429 + /* The code is optimised for these values only */ 1.430 + if (BITS_IN_JSAMPLE != 8) 1.431 + return 0; 1.432 + if (sizeof(JDIMENSION) != 4) 1.433 + return 0; 1.434 + 1.435 + if ((simd_support & JSIMD_SSE2) && 1.436 + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 1.437 + return 1; 1.438 + if (simd_support & JSIMD_MMX) 1.439 + return 1; 1.440 + 1.441 + return 0; 1.442 +} 1.443 + 1.444 +GLOBAL(int) 1.445 +jsimd_can_h2v1_fancy_upsample (void) 1.446 +{ 1.447 + init_simd(); 1.448 + 1.449 + /* The code is optimised for these values only */ 1.450 + if (BITS_IN_JSAMPLE != 8) 1.451 + return 0; 1.452 + if (sizeof(JDIMENSION) != 4) 1.453 + return 0; 1.454 + 1.455 + if ((simd_support & JSIMD_SSE2) && 1.456 + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 1.457 + return 1; 1.458 + if (simd_support & JSIMD_MMX) 1.459 + return 1; 1.460 + 1.461 + return 0; 1.462 +} 1.463 + 1.464 +GLOBAL(void) 1.465 +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 1.466 + jpeg_component_info * compptr, 1.467 + JSAMPARRAY input_data, 1.468 + JSAMPARRAY * output_data_ptr) 1.469 +{ 1.470 + if ((simd_support & JSIMD_SSE2) && 1.471 + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 1.472 + jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, 1.473 + compptr->downsampled_width, input_data, output_data_ptr); 1.474 + else if (simd_support & JSIMD_MMX) 1.475 + jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, 1.476 + compptr->downsampled_width, input_data, output_data_ptr); 1.477 +} 1.478 + 1.479 +GLOBAL(void) 1.480 +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 1.481 + jpeg_component_info * compptr, 1.482 + JSAMPARRAY input_data, 1.483 + JSAMPARRAY * output_data_ptr) 1.484 +{ 1.485 + if ((simd_support & JSIMD_SSE2) && 1.486 + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) 1.487 + jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, 1.488 + compptr->downsampled_width, input_data, output_data_ptr); 1.489 + else if (simd_support & JSIMD_MMX) 1.490 + jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, 1.491 + compptr->downsampled_width, input_data, output_data_ptr); 1.492 +} 1.493 + 1.494 +GLOBAL(int) 1.495 +jsimd_can_h2v2_merged_upsample (void) 1.496 +{ 1.497 + init_simd(); 1.498 + 1.499 + /* The code is optimised for these values only */ 1.500 + if (BITS_IN_JSAMPLE != 8) 1.501 + return 0; 1.502 + if (sizeof(JDIMENSION) != 4) 1.503 + return 0; 1.504 + 1.505 + if ((simd_support & JSIMD_SSE2) && 1.506 + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 1.507 + return 1; 1.508 + if (simd_support & JSIMD_MMX) 1.509 + return 1; 1.510 + 1.511 + return 0; 1.512 +} 1.513 + 1.514 +GLOBAL(int) 1.515 +jsimd_can_h2v1_merged_upsample (void) 1.516 +{ 1.517 + init_simd(); 1.518 + 1.519 + /* The code is optimised for these values only */ 1.520 + if (BITS_IN_JSAMPLE != 8) 1.521 + return 0; 1.522 + if (sizeof(JDIMENSION) != 4) 1.523 + return 0; 1.524 + 1.525 + if ((simd_support & JSIMD_SSE2) && 1.526 + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 1.527 + return 1; 1.528 + if (simd_support & JSIMD_MMX) 1.529 + return 1; 1.530 + 1.531 + return 0; 1.532 +} 1.533 + 1.534 +GLOBAL(void) 1.535 +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 1.536 + JSAMPIMAGE input_buf, 1.537 + JDIMENSION in_row_group_ctr, 1.538 + JSAMPARRAY output_buf) 1.539 +{ 1.540 + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 1.541 + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 1.542 + 1.543 + switch(cinfo->out_color_space) 1.544 + { 1.545 + case JCS_EXT_RGB: 1.546 + sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; 1.547 + mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; 1.548 + break; 1.549 + case JCS_EXT_RGBX: 1.550 + case JCS_EXT_RGBA: 1.551 + sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; 1.552 + mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; 1.553 + break; 1.554 + case JCS_EXT_BGR: 1.555 + sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; 1.556 + mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; 1.557 + break; 1.558 + case JCS_EXT_BGRX: 1.559 + case JCS_EXT_BGRA: 1.560 + sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; 1.561 + mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; 1.562 + break; 1.563 + case JCS_EXT_XBGR: 1.564 + case JCS_EXT_ABGR: 1.565 + sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; 1.566 + mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; 1.567 + break; 1.568 + case JCS_EXT_XRGB: 1.569 + case JCS_EXT_ARGB: 1.570 + sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; 1.571 + mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; 1.572 + break; 1.573 + default: 1.574 + sse2fct=jsimd_h2v2_merged_upsample_sse2; 1.575 + mmxfct=jsimd_h2v2_merged_upsample_mmx; 1.576 + break; 1.577 + } 1.578 + 1.579 + if ((simd_support & JSIMD_SSE2) && 1.580 + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 1.581 + sse2fct(cinfo->output_width, input_buf, 1.582 + in_row_group_ctr, output_buf); 1.583 + else if (simd_support & JSIMD_MMX) 1.584 + mmxfct(cinfo->output_width, input_buf, 1.585 + in_row_group_ctr, output_buf); 1.586 +} 1.587 + 1.588 +GLOBAL(void) 1.589 +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 1.590 + JSAMPIMAGE input_buf, 1.591 + JDIMENSION in_row_group_ctr, 1.592 + JSAMPARRAY output_buf) 1.593 +{ 1.594 + void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 1.595 + void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 1.596 + 1.597 + switch(cinfo->out_color_space) 1.598 + { 1.599 + case JCS_EXT_RGB: 1.600 + sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; 1.601 + mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; 1.602 + break; 1.603 + case JCS_EXT_RGBX: 1.604 + case JCS_EXT_RGBA: 1.605 + sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; 1.606 + mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; 1.607 + break; 1.608 + case JCS_EXT_BGR: 1.609 + sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; 1.610 + mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; 1.611 + break; 1.612 + case JCS_EXT_BGRX: 1.613 + case JCS_EXT_BGRA: 1.614 + sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; 1.615 + mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; 1.616 + break; 1.617 + case JCS_EXT_XBGR: 1.618 + case JCS_EXT_ABGR: 1.619 + sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; 1.620 + mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; 1.621 + break; 1.622 + case JCS_EXT_XRGB: 1.623 + case JCS_EXT_ARGB: 1.624 + sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; 1.625 + mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; 1.626 + break; 1.627 + default: 1.628 + sse2fct=jsimd_h2v1_merged_upsample_sse2; 1.629 + mmxfct=jsimd_h2v1_merged_upsample_mmx; 1.630 + break; 1.631 + } 1.632 + 1.633 + if ((simd_support & JSIMD_SSE2) && 1.634 + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) 1.635 + sse2fct(cinfo->output_width, input_buf, 1.636 + in_row_group_ctr, output_buf); 1.637 + else if (simd_support & JSIMD_MMX) 1.638 + mmxfct(cinfo->output_width, input_buf, 1.639 + in_row_group_ctr, output_buf); 1.640 +} 1.641 + 1.642 +GLOBAL(int) 1.643 +jsimd_can_convsamp (void) 1.644 +{ 1.645 + init_simd(); 1.646 + 1.647 + /* The code is optimised for these values only */ 1.648 + if (DCTSIZE != 8) 1.649 + return 0; 1.650 + if (BITS_IN_JSAMPLE != 8) 1.651 + return 0; 1.652 + if (sizeof(JDIMENSION) != 4) 1.653 + return 0; 1.654 + if (sizeof(DCTELEM) != 2) 1.655 + return 0; 1.656 + 1.657 + if (simd_support & JSIMD_SSE2) 1.658 + return 1; 1.659 + if (simd_support & JSIMD_MMX) 1.660 + return 1; 1.661 + 1.662 + return 0; 1.663 +} 1.664 + 1.665 +GLOBAL(int) 1.666 +jsimd_can_convsamp_float (void) 1.667 +{ 1.668 + init_simd(); 1.669 + 1.670 + /* The code is optimised for these values only */ 1.671 + if (DCTSIZE != 8) 1.672 + return 0; 1.673 + if (BITS_IN_JSAMPLE != 8) 1.674 + return 0; 1.675 + if (sizeof(JDIMENSION) != 4) 1.676 + return 0; 1.677 + if (sizeof(FAST_FLOAT) != 4) 1.678 + return 0; 1.679 + 1.680 + if (simd_support & JSIMD_SSE2) 1.681 + return 1; 1.682 + if (simd_support & JSIMD_SSE) 1.683 + return 1; 1.684 + if (simd_support & JSIMD_3DNOW) 1.685 + return 1; 1.686 + 1.687 + return 0; 1.688 +} 1.689 + 1.690 +GLOBAL(void) 1.691 +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 1.692 + DCTELEM * workspace) 1.693 +{ 1.694 + if (simd_support & JSIMD_SSE2) 1.695 + jsimd_convsamp_sse2(sample_data, start_col, workspace); 1.696 + else if (simd_support & JSIMD_MMX) 1.697 + jsimd_convsamp_mmx(sample_data, start_col, workspace); 1.698 +} 1.699 + 1.700 +GLOBAL(void) 1.701 +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 1.702 + FAST_FLOAT * workspace) 1.703 +{ 1.704 + if (simd_support & JSIMD_SSE2) 1.705 + jsimd_convsamp_float_sse2(sample_data, start_col, workspace); 1.706 + else if (simd_support & JSIMD_SSE) 1.707 + jsimd_convsamp_float_sse(sample_data, start_col, workspace); 1.708 + else if (simd_support & JSIMD_3DNOW) 1.709 + jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); 1.710 +} 1.711 + 1.712 +GLOBAL(int) 1.713 +jsimd_can_fdct_islow (void) 1.714 +{ 1.715 + init_simd(); 1.716 + 1.717 + /* The code is optimised for these values only */ 1.718 + if (DCTSIZE != 8) 1.719 + return 0; 1.720 + if (sizeof(DCTELEM) != 2) 1.721 + return 0; 1.722 + 1.723 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 1.724 + return 1; 1.725 + if (simd_support & JSIMD_MMX) 1.726 + return 1; 1.727 + 1.728 + return 0; 1.729 +} 1.730 + 1.731 +GLOBAL(int) 1.732 +jsimd_can_fdct_ifast (void) 1.733 +{ 1.734 + init_simd(); 1.735 + 1.736 + /* The code is optimised for these values only */ 1.737 + if (DCTSIZE != 8) 1.738 + return 0; 1.739 + if (sizeof(DCTELEM) != 2) 1.740 + return 0; 1.741 + 1.742 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) 1.743 + return 1; 1.744 + if (simd_support & JSIMD_MMX) 1.745 + return 1; 1.746 + 1.747 + return 0; 1.748 +} 1.749 + 1.750 +GLOBAL(int) 1.751 +jsimd_can_fdct_float (void) 1.752 +{ 1.753 + init_simd(); 1.754 + 1.755 + /* The code is optimised for these values only */ 1.756 + if (DCTSIZE != 8) 1.757 + return 0; 1.758 + if (sizeof(FAST_FLOAT) != 4) 1.759 + return 0; 1.760 + 1.761 + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 1.762 + return 1; 1.763 + if (simd_support & JSIMD_3DNOW) 1.764 + return 1; 1.765 + 1.766 + return 0; 1.767 +} 1.768 + 1.769 +GLOBAL(void) 1.770 +jsimd_fdct_islow (DCTELEM * data) 1.771 +{ 1.772 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 1.773 + jsimd_fdct_islow_sse2(data); 1.774 + else if (simd_support & JSIMD_MMX) 1.775 + jsimd_fdct_islow_mmx(data); 1.776 +} 1.777 + 1.778 +GLOBAL(void) 1.779 +jsimd_fdct_ifast (DCTELEM * data) 1.780 +{ 1.781 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) 1.782 + jsimd_fdct_ifast_sse2(data); 1.783 + else if (simd_support & JSIMD_MMX) 1.784 + jsimd_fdct_ifast_mmx(data); 1.785 +} 1.786 + 1.787 +GLOBAL(void) 1.788 +jsimd_fdct_float (FAST_FLOAT * data) 1.789 +{ 1.790 + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) 1.791 + jsimd_fdct_float_sse(data); 1.792 + else if (simd_support & JSIMD_3DNOW) 1.793 + jsimd_fdct_float_3dnow(data); 1.794 +} 1.795 + 1.796 +GLOBAL(int) 1.797 +jsimd_can_quantize (void) 1.798 +{ 1.799 + init_simd(); 1.800 + 1.801 + /* The code is optimised for these values only */ 1.802 + if (DCTSIZE != 8) 1.803 + return 0; 1.804 + if (sizeof(JCOEF) != 2) 1.805 + return 0; 1.806 + if (sizeof(DCTELEM) != 2) 1.807 + return 0; 1.808 + 1.809 + if (simd_support & JSIMD_SSE2) 1.810 + return 1; 1.811 + if (simd_support & JSIMD_MMX) 1.812 + return 1; 1.813 + 1.814 + return 0; 1.815 +} 1.816 + 1.817 +GLOBAL(int) 1.818 +jsimd_can_quantize_float (void) 1.819 +{ 1.820 + init_simd(); 1.821 + 1.822 + /* The code is optimised for these values only */ 1.823 + if (DCTSIZE != 8) 1.824 + return 0; 1.825 + if (sizeof(JCOEF) != 2) 1.826 + return 0; 1.827 + if (sizeof(FAST_FLOAT) != 4) 1.828 + return 0; 1.829 + 1.830 + if (simd_support & JSIMD_SSE2) 1.831 + return 1; 1.832 + if (simd_support & JSIMD_SSE) 1.833 + return 1; 1.834 + if (simd_support & JSIMD_3DNOW) 1.835 + return 1; 1.836 + 1.837 + return 0; 1.838 +} 1.839 + 1.840 +GLOBAL(void) 1.841 +jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, 1.842 + DCTELEM * workspace) 1.843 +{ 1.844 + if (simd_support & JSIMD_SSE2) 1.845 + jsimd_quantize_sse2(coef_block, divisors, workspace); 1.846 + else if (simd_support & JSIMD_MMX) 1.847 + jsimd_quantize_mmx(coef_block, divisors, workspace); 1.848 +} 1.849 + 1.850 +GLOBAL(void) 1.851 +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, 1.852 + FAST_FLOAT * workspace) 1.853 +{ 1.854 + if (simd_support & JSIMD_SSE2) 1.855 + jsimd_quantize_float_sse2(coef_block, divisors, workspace); 1.856 + else if (simd_support & JSIMD_SSE) 1.857 + jsimd_quantize_float_sse(coef_block, divisors, workspace); 1.858 + else if (simd_support & JSIMD_3DNOW) 1.859 + jsimd_quantize_float_3dnow(coef_block, divisors, workspace); 1.860 +} 1.861 + 1.862 +GLOBAL(int) 1.863 +jsimd_can_idct_2x2 (void) 1.864 +{ 1.865 + init_simd(); 1.866 + 1.867 + /* The code is optimised for these values only */ 1.868 + if (DCTSIZE != 8) 1.869 + return 0; 1.870 + if (sizeof(JCOEF) != 2) 1.871 + return 0; 1.872 + if (BITS_IN_JSAMPLE != 8) 1.873 + return 0; 1.874 + if (sizeof(JDIMENSION) != 4) 1.875 + return 0; 1.876 + if (sizeof(ISLOW_MULT_TYPE) != 2) 1.877 + return 0; 1.878 + 1.879 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 1.880 + return 1; 1.881 + if (simd_support & JSIMD_MMX) 1.882 + return 1; 1.883 + 1.884 + return 0; 1.885 +} 1.886 + 1.887 +GLOBAL(int) 1.888 +jsimd_can_idct_4x4 (void) 1.889 +{ 1.890 + init_simd(); 1.891 + 1.892 + /* The code is optimised for these values only */ 1.893 + if (DCTSIZE != 8) 1.894 + return 0; 1.895 + if (sizeof(JCOEF) != 2) 1.896 + return 0; 1.897 + if (BITS_IN_JSAMPLE != 8) 1.898 + return 0; 1.899 + if (sizeof(JDIMENSION) != 4) 1.900 + return 0; 1.901 + if (sizeof(ISLOW_MULT_TYPE) != 2) 1.902 + return 0; 1.903 + 1.904 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 1.905 + return 1; 1.906 + if (simd_support & JSIMD_MMX) 1.907 + return 1; 1.908 + 1.909 + return 0; 1.910 +} 1.911 + 1.912 +GLOBAL(void) 1.913 +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.914 + JCOEFPTR coef_block, JSAMPARRAY output_buf, 1.915 + JDIMENSION output_col) 1.916 +{ 1.917 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 1.918 + jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); 1.919 + else if (simd_support & JSIMD_MMX) 1.920 + jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); 1.921 +} 1.922 + 1.923 +GLOBAL(void) 1.924 +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.925 + JCOEFPTR coef_block, JSAMPARRAY output_buf, 1.926 + JDIMENSION output_col) 1.927 +{ 1.928 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) 1.929 + jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); 1.930 + else if (simd_support & JSIMD_MMX) 1.931 + jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); 1.932 +} 1.933 + 1.934 +GLOBAL(int) 1.935 +jsimd_can_idct_islow (void) 1.936 +{ 1.937 + init_simd(); 1.938 + 1.939 + /* The code is optimised for these values only */ 1.940 + if (DCTSIZE != 8) 1.941 + return 0; 1.942 + if (sizeof(JCOEF) != 2) 1.943 + return 0; 1.944 + if (BITS_IN_JSAMPLE != 8) 1.945 + return 0; 1.946 + if (sizeof(JDIMENSION) != 4) 1.947 + return 0; 1.948 + if (sizeof(ISLOW_MULT_TYPE) != 2) 1.949 + return 0; 1.950 + 1.951 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 1.952 + return 1; 1.953 + if (simd_support & JSIMD_MMX) 1.954 + return 1; 1.955 + 1.956 + return 0; 1.957 +} 1.958 + 1.959 +GLOBAL(int) 1.960 +jsimd_can_idct_ifast (void) 1.961 +{ 1.962 + init_simd(); 1.963 + 1.964 + /* The code is optimised for these values only */ 1.965 + if (DCTSIZE != 8) 1.966 + return 0; 1.967 + if (sizeof(JCOEF) != 2) 1.968 + return 0; 1.969 + if (BITS_IN_JSAMPLE != 8) 1.970 + return 0; 1.971 + if (sizeof(JDIMENSION) != 4) 1.972 + return 0; 1.973 + if (sizeof(IFAST_MULT_TYPE) != 2) 1.974 + return 0; 1.975 + if (IFAST_SCALE_BITS != 2) 1.976 + return 0; 1.977 + 1.978 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 1.979 + return 1; 1.980 + if (simd_support & JSIMD_MMX) 1.981 + return 1; 1.982 + 1.983 + return 0; 1.984 +} 1.985 + 1.986 +GLOBAL(int) 1.987 +jsimd_can_idct_float (void) 1.988 +{ 1.989 + init_simd(); 1.990 + 1.991 + if (DCTSIZE != 8) 1.992 + return 0; 1.993 + if (sizeof(JCOEF) != 2) 1.994 + return 0; 1.995 + if (BITS_IN_JSAMPLE != 8) 1.996 + return 0; 1.997 + if (sizeof(JDIMENSION) != 4) 1.998 + return 0; 1.999 + if (sizeof(FAST_FLOAT) != 4) 1.1000 + return 0; 1.1001 + if (sizeof(FLOAT_MULT_TYPE) != 4) 1.1002 + return 0; 1.1003 + 1.1004 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 1.1005 + return 1; 1.1006 + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) 1.1007 + return 1; 1.1008 + if (simd_support & JSIMD_3DNOW) 1.1009 + return 1; 1.1010 + 1.1011 + return 0; 1.1012 +} 1.1013 + 1.1014 +GLOBAL(void) 1.1015 +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.1016 + JCOEFPTR coef_block, JSAMPARRAY output_buf, 1.1017 + JDIMENSION output_col) 1.1018 +{ 1.1019 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) 1.1020 + jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col); 1.1021 + else if (simd_support & JSIMD_MMX) 1.1022 + jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col); 1.1023 +} 1.1024 + 1.1025 +GLOBAL(void) 1.1026 +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.1027 + JCOEFPTR coef_block, JSAMPARRAY output_buf, 1.1028 + JDIMENSION output_col) 1.1029 +{ 1.1030 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) 1.1031 + jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col); 1.1032 + else if (simd_support & JSIMD_MMX) 1.1033 + jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col); 1.1034 +} 1.1035 + 1.1036 +GLOBAL(void) 1.1037 +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.1038 + JCOEFPTR coef_block, JSAMPARRAY output_buf, 1.1039 + JDIMENSION output_col) 1.1040 +{ 1.1041 + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) 1.1042 + jsimd_idct_float_sse2(compptr->dct_table, coef_block, 1.1043 + output_buf, output_col); 1.1044 + else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) 1.1045 + jsimd_idct_float_sse(compptr->dct_table, coef_block, 1.1046 + output_buf, output_col); 1.1047 + else if (simd_support & JSIMD_3DNOW) 1.1048 + jsimd_idct_float_3dnow(compptr->dct_table, coef_block, 1.1049 + output_buf, output_col); 1.1050 +} 1.1051 +