media/libjpeg/simd/jsimd_x86_64.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libjpeg/simd/jsimd_x86_64.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,753 @@
     1.4 +/*
     1.5 + * jsimd_x86_64.c
     1.6 + *
     1.7 + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
     1.8 + * Copyright 2009-2011 D. R. Commander
     1.9 + * 
    1.10 + * Based on the x86 SIMD extension for IJG JPEG library,
    1.11 + * Copyright (C) 1999-2006, MIYASAKA Masaru.
    1.12 + * For conditions of distribution and use, see copyright notice in jsimdext.inc
    1.13 + *
    1.14 + * This file contains the interface between the "normal" portions
    1.15 + * of the library and the SIMD implementations when running on an
    1.16 + * x86_64 architecture.
    1.17 + */
    1.18 +
    1.19 +#define JPEG_INTERNALS
    1.20 +#include "../jinclude.h"
    1.21 +#include "../jpeglib.h"
    1.22 +#include "../jsimd.h"
    1.23 +#include "../jdct.h"
    1.24 +#include "../jsimddct.h"
    1.25 +#include "jsimd.h"
    1.26 +
    1.27 +/*
    1.28 + * In the PIC cases, we have no guarantee that constants will keep
    1.29 + * their alignment. This macro allows us to verify it at runtime.
    1.30 + */
    1.31 +#define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0)
    1.32 +
    1.33 +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
    1.34 +
    1.35 +GLOBAL(int)
    1.36 +jsimd_can_rgb_ycc (void)
    1.37 +{
    1.38 +  /* The code is optimised for these values only */
    1.39 +  if (BITS_IN_JSAMPLE != 8)
    1.40 +    return 0;
    1.41 +  if (sizeof(JDIMENSION) != 4)
    1.42 +    return 0;
    1.43 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    1.44 +    return 0;
    1.45 +
    1.46 +  if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
    1.47 +    return 0;
    1.48 +
    1.49 +  return 1;
    1.50 +}
    1.51 +
    1.52 +GLOBAL(int)
    1.53 +jsimd_can_rgb_gray (void)
    1.54 +{
    1.55 +  /* The code is optimised for these values only */
    1.56 +  if (BITS_IN_JSAMPLE != 8)
    1.57 +    return 0;
    1.58 +  if (sizeof(JDIMENSION) != 4)
    1.59 +    return 0;
    1.60 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    1.61 +    return 0;
    1.62 +
    1.63 +  if (!IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
    1.64 +    return 0;
    1.65 +
    1.66 +  return 1;
    1.67 +}
    1.68 +
    1.69 +GLOBAL(int)
    1.70 +jsimd_can_ycc_rgb (void)
    1.71 +{
    1.72 +  /* The code is optimised for these values only */
    1.73 +  if (BITS_IN_JSAMPLE != 8)
    1.74 +    return 0;
    1.75 +  if (sizeof(JDIMENSION) != 4)
    1.76 +    return 0;
    1.77 +  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
    1.78 +    return 0;
    1.79 +
    1.80 +  if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
    1.81 +    return 0;
    1.82 +
    1.83 +  return 1;
    1.84 +}
    1.85 +
    1.86 +GLOBAL(void)
    1.87 +jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
    1.88 +                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
    1.89 +                       JDIMENSION output_row, int num_rows)
    1.90 +{
    1.91 +  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
    1.92 +
    1.93 +  switch(cinfo->in_color_space)
    1.94 +  {
    1.95 +    case JCS_EXT_RGB:
    1.96 +      sse2fct=jsimd_extrgb_ycc_convert_sse2;
    1.97 +      break;
    1.98 +    case JCS_EXT_RGBX:
    1.99 +    case JCS_EXT_RGBA:
   1.100 +      sse2fct=jsimd_extrgbx_ycc_convert_sse2;
   1.101 +      break;
   1.102 +    case JCS_EXT_BGR:
   1.103 +      sse2fct=jsimd_extbgr_ycc_convert_sse2;
   1.104 +      break;
   1.105 +    case JCS_EXT_BGRX:
   1.106 +    case JCS_EXT_BGRA:
   1.107 +      sse2fct=jsimd_extbgrx_ycc_convert_sse2;
   1.108 +      break;
   1.109 +    case JCS_EXT_XBGR:
   1.110 +    case JCS_EXT_ABGR:
   1.111 +      sse2fct=jsimd_extxbgr_ycc_convert_sse2;
   1.112 +      break;
   1.113 +    case JCS_EXT_XRGB:
   1.114 +    case JCS_EXT_ARGB:
   1.115 +      sse2fct=jsimd_extxrgb_ycc_convert_sse2;
   1.116 +      break;
   1.117 +    default:
   1.118 +      sse2fct=jsimd_rgb_ycc_convert_sse2;
   1.119 +      break;
   1.120 +  }
   1.121 +
   1.122 +  sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
   1.123 +}
   1.124 +
   1.125 +GLOBAL(void)
   1.126 +jsimd_rgb_gray_convert (j_compress_ptr cinfo,
   1.127 +                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
   1.128 +                        JDIMENSION output_row, int num_rows)
   1.129 +{
   1.130 +  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   1.131 +
   1.132 +  switch(cinfo->in_color_space)
   1.133 +  {
   1.134 +    case JCS_EXT_RGB:
   1.135 +      sse2fct=jsimd_extrgb_gray_convert_sse2;
   1.136 +      break;
   1.137 +    case JCS_EXT_RGBX:
   1.138 +    case JCS_EXT_RGBA:
   1.139 +      sse2fct=jsimd_extrgbx_gray_convert_sse2;
   1.140 +      break;
   1.141 +    case JCS_EXT_BGR:
   1.142 +      sse2fct=jsimd_extbgr_gray_convert_sse2;
   1.143 +      break;
   1.144 +    case JCS_EXT_BGRX:
   1.145 +    case JCS_EXT_BGRA:
   1.146 +      sse2fct=jsimd_extbgrx_gray_convert_sse2;
   1.147 +      break;
   1.148 +    case JCS_EXT_XBGR:
   1.149 +    case JCS_EXT_ABGR:
   1.150 +      sse2fct=jsimd_extxbgr_gray_convert_sse2;
   1.151 +      break;
   1.152 +    case JCS_EXT_XRGB:
   1.153 +    case JCS_EXT_ARGB:
   1.154 +      sse2fct=jsimd_extxrgb_gray_convert_sse2;
   1.155 +      break;
   1.156 +    default:
   1.157 +      sse2fct=jsimd_rgb_gray_convert_sse2;
   1.158 +      break;
   1.159 +  }
   1.160 +
   1.161 +  sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
   1.162 +}
   1.163 +
   1.164 +GLOBAL(void)
   1.165 +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
   1.166 +                       JSAMPIMAGE input_buf, JDIMENSION input_row,
   1.167 +                       JSAMPARRAY output_buf, int num_rows)
   1.168 +{
   1.169 +  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   1.170 +
   1.171 +  switch(cinfo->out_color_space)
   1.172 +  {
   1.173 +    case JCS_EXT_RGB:
   1.174 +      sse2fct=jsimd_ycc_extrgb_convert_sse2;
   1.175 +      break;
   1.176 +    case JCS_EXT_RGBX:
   1.177 +    case JCS_EXT_RGBA:
   1.178 +      sse2fct=jsimd_ycc_extrgbx_convert_sse2;
   1.179 +      break;
   1.180 +    case JCS_EXT_BGR:
   1.181 +      sse2fct=jsimd_ycc_extbgr_convert_sse2;
   1.182 +      break;
   1.183 +    case JCS_EXT_BGRX:
   1.184 +    case JCS_EXT_BGRA:
   1.185 +      sse2fct=jsimd_ycc_extbgrx_convert_sse2;
   1.186 +      break;
   1.187 +    case JCS_EXT_XBGR:
   1.188 +    case JCS_EXT_ABGR:
   1.189 +      sse2fct=jsimd_ycc_extxbgr_convert_sse2;
   1.190 +      break;
   1.191 +    case JCS_EXT_XRGB:
   1.192 +    case JCS_EXT_ARGB:
   1.193 +      sse2fct=jsimd_ycc_extxrgb_convert_sse2;
   1.194 +      break;
   1.195 +    default:
   1.196 +      sse2fct=jsimd_ycc_rgb_convert_sse2;
   1.197 +      break;
   1.198 +  }
   1.199 +
   1.200 +  sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
   1.201 +}
   1.202 +
   1.203 +GLOBAL(int)
   1.204 +jsimd_can_h2v2_downsample (void)
   1.205 +{
   1.206 +  /* The code is optimised for these values only */
   1.207 +  if (BITS_IN_JSAMPLE != 8)
   1.208 +    return 0;
   1.209 +  if (sizeof(JDIMENSION) != 4)
   1.210 +    return 0;
   1.211 +
   1.212 +  return 1;
   1.213 +}
   1.214 +
   1.215 +GLOBAL(int)
   1.216 +jsimd_can_h2v1_downsample (void)
   1.217 +{
   1.218 +  /* The code is optimised for these values only */
   1.219 +  if (BITS_IN_JSAMPLE != 8)
   1.220 +    return 0;
   1.221 +  if (sizeof(JDIMENSION) != 4)
   1.222 +    return 0;
   1.223 +
   1.224 +  return 1;
   1.225 +}
   1.226 +
   1.227 +GLOBAL(void)
   1.228 +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
   1.229 +                       JSAMPARRAY input_data, JSAMPARRAY output_data)
   1.230 +{
   1.231 +  jsimd_h2v2_downsample_sse2(cinfo->image_width,
   1.232 +                             cinfo->max_v_samp_factor,
   1.233 +                             compptr->v_samp_factor,
   1.234 +                             compptr->width_in_blocks,
   1.235 +                             input_data, output_data);
   1.236 +}
   1.237 +
   1.238 +GLOBAL(void)
   1.239 +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
   1.240 +                       JSAMPARRAY input_data, JSAMPARRAY output_data)
   1.241 +{
   1.242 +  jsimd_h2v1_downsample_sse2(cinfo->image_width,
   1.243 +                             cinfo->max_v_samp_factor,
   1.244 +                             compptr->v_samp_factor,
   1.245 +                             compptr->width_in_blocks,
   1.246 +                             input_data, output_data);
   1.247 +}
   1.248 +
   1.249 +GLOBAL(int)
   1.250 +jsimd_can_h2v2_upsample (void)
   1.251 +{
   1.252 +  /* The code is optimised for these values only */
   1.253 +  if (BITS_IN_JSAMPLE != 8)
   1.254 +    return 0;
   1.255 +  if (sizeof(JDIMENSION) != 4)
   1.256 +    return 0;
   1.257 +
   1.258 +  return 1;
   1.259 +}
   1.260 +
   1.261 +GLOBAL(int)
   1.262 +jsimd_can_h2v1_upsample (void)
   1.263 +{
   1.264 +  /* The code is optimised for these values only */
   1.265 +  if (BITS_IN_JSAMPLE != 8)
   1.266 +    return 0;
   1.267 +  if (sizeof(JDIMENSION) != 4)
   1.268 +    return 0;
   1.269 +
   1.270 +  return 1;
   1.271 +}
   1.272 +
   1.273 +GLOBAL(void)
   1.274 +jsimd_h2v2_upsample (j_decompress_ptr cinfo,
   1.275 +                     jpeg_component_info * compptr, 
   1.276 +                     JSAMPARRAY input_data,
   1.277 +                     JSAMPARRAY * output_data_ptr)
   1.278 +{
   1.279 +  jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
   1.280 +                           cinfo->output_width,
   1.281 +                           input_data, output_data_ptr);
   1.282 +}
   1.283 +
   1.284 +GLOBAL(void)
   1.285 +jsimd_h2v1_upsample (j_decompress_ptr cinfo,
   1.286 +                     jpeg_component_info * compptr, 
   1.287 +                     JSAMPARRAY input_data,
   1.288 +                     JSAMPARRAY * output_data_ptr)
   1.289 +{
   1.290 +  jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
   1.291 +                           cinfo->output_width,
   1.292 +                           input_data, output_data_ptr);
   1.293 +}
   1.294 +
   1.295 +GLOBAL(int)
   1.296 +jsimd_can_h2v2_fancy_upsample (void)
   1.297 +{
   1.298 +  /* The code is optimised for these values only */
   1.299 +  if (BITS_IN_JSAMPLE != 8)
   1.300 +    return 0;
   1.301 +  if (sizeof(JDIMENSION) != 4)
   1.302 +    return 0;
   1.303 +
   1.304 +  if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
   1.305 +    return 0;
   1.306 +
   1.307 +  return 1;
   1.308 +}
   1.309 +
   1.310 +GLOBAL(int)
   1.311 +jsimd_can_h2v1_fancy_upsample (void)
   1.312 +{
   1.313 +  /* The code is optimised for these values only */
   1.314 +  if (BITS_IN_JSAMPLE != 8)
   1.315 +    return 0;
   1.316 +  if (sizeof(JDIMENSION) != 4)
   1.317 +    return 0;
   1.318 +
   1.319 +  if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
   1.320 +    return 0;
   1.321 +
   1.322 +  return 1;
   1.323 +}
   1.324 +
   1.325 +GLOBAL(void)
   1.326 +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
   1.327 +                           jpeg_component_info * compptr, 
   1.328 +                           JSAMPARRAY input_data,
   1.329 +                           JSAMPARRAY * output_data_ptr)
   1.330 +{
   1.331 +  jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
   1.332 +                                 compptr->downsampled_width,
   1.333 +                                 input_data, output_data_ptr);
   1.334 +}
   1.335 +
   1.336 +GLOBAL(void)
   1.337 +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
   1.338 +                           jpeg_component_info * compptr, 
   1.339 +                           JSAMPARRAY input_data,
   1.340 +                           JSAMPARRAY * output_data_ptr)
   1.341 +{
   1.342 +  jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
   1.343 +                                 compptr->downsampled_width,
   1.344 +                                 input_data, output_data_ptr);
   1.345 +}
   1.346 +
   1.347 +GLOBAL(int)
   1.348 +jsimd_can_h2v2_merged_upsample (void)
   1.349 +{
   1.350 +  /* The code is optimised for these values only */
   1.351 +  if (BITS_IN_JSAMPLE != 8)
   1.352 +    return 0;
   1.353 +  if (sizeof(JDIMENSION) != 4)
   1.354 +    return 0;
   1.355 +
   1.356 +  if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
   1.357 +    return 0;
   1.358 +
   1.359 +  return 1;
   1.360 +}
   1.361 +
   1.362 +GLOBAL(int)
   1.363 +jsimd_can_h2v1_merged_upsample (void)
   1.364 +{
   1.365 +  /* The code is optimised for these values only */
   1.366 +  if (BITS_IN_JSAMPLE != 8)
   1.367 +    return 0;
   1.368 +  if (sizeof(JDIMENSION) != 4)
   1.369 +    return 0;
   1.370 +
   1.371 +  if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
   1.372 +    return 0;
   1.373 +
   1.374 +  return 1;
   1.375 +}
   1.376 +
   1.377 +GLOBAL(void)
   1.378 +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
   1.379 +                            JSAMPIMAGE input_buf,
   1.380 +                            JDIMENSION in_row_group_ctr,
   1.381 +                            JSAMPARRAY output_buf)
   1.382 +{
   1.383 +  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   1.384 +
   1.385 +  switch(cinfo->out_color_space)
   1.386 +  {
   1.387 +    case JCS_EXT_RGB:
   1.388 +      sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
   1.389 +      break;
   1.390 +    case JCS_EXT_RGBX:
   1.391 +    case JCS_EXT_RGBA:
   1.392 +      sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
   1.393 +      break;
   1.394 +    case JCS_EXT_BGR:
   1.395 +      sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
   1.396 +      break;
   1.397 +    case JCS_EXT_BGRX:
   1.398 +    case JCS_EXT_BGRA:
   1.399 +      sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
   1.400 +      break;
   1.401 +    case JCS_EXT_XBGR:
   1.402 +    case JCS_EXT_ABGR:
   1.403 +      sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
   1.404 +      break;
   1.405 +    case JCS_EXT_XRGB:
   1.406 +    case JCS_EXT_ARGB:
   1.407 +      sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
   1.408 +      break;
   1.409 +    default:
   1.410 +      sse2fct=jsimd_h2v2_merged_upsample_sse2;
   1.411 +      break;
   1.412 +  }
   1.413 +
   1.414 +  sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
   1.415 +}
   1.416 +
   1.417 +GLOBAL(void)
   1.418 +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
   1.419 +                            JSAMPIMAGE input_buf,
   1.420 +                            JDIMENSION in_row_group_ctr,
   1.421 +                            JSAMPARRAY output_buf)
   1.422 +{
   1.423 +  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   1.424 +
   1.425 +  switch(cinfo->out_color_space)
   1.426 +  {
   1.427 +    case JCS_EXT_RGB:
   1.428 +      sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
   1.429 +      break;
   1.430 +    case JCS_EXT_RGBX:
   1.431 +    case JCS_EXT_RGBA:
   1.432 +      sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
   1.433 +      break;
   1.434 +    case JCS_EXT_BGR:
   1.435 +      sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
   1.436 +      break;
   1.437 +    case JCS_EXT_BGRX:
   1.438 +    case JCS_EXT_BGRA:
   1.439 +      sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
   1.440 +      break;
   1.441 +    case JCS_EXT_XBGR:
   1.442 +    case JCS_EXT_ABGR:
   1.443 +      sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
   1.444 +      break;
   1.445 +    case JCS_EXT_XRGB:
   1.446 +    case JCS_EXT_ARGB:
   1.447 +      sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
   1.448 +      break;
   1.449 +    default:
   1.450 +      sse2fct=jsimd_h2v1_merged_upsample_sse2;
   1.451 +      break;
   1.452 +  }
   1.453 +
   1.454 +  sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
   1.455 +}
   1.456 +
   1.457 +GLOBAL(int)
   1.458 +jsimd_can_convsamp (void)
   1.459 +{
   1.460 +  /* The code is optimised for these values only */
   1.461 +  if (DCTSIZE != 8)
   1.462 +    return 0;
   1.463 +  if (BITS_IN_JSAMPLE != 8)
   1.464 +    return 0;
   1.465 +  if (sizeof(JDIMENSION) != 4)
   1.466 +    return 0;
   1.467 +  if (sizeof(DCTELEM) != 2)
   1.468 +    return 0;
   1.469 +
   1.470 +  return 1;
   1.471 +}
   1.472 +
   1.473 +GLOBAL(int)
   1.474 +jsimd_can_convsamp_float (void)
   1.475 +{
   1.476 +  /* The code is optimised for these values only */
   1.477 +  if (DCTSIZE != 8)
   1.478 +    return 0;
   1.479 +  if (BITS_IN_JSAMPLE != 8)
   1.480 +    return 0;
   1.481 +  if (sizeof(JDIMENSION) != 4)
   1.482 +    return 0;
   1.483 +  if (sizeof(FAST_FLOAT) != 4)
   1.484 +    return 0;
   1.485 +
   1.486 +  return 1;
   1.487 +}
   1.488 +
   1.489 +GLOBAL(void)
   1.490 +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
   1.491 +                DCTELEM * workspace)
   1.492 +{
   1.493 +  jsimd_convsamp_sse2(sample_data, start_col, workspace);
   1.494 +}
   1.495 +
   1.496 +GLOBAL(void)
   1.497 +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
   1.498 +                      FAST_FLOAT * workspace)
   1.499 +{
   1.500 +  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
   1.501 +}
   1.502 +
   1.503 +GLOBAL(int)
   1.504 +jsimd_can_fdct_islow (void)
   1.505 +{
   1.506 +  /* The code is optimised for these values only */
   1.507 +  if (DCTSIZE != 8)
   1.508 +    return 0;
   1.509 +  if (sizeof(DCTELEM) != 2)
   1.510 +    return 0;
   1.511 +
   1.512 +  if (!IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
   1.513 +    return 0;
   1.514 +
   1.515 +  return 1;
   1.516 +}
   1.517 +
   1.518 +GLOBAL(int)
   1.519 +jsimd_can_fdct_ifast (void)
   1.520 +{
   1.521 +  /* The code is optimised for these values only */
   1.522 +  if (DCTSIZE != 8)
   1.523 +    return 0;
   1.524 +  if (sizeof(DCTELEM) != 2)
   1.525 +    return 0;
   1.526 +
   1.527 +  if (!IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
   1.528 +    return 0;
   1.529 +
   1.530 +  return 1;
   1.531 +}
   1.532 +
   1.533 +GLOBAL(int)
   1.534 +jsimd_can_fdct_float (void)
   1.535 +{
   1.536 +  /* The code is optimised for these values only */
   1.537 +  if (DCTSIZE != 8)
   1.538 +    return 0;
   1.539 +  if (sizeof(FAST_FLOAT) != 4)
   1.540 +    return 0;
   1.541 +
   1.542 +  if (!IS_ALIGNED_SSE(jconst_fdct_float_sse))
   1.543 +    return 0;
   1.544 +
   1.545 +  return 1;
   1.546 +}
   1.547 +
   1.548 +GLOBAL(void)
   1.549 +jsimd_fdct_islow (DCTELEM * data)
   1.550 +{
   1.551 +  jsimd_fdct_islow_sse2(data);
   1.552 +}
   1.553 +
   1.554 +GLOBAL(void)
   1.555 +jsimd_fdct_ifast (DCTELEM * data)
   1.556 +{
   1.557 +  jsimd_fdct_ifast_sse2(data);
   1.558 +}
   1.559 +
   1.560 +GLOBAL(void)
   1.561 +jsimd_fdct_float (FAST_FLOAT * data)
   1.562 +{
   1.563 +  jsimd_fdct_float_sse(data);
   1.564 +}
   1.565 +
   1.566 +GLOBAL(int)
   1.567 +jsimd_can_quantize (void)
   1.568 +{
   1.569 +  /* The code is optimised for these values only */
   1.570 +  if (DCTSIZE != 8)
   1.571 +    return 0;
   1.572 +  if (sizeof(JCOEF) != 2)
   1.573 +    return 0;
   1.574 +  if (sizeof(DCTELEM) != 2)
   1.575 +    return 0;
   1.576 +
   1.577 +  return 1;
   1.578 +}
   1.579 +
   1.580 +GLOBAL(int)
   1.581 +jsimd_can_quantize_float (void)
   1.582 +{
   1.583 +  /* The code is optimised for these values only */
   1.584 +  if (DCTSIZE != 8)
   1.585 +    return 0;
   1.586 +  if (sizeof(JCOEF) != 2)
   1.587 +    return 0;
   1.588 +  if (sizeof(FAST_FLOAT) != 4)
   1.589 +    return 0;
   1.590 +
   1.591 +  return 1;
   1.592 +}
   1.593 +
   1.594 +GLOBAL(void)
   1.595 +jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
   1.596 +                DCTELEM * workspace)
   1.597 +{
   1.598 +  jsimd_quantize_sse2(coef_block, divisors, workspace);
   1.599 +}
   1.600 +
   1.601 +GLOBAL(void)
   1.602 +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
   1.603 +                      FAST_FLOAT * workspace)
   1.604 +{
   1.605 +  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
   1.606 +}
   1.607 +
   1.608 +GLOBAL(int)
   1.609 +jsimd_can_idct_2x2 (void)
   1.610 +{
   1.611 +  /* The code is optimised for these values only */
   1.612 +  if (DCTSIZE != 8)
   1.613 +    return 0;
   1.614 +  if (sizeof(JCOEF) != 2)
   1.615 +    return 0;
   1.616 +  if (BITS_IN_JSAMPLE != 8)
   1.617 +    return 0;
   1.618 +  if (sizeof(JDIMENSION) != 4)
   1.619 +    return 0;
   1.620 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
   1.621 +    return 0;
   1.622 +
   1.623 +  if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
   1.624 +    return 0;
   1.625 +
   1.626 +  return 1;
   1.627 +}
   1.628 +
   1.629 +GLOBAL(int)
   1.630 +jsimd_can_idct_4x4 (void)
   1.631 +{
   1.632 +  /* The code is optimised for these values only */
   1.633 +  if (DCTSIZE != 8)
   1.634 +    return 0;
   1.635 +  if (sizeof(JCOEF) != 2)
   1.636 +    return 0;
   1.637 +  if (BITS_IN_JSAMPLE != 8)
   1.638 +    return 0;
   1.639 +  if (sizeof(JDIMENSION) != 4)
   1.640 +    return 0;
   1.641 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
   1.642 +    return 0;
   1.643 +
   1.644 +  if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
   1.645 +    return 0;
   1.646 +
   1.647 +  return 1;
   1.648 +}
   1.649 +
   1.650 +GLOBAL(void)
   1.651 +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   1.652 +                JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1.653 +                JDIMENSION output_col)
   1.654 +{
   1.655 +  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
   1.656 +}
   1.657 +
   1.658 +GLOBAL(void)
   1.659 +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   1.660 +                JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1.661 +                JDIMENSION output_col)
   1.662 +{
   1.663 +  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
   1.664 +}
   1.665 +
   1.666 +GLOBAL(int)
   1.667 +jsimd_can_idct_islow (void)
   1.668 +{
   1.669 +  /* The code is optimised for these values only */
   1.670 +  if (DCTSIZE != 8)
   1.671 +    return 0;
   1.672 +  if (sizeof(JCOEF) != 2)
   1.673 +    return 0;
   1.674 +  if (BITS_IN_JSAMPLE != 8)
   1.675 +    return 0;
   1.676 +  if (sizeof(JDIMENSION) != 4)
   1.677 +    return 0;
   1.678 +  if (sizeof(ISLOW_MULT_TYPE) != 2)
   1.679 +    return 0;
   1.680 +
   1.681 +  if (!IS_ALIGNED_SSE(jconst_idct_islow_sse2))
   1.682 +    return 0;
   1.683 +
   1.684 +  return 1;
   1.685 +}
   1.686 +
   1.687 +GLOBAL(int)
   1.688 +jsimd_can_idct_ifast (void)
   1.689 +{
   1.690 +  /* The code is optimised for these values only */
   1.691 +  if (DCTSIZE != 8)
   1.692 +    return 0;
   1.693 +  if (sizeof(JCOEF) != 2)
   1.694 +    return 0;
   1.695 +  if (BITS_IN_JSAMPLE != 8)
   1.696 +    return 0;
   1.697 +  if (sizeof(JDIMENSION) != 4)
   1.698 +    return 0;
   1.699 +  if (sizeof(IFAST_MULT_TYPE) != 2)
   1.700 +    return 0;
   1.701 +  if (IFAST_SCALE_BITS != 2)
   1.702 +    return 0;
   1.703 +
   1.704 +  if (!IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
   1.705 +    return 0;
   1.706 +
   1.707 +  return 1;
   1.708 +}
   1.709 +
   1.710 +GLOBAL(int)
   1.711 +jsimd_can_idct_float (void)
   1.712 +{
   1.713 +  if (DCTSIZE != 8)
   1.714 +    return 0;
   1.715 +  if (sizeof(JCOEF) != 2)
   1.716 +    return 0;
   1.717 +  if (BITS_IN_JSAMPLE != 8)
   1.718 +    return 0;
   1.719 +  if (sizeof(JDIMENSION) != 4)
   1.720 +    return 0;
   1.721 +  if (sizeof(FAST_FLOAT) != 4)
   1.722 +    return 0;
   1.723 +  if (sizeof(FLOAT_MULT_TYPE) != 4)
   1.724 +    return 0;
   1.725 +
   1.726 +  if (!IS_ALIGNED_SSE(jconst_idct_float_sse2))
   1.727 +    return 0;
   1.728 +
   1.729 +  return 1;
   1.730 +}
   1.731 +
   1.732 +GLOBAL(void)
   1.733 +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   1.734 +                JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1.735 +                JDIMENSION output_col)
   1.736 +{
   1.737 +  jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
   1.738 +}
   1.739 +
   1.740 +GLOBAL(void)
   1.741 +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   1.742 +                JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1.743 +                JDIMENSION output_col)
   1.744 +{
   1.745 +  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
   1.746 +}
   1.747 +
   1.748 +GLOBAL(void)
   1.749 +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   1.750 +                JCOEFPTR coef_block, JSAMPARRAY output_buf,
   1.751 +                JDIMENSION output_col)
   1.752 +{
   1.753 +  jsimd_idct_float_sse2(compptr->dct_table, coef_block,
   1.754 +                        output_buf, output_col);
   1.755 +}
   1.756 +

mercurial