media/libjpeg/simd/jsimd_arm.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  * jsimd_arm.c
     3  *
     4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
     5  * Copyright 2009-2011 D. R. Commander
     6  * 
     7  * Based on the x86 SIMD extension for IJG JPEG library,
     8  * Copyright (C) 1999-2006, MIYASAKA Masaru.
     9  * For conditions of distribution and use, see copyright notice in jsimdext.inc
    10  *
    11  * This file contains the interface between the "normal" portions
    12  * of the library and the SIMD implementations when running on
    13  * ARM architecture.
    14  *
    15  * Based on the stubs from 'jsimd_none.c'
    16  */
    18 #define JPEG_INTERNALS
    19 #include "../jinclude.h"
    20 #include "../jpeglib.h"
    21 #include "../jsimd.h"
    22 #include "../jdct.h"
    23 #include "../jsimddct.h"
    24 #include "jsimd.h"
    26 #include <stdio.h>
    27 #include <string.h>
    28 #include <ctype.h>
    30 static unsigned int simd_support = ~0;
    32 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
    34 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
    36 LOCAL(int)
    37 check_feature (char *buffer, char *feature)
    38 {
    39   char *p;
    40   if (*feature == 0)
    41     return 0;
    42   if (strncmp(buffer, "Features", 8) != 0)
    43     return 0;
    44   buffer += 8;
    45   while (isspace(*buffer))
    46     buffer++;
    48   /* Check if 'feature' is present in the buffer as a separate word */
    49   while ((p = strstr(buffer, feature))) {
    50     if (p > buffer && !isspace(*(p - 1))) {
    51       buffer++;
    52       continue;
    53     }
    54     p += strlen(feature);
    55     if (*p != 0 && !isspace(*p)) {
    56       buffer++;
    57       continue;
    58     }
    59     return 1;
    60   }
    61   return 0;
    62 }
    64 LOCAL(int)
    65 parse_proc_cpuinfo (int bufsize)
    66 {
    67   char *buffer = (char *)malloc(bufsize);
    68   FILE *fd;
    69   simd_support = 0;
    71   if (!buffer)
    72     return 0;
    74   fd = fopen("/proc/cpuinfo", "r");
    75   if (fd) {
    76     while (fgets(buffer, bufsize, fd)) {
    77       if (!strchr(buffer, '\n') && !feof(fd)) {
    78         /* "impossible" happened - insufficient size of the buffer! */
    79         fclose(fd);
    80         free(buffer);
    81         return 0;
    82       }
    83       if (check_feature(buffer, "neon"))
    84         simd_support |= JSIMD_ARM_NEON;
    85     }
    86     fclose(fd);
    87   }
    88   free(buffer);
    89   return 1;
    90 }
    92 #endif
    94 /*
    95  * Check what SIMD accelerations are supported.
    96  *
    97  * FIXME: This code is racy under a multi-threaded environment.
    98  */
    99 LOCAL(void)
   100 init_simd (void)
   101 {
   102   char *env = NULL;
   103 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
   104   int bufsize = 1024; /* an initial guess for the line buffer size limit */
   105 #endif
   107   if (simd_support != ~0U)
   108     return;
   110   simd_support = 0;
   112 #if defined(__ARM_NEON__)
   113   simd_support |= JSIMD_ARM_NEON;
   114 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
   115   /* We still have a chance to use NEON regardless of globally used
   116    * -mcpu/-mfpu options passed to gcc by performing runtime detection via
   117    * /proc/cpuinfo parsing on linux/android */
   118   while (!parse_proc_cpuinfo(bufsize)) {
   119     bufsize *= 2;
   120     if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
   121       break;
   122   }
   123 #endif
   125   /* Force different settings through environment variables */
   126   env = getenv("JSIMD_FORCE_ARM_NEON");
   127   if ((env != NULL) && (strcmp(env, "1") == 0))
   128     simd_support &= JSIMD_ARM_NEON;
   129   env = getenv("JSIMD_FORCE_NO_SIMD");
   130   if ((env != NULL) && (strcmp(env, "1") == 0))
   131     simd_support = 0;
   132 }
   134 GLOBAL(int)
   135 jsimd_can_rgb_ycc (void)
   136 {
   137   init_simd();
   139   /* The code is optimised for these values only */
   140   if (BITS_IN_JSAMPLE != 8)
   141     return 0;
   142   if (sizeof(JDIMENSION) != 4)
   143     return 0;
   144   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
   145     return 0;
   147   if (simd_support & JSIMD_ARM_NEON)
   148     return 1;
   150   return 0;
   151 }
/*
 * Capability query for rgb_gray conversion: runs SIMD detection and then
 * always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_rgb_gray (void)
{
  init_simd();

  return 0;
}
   161 GLOBAL(int)
   162 jsimd_can_ycc_rgb (void)
   163 {
   164   init_simd();
   166   /* The code is optimised for these values only */
   167   if (BITS_IN_JSAMPLE != 8)
   168     return 0;
   169   if (sizeof(JDIMENSION) != 4)
   170     return 0;
   171   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
   172     return 0;
   173   if (simd_support & JSIMD_ARM_NEON)
   174     return 1;
   176   return 0;
   177 }
   179 GLOBAL(void)
   180 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
   181                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
   182                        JDIMENSION output_row, int num_rows)
   183 {
   184   void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   186   switch(cinfo->in_color_space)
   187   {
   188     case JCS_EXT_RGB:
   189       neonfct=jsimd_extrgb_ycc_convert_neon;
   190       break;
   191     case JCS_EXT_RGBX:
   192     case JCS_EXT_RGBA:
   193       neonfct=jsimd_extrgbx_ycc_convert_neon;
   194       break;
   195     case JCS_EXT_BGR:
   196       neonfct=jsimd_extbgr_ycc_convert_neon;
   197       break;
   198     case JCS_EXT_BGRX:
   199     case JCS_EXT_BGRA:
   200       neonfct=jsimd_extbgrx_ycc_convert_neon;
   201       break;
   202     case JCS_EXT_XBGR:
   203     case JCS_EXT_ABGR:
   204       neonfct=jsimd_extxbgr_ycc_convert_neon;
   205       break;
   206     case JCS_EXT_XRGB:
   207     case JCS_EXT_ARGB:
   208       neonfct=jsimd_extxrgb_ycc_convert_neon;
   209       break;
   210     default:
   211       neonfct=jsimd_extrgb_ycc_convert_neon;
   212       break;
   213   }
   215   if (simd_support & JSIMD_ARM_NEON)
   216     neonfct(cinfo->image_width, input_buf,
   217         output_buf, output_row, num_rows);
   218 }
/*
 * Placeholder with an empty body: no rgb_gray SIMD routine is wired in
 * here, and jsimd_can_rgb_gray() in this file returns 0.
 */
GLOBAL(void)
jsimd_rgb_gray_convert (j_compress_ptr cinfo,
                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
                        JDIMENSION output_row, int num_rows)
{
}
   227 GLOBAL(void)
   228 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
   229                        JSAMPIMAGE input_buf, JDIMENSION input_row,
   230                        JSAMPARRAY output_buf, int num_rows)
   231 {
   232   void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   234   switch(cinfo->out_color_space)
   235   {
   236     case JCS_EXT_RGB:
   237       neonfct=jsimd_ycc_extrgb_convert_neon;
   238       break;
   239     case JCS_EXT_RGBX:
   240     case JCS_EXT_RGBA:
   241       neonfct=jsimd_ycc_extrgbx_convert_neon;
   242       break;
   243     case JCS_EXT_BGR:
   244       neonfct=jsimd_ycc_extbgr_convert_neon;
   245       break;
   246     case JCS_EXT_BGRX:
   247     case JCS_EXT_BGRA:
   248       neonfct=jsimd_ycc_extbgrx_convert_neon;
   249       break;
   250     case JCS_EXT_XBGR:
   251     case JCS_EXT_ABGR:
   252       neonfct=jsimd_ycc_extxbgr_convert_neon;
   253       break;
   254     case JCS_EXT_XRGB:
   255     case JCS_EXT_ARGB:
   256       neonfct=jsimd_ycc_extxrgb_convert_neon;
   257       break;
   258   default:
   259       neonfct=jsimd_ycc_extrgb_convert_neon;
   260       break;
   261   }
   263   if (simd_support & JSIMD_ARM_NEON)
   264     neonfct(cinfo->output_width, input_buf,
   265         input_row, output_buf, num_rows);
   266 }
/*
 * Capability query for h2v2 downsampling: runs SIMD detection and then
 * always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_h2v2_downsample (void)
{
  init_simd();

  return 0;
}
/*
 * Capability query for h2v1 downsampling: runs SIMD detection and then
 * always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_h2v1_downsample (void)
{
  init_simd();

  return 0;
}
/*
 * Placeholder with an empty body: no h2v2 downsampling SIMD routine is
 * wired in here, and jsimd_can_h2v2_downsample() returns 0.
 */
GLOBAL(void)
jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
{
}
/*
 * Placeholder with an empty body: no h2v1 downsampling SIMD routine is
 * wired in here, and jsimd_can_h2v1_downsample() returns 0.
 */
GLOBAL(void)
jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
{
}
/*
 * Capability query for h2v2 upsampling: runs SIMD detection and then
 * always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_h2v2_upsample (void)
{
  init_simd();

  return 0;
}
/*
 * Capability query for h2v1 upsampling: runs SIMD detection and then
 * always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_h2v1_upsample (void)
{
  init_simd();

  return 0;
}
/*
 * Placeholder with an empty body: no h2v2 upsampling SIMD routine is wired
 * in here, and jsimd_can_h2v2_upsample() returns 0.
 */
GLOBAL(void)
jsimd_h2v2_upsample (j_decompress_ptr cinfo,
                     jpeg_component_info * compptr,
                     JSAMPARRAY input_data,
                     JSAMPARRAY * output_data_ptr)
{
}
/*
 * Placeholder with an empty body: no h2v1 upsampling SIMD routine is wired
 * in here, and jsimd_can_h2v1_upsample() returns 0.
 */
GLOBAL(void)
jsimd_h2v1_upsample (j_decompress_ptr cinfo,
                     jpeg_component_info * compptr,
                     JSAMPARRAY input_data,
                     JSAMPARRAY * output_data_ptr)
{
}
/*
 * Capability query for h2v2 fancy upsampling: runs SIMD detection and then
 * always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_h2v2_fancy_upsample (void)
{
  init_simd();

  return 0;
}
   336 GLOBAL(int)
   337 jsimd_can_h2v1_fancy_upsample (void)
   338 {
   339   init_simd();
   341   /* The code is optimised for these values only */
   342   if (BITS_IN_JSAMPLE != 8)
   343     return 0;
   344   if (sizeof(JDIMENSION) != 4)
   345     return 0;
   347   if (simd_support & JSIMD_ARM_NEON)
   348     return 1;
   350   return 0;
   351 }
/*
 * Placeholder with an empty body: no h2v2 fancy upsampling SIMD routine is
 * wired in here, and jsimd_can_h2v2_fancy_upsample() returns 0.
 */
GLOBAL(void)
jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
                           jpeg_component_info * compptr,
                           JSAMPARRAY input_data,
                           JSAMPARRAY * output_data_ptr)
{
}
   361 GLOBAL(void)
   362 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
   363                            jpeg_component_info * compptr, 
   364                            JSAMPARRAY input_data,
   365                            JSAMPARRAY * output_data_ptr)
   366 {
   367   if (simd_support & JSIMD_ARM_NEON)
   368     jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
   369         compptr->downsampled_width, input_data, output_data_ptr);
   370 }
/*
 * Capability query for h2v2 merged upsampling: runs SIMD detection and
 * then always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_h2v2_merged_upsample (void)
{
  init_simd();

  return 0;
}
/*
 * Capability query for h2v1 merged upsampling: runs SIMD detection and
 * then always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_h2v1_merged_upsample (void)
{
  init_simd();

  return 0;
}
/*
 * Placeholder with an empty body: no h2v2 merged upsampling SIMD routine
 * is wired in here, and jsimd_can_h2v2_merged_upsample() returns 0.
 */
GLOBAL(void)
jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
                            JSAMPIMAGE input_buf,
                            JDIMENSION in_row_group_ctr,
                            JSAMPARRAY output_buf)
{
}
/*
 * Placeholder with an empty body: no h2v1 merged upsampling SIMD routine
 * is wired in here, and jsimd_can_h2v1_merged_upsample() returns 0.
 */
GLOBAL(void)
jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
                            JSAMPIMAGE input_buf,
                            JDIMENSION in_row_group_ctr,
                            JSAMPARRAY output_buf)
{
}
   404 GLOBAL(int)
   405 jsimd_can_convsamp (void)
   406 {
   407   init_simd();
   409   /* The code is optimised for these values only */
   410   if (DCTSIZE != 8)
   411     return 0;
   412   if (BITS_IN_JSAMPLE != 8)
   413     return 0;
   414   if (sizeof(JDIMENSION) != 4)
   415     return 0;
   416   if (sizeof(DCTELEM) != 2)
   417     return 0;
   419   if (simd_support & JSIMD_ARM_NEON)
   420     return 1;
   422   return 0;
   423 }
/*
 * Capability query for floating-point convsamp: runs SIMD detection and
 * then always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_convsamp_float (void)
{
  init_simd();

  return 0;
}
   433 GLOBAL(void)
   434 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
   435                 DCTELEM * workspace)
   436 {
   437   if (simd_support & JSIMD_ARM_NEON)
   438     jsimd_convsamp_neon(sample_data, start_col, workspace);
   439 }
/*
 * Placeholder with an empty body: no floating-point convsamp SIMD routine
 * is wired in here, and jsimd_can_convsamp_float() returns 0.
 */
GLOBAL(void)
jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
                      FAST_FLOAT * workspace)
{
}
/*
 * Capability query for the islow forward DCT: runs SIMD detection and then
 * always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_fdct_islow (void)
{
  init_simd();

  return 0;
}
   455 GLOBAL(int)
   456 jsimd_can_fdct_ifast (void)
   457 {
   458   init_simd();
   460   /* The code is optimised for these values only */
   461   if (DCTSIZE != 8)
   462     return 0;
   463   if (sizeof(DCTELEM) != 2)
   464     return 0;
   466   if (simd_support & JSIMD_ARM_NEON)
   467     return 1;
   469   return 0;
   470 }
/*
 * Capability query for the floating-point forward DCT: runs SIMD detection
 * and then always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_fdct_float (void)
{
  init_simd();

  return 0;
}
/*
 * Placeholder with an empty body: no islow forward DCT SIMD routine is
 * wired in here, and jsimd_can_fdct_islow() returns 0.
 */
GLOBAL(void)
jsimd_fdct_islow (DCTELEM * data)
{
}
   485 GLOBAL(void)
   486 jsimd_fdct_ifast (DCTELEM * data)
   487 {
   488   if (simd_support & JSIMD_ARM_NEON)
   489     jsimd_fdct_ifast_neon(data);
   490 }
/*
 * Placeholder with an empty body: no floating-point forward DCT SIMD
 * routine is wired in here, and jsimd_can_fdct_float() returns 0.
 */
GLOBAL(void)
jsimd_fdct_float (FAST_FLOAT * data)
{
}
   497 GLOBAL(int)
   498 jsimd_can_quantize (void)
   499 {
   500   init_simd();
   502   /* The code is optimised for these values only */
   503   if (DCTSIZE != 8)
   504     return 0;
   505   if (sizeof(JCOEF) != 2)
   506     return 0;
   507   if (sizeof(DCTELEM) != 2)
   508     return 0;
   510   if (simd_support & JSIMD_ARM_NEON)
   511     return 1;
   513   return 0;
   514 }
/*
 * Capability query for floating-point quantization: runs SIMD detection
 * and then always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_quantize_float (void)
{
  init_simd();

  return 0;
}
   524 GLOBAL(void)
   525 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
   526                 DCTELEM * workspace)
   527 {
   528   if (simd_support & JSIMD_ARM_NEON)
   529     jsimd_quantize_neon(coef_block, divisors, workspace);
   530 }
/*
 * Placeholder with an empty body: no floating-point quantization SIMD
 * routine is wired in here, and jsimd_can_quantize_float() returns 0.
 */
GLOBAL(void)
jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
                      FAST_FLOAT * workspace)
{
}
   538 GLOBAL(int)
   539 jsimd_can_idct_2x2 (void)
   540 {
   541   init_simd();
   543   /* The code is optimised for these values only */
   544   if (DCTSIZE != 8)
   545     return 0;
   546   if (sizeof(JCOEF) != 2)
   547     return 0;
   548   if (BITS_IN_JSAMPLE != 8)
   549     return 0;
   550   if (sizeof(JDIMENSION) != 4)
   551     return 0;
   552   if (sizeof(ISLOW_MULT_TYPE) != 2)
   553     return 0;
   555   if ((simd_support & JSIMD_ARM_NEON))
   556     return 1;
   558   return 0;
   559 }
   561 GLOBAL(int)
   562 jsimd_can_idct_4x4 (void)
   563 {
   564   init_simd();
   566   /* The code is optimised for these values only */
   567   if (DCTSIZE != 8)
   568     return 0;
   569   if (sizeof(JCOEF) != 2)
   570     return 0;
   571   if (BITS_IN_JSAMPLE != 8)
   572     return 0;
   573   if (sizeof(JDIMENSION) != 4)
   574     return 0;
   575   if (sizeof(ISLOW_MULT_TYPE) != 2)
   576     return 0;
   578   if ((simd_support & JSIMD_ARM_NEON))
   579     return 1;
   581   return 0;
   582 }
   584 GLOBAL(void)
   585 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   586                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   587                 JDIMENSION output_col)
   588 {
   589   if ((simd_support & JSIMD_ARM_NEON))
   590     jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
   591 }
   593 GLOBAL(void)
   594 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   595                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   596                 JDIMENSION output_col)
   597 {
   598   if ((simd_support & JSIMD_ARM_NEON))
   599     jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
   600 }
   602 GLOBAL(int)
   603 jsimd_can_idct_islow (void)
   604 {
   605   init_simd();
   607   /* The code is optimised for these values only */
   608   if (DCTSIZE != 8)
   609     return 0;
   610   if (sizeof(JCOEF) != 2)
   611     return 0;
   612   if (BITS_IN_JSAMPLE != 8)
   613     return 0;
   614   if (sizeof(JDIMENSION) != 4)
   615     return 0;
   616   if (sizeof(ISLOW_MULT_TYPE) != 2)
   617     return 0;
   619   if (simd_support & JSIMD_ARM_NEON)
   620     return 1;
   622   return 0;
   623 }
   625 GLOBAL(int)
   626 jsimd_can_idct_ifast (void)
   627 {
   628   init_simd();
   630   /* The code is optimised for these values only */
   631   if (DCTSIZE != 8)
   632     return 0;
   633   if (sizeof(JCOEF) != 2)
   634     return 0;
   635   if (BITS_IN_JSAMPLE != 8)
   636     return 0;
   637   if (sizeof(JDIMENSION) != 4)
   638     return 0;
   639   if (sizeof(IFAST_MULT_TYPE) != 2)
   640     return 0;
   641   if (IFAST_SCALE_BITS != 2)
   642     return 0;
   644   if ((simd_support & JSIMD_ARM_NEON))
   645     return 1;
   647   return 0;
   648 }
/*
 * Capability query for the floating-point inverse DCT: runs SIMD detection
 * and then always returns 0 (not available in this file).
 */
GLOBAL(int)
jsimd_can_idct_float (void)
{
  init_simd();

  return 0;
}
   658 GLOBAL(void)
   659 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   660                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   661                 JDIMENSION output_col)
   662 {
   663   if ((simd_support & JSIMD_ARM_NEON))
   664     jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
   665 }
   667 GLOBAL(void)
   668 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
   669                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
   670                 JDIMENSION output_col)
   671 {
   672   if ((simd_support & JSIMD_ARM_NEON))
   673     jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
   674 }
/*
 * Placeholder with an empty body: no floating-point inverse DCT SIMD
 * routine is wired in here, and jsimd_can_idct_float() returns 0.
 */
GLOBAL(void)
jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                JCOEFPTR coef_block, JSAMPARRAY output_buf,
                JDIMENSION output_col)
{
}

mercurial