media/libvpx/vp8/common/x86/variance_sse2.c

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701, complying with the disk avoidance requirements
documented in https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /*
     2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3  *
     4  *  Use of this source code is governed by a BSD-style license
     5  *  that can be found in the LICENSE file in the root of the source
     6  *  tree. An additional intellectual property rights grant can be found
     7  *  in the file PATENTS.  All contributing project authors may
     8  *  be found in the AUTHORS file in the root of the source tree.
     9  */
    11 #include "vpx_config.h"
    12 #include "vp8/common/variance.h"
    13 #include "vp8/common/pragmas.h"
    14 #include "vpx_ports/mem.h"
    15 #include "vp8/common/x86/filter_x86.h"
    17 extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
    18 extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
    19 extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
    20 extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
    22 extern void vp8_filter_block2d_bil4x4_var_mmx
    23 (
    24     const unsigned char *ref_ptr,
    25     int ref_pixels_per_line,
    26     const unsigned char *src_ptr,
    27     int src_pixels_per_line,
    28     const short *HFilter,
    29     const short *VFilter,
    30     int *sum,
    31     unsigned int *sumsquared
    32 );
    34 extern unsigned int vp8_get4x4var_mmx
    35 (
    36     const unsigned char *src_ptr,
    37     int  source_stride,
    38     const unsigned char *ref_ptr,
    39     int  recon_stride,
    40     unsigned int *SSE,
    41     int *Sum
    42 );
    44 unsigned int vp8_get_mb_ss_sse2
    45 (
    46     const short *src_ptr
    47 );
    48 unsigned int vp8_get16x16var_sse2
    49 (
    50     const unsigned char *src_ptr,
    51     int source_stride,
    52     const unsigned char *ref_ptr,
    53     int recon_stride,
    54     unsigned int *SSE,
    55     int *Sum
    56 );
    57 unsigned int vp8_get8x8var_sse2
    58 (
    59     const unsigned char *src_ptr,
    60     int source_stride,
    61     const unsigned char *ref_ptr,
    62     int recon_stride,
    63     unsigned int *SSE,
    64     int *Sum
    65 );
    66 void vp8_filter_block2d_bil_var_sse2
    67 (
    68     const unsigned char *ref_ptr,
    69     int ref_pixels_per_line,
    70     const unsigned char *src_ptr,
    71     int src_pixels_per_line,
    72     unsigned int Height,
    73     int  xoffset,
    74     int  yoffset,
    75     int *sum,
    76     unsigned int *sumsquared
    77 );
    78 void vp8_half_horiz_vert_variance8x_h_sse2
    79 (
    80     const unsigned char *ref_ptr,
    81     int ref_pixels_per_line,
    82     const unsigned char *src_ptr,
    83     int src_pixels_per_line,
    84     unsigned int Height,
    85     int *sum,
    86     unsigned int *sumsquared
    87 );
    88 void vp8_half_horiz_vert_variance16x_h_sse2
    89 (
    90     const unsigned char *ref_ptr,
    91     int ref_pixels_per_line,
    92     const unsigned char *src_ptr,
    93     int src_pixels_per_line,
    94     unsigned int Height,
    95     int *sum,
    96     unsigned int *sumsquared
    97 );
    98 void vp8_half_horiz_variance8x_h_sse2
    99 (
   100     const unsigned char *ref_ptr,
   101     int ref_pixels_per_line,
   102     const unsigned char *src_ptr,
   103     int src_pixels_per_line,
   104     unsigned int Height,
   105     int *sum,
   106     unsigned int *sumsquared
   107 );
   108 void vp8_half_horiz_variance16x_h_sse2
   109 (
   110     const unsigned char *ref_ptr,
   111     int ref_pixels_per_line,
   112     const unsigned char *src_ptr,
   113     int src_pixels_per_line,
   114     unsigned int Height,
   115     int *sum,
   116     unsigned int *sumsquared
   117 );
   118 void vp8_half_vert_variance8x_h_sse2
   119 (
   120     const unsigned char *ref_ptr,
   121     int ref_pixels_per_line,
   122     const unsigned char *src_ptr,
   123     int src_pixels_per_line,
   124     unsigned int Height,
   125     int *sum,
   126     unsigned int *sumsquared
   127 );
   128 void vp8_half_vert_variance16x_h_sse2
   129 (
   130     const unsigned char *ref_ptr,
   131     int ref_pixels_per_line,
   132     const unsigned char *src_ptr,
   133     int src_pixels_per_line,
   134     unsigned int Height,
   135     int *sum,
   136     unsigned int *sumsquared
   137 );
   139 unsigned int vp8_variance4x4_wmt(
   140     const unsigned char *src_ptr,
   141     int  source_stride,
   142     const unsigned char *ref_ptr,
   143     int  recon_stride,
   144     unsigned int *sse)
   145 {
   146     unsigned int var;
   147     int avg;
   149     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
   150     *sse = var;
   151     return (var - (((unsigned int)avg * avg) >> 4));
   153 }
   155 unsigned int vp8_variance8x8_wmt
   156 (
   157     const unsigned char *src_ptr,
   158     int  source_stride,
   159     const unsigned char *ref_ptr,
   160     int  recon_stride,
   161     unsigned int *sse)
   162 {
   163     unsigned int var;
   164     int avg;
   166     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
   167     *sse = var;
   168     return (var - (((unsigned int)avg * avg) >> 6));
   170 }
   173 unsigned int vp8_variance16x16_wmt
   174 (
   175     const unsigned char *src_ptr,
   176     int  source_stride,
   177     const unsigned char *ref_ptr,
   178     int  recon_stride,
   179     unsigned int *sse)
   180 {
   181     unsigned int sse0;
   182     int sum0;
   185     vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
   186     *sse = sse0;
   187     return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
   188 }
   189 unsigned int vp8_mse16x16_wmt(
   190     const unsigned char *src_ptr,
   191     int  source_stride,
   192     const unsigned char *ref_ptr,
   193     int  recon_stride,
   194     unsigned int *sse)
   195 {
   197     unsigned int sse0;
   198     int sum0;
   199     vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
   200     *sse = sse0;
   201     return sse0;
   203 }
   206 unsigned int vp8_variance16x8_wmt
   207 (
   208     const unsigned char *src_ptr,
   209     int  source_stride,
   210     const unsigned char *ref_ptr,
   211     int  recon_stride,
   212     unsigned int *sse)
   213 {
   214     unsigned int sse0, sse1, var;
   215     int sum0, sum1, avg;
   217     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
   218     vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
   220     var = sse0 + sse1;
   221     avg = sum0 + sum1;
   222     *sse = var;
   223     return (var - (((unsigned int)avg * avg) >> 7));
   225 }
   227 unsigned int vp8_variance8x16_wmt
   228 (
   229     const unsigned char *src_ptr,
   230     int  source_stride,
   231     const unsigned char *ref_ptr,
   232     int  recon_stride,
   233     unsigned int *sse)
   234 {
   235     unsigned int sse0, sse1, var;
   236     int sum0, sum1, avg;
   238     vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
   239     vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
   241     var = sse0 + sse1;
   242     avg = sum0 + sum1;
   243     *sse = var;
   244     return (var - (((unsigned int)avg * avg) >> 7));
   246 }
   248 unsigned int vp8_sub_pixel_variance4x4_wmt
   249 (
   250     const unsigned char  *src_ptr,
   251     int  src_pixels_per_line,
   252     int  xoffset,
   253     int  yoffset,
   254     const unsigned char *dst_ptr,
   255     int dst_pixels_per_line,
   256     unsigned int *sse
   257 )
   258 {
   259     int xsum;
   260     unsigned int xxsum;
   261     vp8_filter_block2d_bil4x4_var_mmx(
   262         src_ptr, src_pixels_per_line,
   263         dst_ptr, dst_pixels_per_line,
   264         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
   265         &xsum, &xxsum
   266     );
   267     *sse = xxsum;
   268     return (xxsum - (((unsigned int)xsum * xsum) >> 4));
   269 }
   272 unsigned int vp8_sub_pixel_variance8x8_wmt
   273 (
   274     const unsigned char  *src_ptr,
   275     int  src_pixels_per_line,
   276     int  xoffset,
   277     int  yoffset,
   278     const unsigned char *dst_ptr,
   279     int dst_pixels_per_line,
   280     unsigned int *sse
   281 )
   282 {
   283     int xsum;
   284     unsigned int xxsum;
   286     if (xoffset == 4 && yoffset == 0)
   287     {
   288         vp8_half_horiz_variance8x_h_sse2(
   289             src_ptr, src_pixels_per_line,
   290             dst_ptr, dst_pixels_per_line, 8,
   291             &xsum, &xxsum);
   292     }
   293     else if (xoffset == 0 && yoffset == 4)
   294     {
   295         vp8_half_vert_variance8x_h_sse2(
   296             src_ptr, src_pixels_per_line,
   297             dst_ptr, dst_pixels_per_line, 8,
   298             &xsum, &xxsum);
   299     }
   300     else if (xoffset == 4 && yoffset == 4)
   301     {
   302         vp8_half_horiz_vert_variance8x_h_sse2(
   303             src_ptr, src_pixels_per_line,
   304             dst_ptr, dst_pixels_per_line, 8,
   305             &xsum, &xxsum);
   306     }
   307     else
   308     {
   309         vp8_filter_block2d_bil_var_sse2(
   310             src_ptr, src_pixels_per_line,
   311             dst_ptr, dst_pixels_per_line, 8,
   312             xoffset, yoffset,
   313             &xsum, &xxsum);
   314     }
   316     *sse = xxsum;
   317     return (xxsum - (((unsigned int)xsum * xsum) >> 6));
   318 }
   320 unsigned int vp8_sub_pixel_variance16x16_wmt
   321 (
   322     const unsigned char  *src_ptr,
   323     int  src_pixels_per_line,
   324     int  xoffset,
   325     int  yoffset,
   326     const unsigned char *dst_ptr,
   327     int dst_pixels_per_line,
   328     unsigned int *sse
   329 )
   330 {
   331     int xsum0, xsum1;
   332     unsigned int xxsum0, xxsum1;
   335     /* note we could avoid these if statements if the calling function
   336      * just called the appropriate functions inside.
   337      */
   338     if (xoffset == 4 && yoffset == 0)
   339     {
   340         vp8_half_horiz_variance16x_h_sse2(
   341             src_ptr, src_pixels_per_line,
   342             dst_ptr, dst_pixels_per_line, 16,
   343             &xsum0, &xxsum0);
   344     }
   345     else if (xoffset == 0 && yoffset == 4)
   346     {
   347         vp8_half_vert_variance16x_h_sse2(
   348             src_ptr, src_pixels_per_line,
   349             dst_ptr, dst_pixels_per_line, 16,
   350             &xsum0, &xxsum0);
   351     }
   352     else if (xoffset == 4 && yoffset == 4)
   353     {
   354         vp8_half_horiz_vert_variance16x_h_sse2(
   355             src_ptr, src_pixels_per_line,
   356             dst_ptr, dst_pixels_per_line, 16,
   357             &xsum0, &xxsum0);
   358     }
   359     else
   360     {
   361         vp8_filter_block2d_bil_var_sse2(
   362             src_ptr, src_pixels_per_line,
   363             dst_ptr, dst_pixels_per_line, 16,
   364             xoffset, yoffset,
   365             &xsum0, &xxsum0
   366         );
   368         vp8_filter_block2d_bil_var_sse2(
   369             src_ptr + 8, src_pixels_per_line,
   370             dst_ptr + 8, dst_pixels_per_line, 16,
   371             xoffset, yoffset,
   372             &xsum1, &xxsum1
   373         );
   374         xsum0 += xsum1;
   375         xxsum0 += xxsum1;
   376     }
   378     *sse = xxsum0;
   379     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
   380 }
   382 unsigned int vp8_sub_pixel_mse16x16_wmt(
   383     const unsigned char  *src_ptr,
   384     int  src_pixels_per_line,
   385     int  xoffset,
   386     int  yoffset,
   387     const unsigned char *dst_ptr,
   388     int dst_pixels_per_line,
   389     unsigned int *sse
   390 )
   391 {
   392     vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
   393     return *sse;
   394 }
   396 unsigned int vp8_sub_pixel_variance16x8_wmt
   397 (
   398     const unsigned char  *src_ptr,
   399     int  src_pixels_per_line,
   400     int  xoffset,
   401     int  yoffset,
   402     const unsigned char *dst_ptr,
   403     int dst_pixels_per_line,
   404     unsigned int *sse
   406 )
   407 {
   408     int xsum0, xsum1;
   409     unsigned int xxsum0, xxsum1;
   411     if (xoffset == 4 && yoffset == 0)
   412     {
   413         vp8_half_horiz_variance16x_h_sse2(
   414             src_ptr, src_pixels_per_line,
   415             dst_ptr, dst_pixels_per_line, 8,
   416             &xsum0, &xxsum0);
   417     }
   418     else if (xoffset == 0 && yoffset == 4)
   419     {
   420         vp8_half_vert_variance16x_h_sse2(
   421             src_ptr, src_pixels_per_line,
   422             dst_ptr, dst_pixels_per_line, 8,
   423             &xsum0, &xxsum0);
   424     }
   425     else if (xoffset == 4 && yoffset == 4)
   426     {
   427         vp8_half_horiz_vert_variance16x_h_sse2(
   428             src_ptr, src_pixels_per_line,
   429             dst_ptr, dst_pixels_per_line, 8,
   430             &xsum0, &xxsum0);
   431     }
   432     else
   433     {
   434         vp8_filter_block2d_bil_var_sse2(
   435             src_ptr, src_pixels_per_line,
   436             dst_ptr, dst_pixels_per_line, 8,
   437             xoffset, yoffset,
   438             &xsum0, &xxsum0);
   440         vp8_filter_block2d_bil_var_sse2(
   441             src_ptr + 8, src_pixels_per_line,
   442             dst_ptr + 8, dst_pixels_per_line, 8,
   443             xoffset, yoffset,
   444             &xsum1, &xxsum1);
   445         xsum0 += xsum1;
   446         xxsum0 += xxsum1;
   447     }
   449     *sse = xxsum0;
   450     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
   451 }
   453 unsigned int vp8_sub_pixel_variance8x16_wmt
   454 (
   455     const unsigned char  *src_ptr,
   456     int  src_pixels_per_line,
   457     int  xoffset,
   458     int  yoffset,
   459     const unsigned char *dst_ptr,
   460     int dst_pixels_per_line,
   461     unsigned int *sse
   462 )
   463 {
   464     int xsum;
   465     unsigned int xxsum;
   467     if (xoffset == 4 && yoffset == 0)
   468     {
   469         vp8_half_horiz_variance8x_h_sse2(
   470             src_ptr, src_pixels_per_line,
   471             dst_ptr, dst_pixels_per_line, 16,
   472             &xsum, &xxsum);
   473     }
   474     else if (xoffset == 0 && yoffset == 4)
   475     {
   476         vp8_half_vert_variance8x_h_sse2(
   477             src_ptr, src_pixels_per_line,
   478             dst_ptr, dst_pixels_per_line, 16,
   479             &xsum, &xxsum);
   480     }
   481     else if (xoffset == 4 && yoffset == 4)
   482     {
   483         vp8_half_horiz_vert_variance8x_h_sse2(
   484             src_ptr, src_pixels_per_line,
   485             dst_ptr, dst_pixels_per_line, 16,
   486             &xsum, &xxsum);
   487     }
   488     else
   489     {
   490         vp8_filter_block2d_bil_var_sse2(
   491             src_ptr, src_pixels_per_line,
   492             dst_ptr, dst_pixels_per_line, 16,
   493             xoffset, yoffset,
   494             &xsum, &xxsum);
   495     }
   497     *sse = xxsum;
   498     return (xxsum - (((unsigned int)xsum * xsum) >> 7));
   499 }
   502 unsigned int vp8_variance_halfpixvar16x16_h_wmt(
   503     const unsigned char *src_ptr,
   504     int  src_pixels_per_line,
   505     const unsigned char *dst_ptr,
   506     int  dst_pixels_per_line,
   507     unsigned int *sse)
   508 {
   509     int xsum0;
   510     unsigned int xxsum0;
   512     vp8_half_horiz_variance16x_h_sse2(
   513         src_ptr, src_pixels_per_line,
   514         dst_ptr, dst_pixels_per_line, 16,
   515         &xsum0, &xxsum0);
   517     *sse = xxsum0;
   518     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
   519 }
   522 unsigned int vp8_variance_halfpixvar16x16_v_wmt(
   523     const unsigned char *src_ptr,
   524     int  src_pixels_per_line,
   525     const unsigned char *dst_ptr,
   526     int  dst_pixels_per_line,
   527     unsigned int *sse)
   528 {
   529     int xsum0;
   530     unsigned int xxsum0;
   531     vp8_half_vert_variance16x_h_sse2(
   532         src_ptr, src_pixels_per_line,
   533         dst_ptr, dst_pixels_per_line, 16,
   534         &xsum0, &xxsum0);
   536     *sse = xxsum0;
   537     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
   538 }
   541 unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
   542     const unsigned char *src_ptr,
   543     int  src_pixels_per_line,
   544     const unsigned char *dst_ptr,
   545     int  dst_pixels_per_line,
   546     unsigned int *sse)
   547 {
   548     int xsum0;
   549     unsigned int xxsum0;
   551     vp8_half_horiz_vert_variance16x_h_sse2(
   552         src_ptr, src_pixels_per_line,
   553         dst_ptr, dst_pixels_per_line, 16,
   554         &xsum0, &xxsum0);
   556     *sse = xxsum0;
   557     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
   558 }

mercurial