media/libvpx/vp9/encoder/x86/vp9_subtract_sse2.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk avoidance requirements
documented in https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    11 %include "third_party/x86inc/x86inc.asm"
    13 SECTION .text
    15 ; void vp9_subtract_block(int rows, int cols,
    16 ;                         int16_t *diff, ptrdiff_t diff_stride,
    17 ;                         const uint8_t *src, ptrdiff_t src_stride,
    18 ;                         const uint8_t *pred, ptrdiff_t pred_stride)
    20 INIT_XMM sse2
    21 cglobal subtract_block, 7, 7, 8, \
    22                         rows, cols, diff, diff_stride, src, src_stride, \
    23                         pred, pred_stride
    24 %define pred_str colsq
    25   pxor                  m7, m7         ; dedicated zero register
    26   cmp                colsd, 4
    27   je .case_4
    28   cmp                colsd, 8
    29   je .case_8
    30   cmp                colsd, 16
    31   je .case_16
    32   cmp                colsd, 32
    33   je .case_32
    35 %macro loop16 6
    36   mova                  m0, [srcq+%1]
    37   mova                  m4, [srcq+%2]
    38   mova                  m1, [predq+%3]
    39   mova                  m5, [predq+%4]
    40   punpckhbw             m2, m0, m7
    41   punpckhbw             m3, m1, m7
    42   punpcklbw             m0, m7
    43   punpcklbw             m1, m7
    44   psubw                 m2, m3
    45   psubw                 m0, m1
    46   punpckhbw             m1, m4, m7
    47   punpckhbw             m3, m5, m7
    48   punpcklbw             m4, m7
    49   punpcklbw             m5, m7
    50   psubw                 m1, m3
    51   psubw                 m4, m5
    52   mova [diffq+mmsize*0+%5], m0
    53   mova [diffq+mmsize*1+%5], m2
    54   mova [diffq+mmsize*0+%6], m4
    55   mova [diffq+mmsize*1+%6], m1
    56 %endmacro
    58   mov             pred_str, pred_stridemp
    59 .loop_64:
    60   loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize
    61   loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize
    62   lea                diffq, [diffq+diff_strideq*2]
    63   add                predq, pred_str
    64   add                 srcq, src_strideq
    65   dec                rowsd
    66   jg .loop_64
    67   RET
    69 .case_32:
    70   mov             pred_str, pred_stridemp
    71 .loop_32:
    72   loop16 0, mmsize, 0, mmsize, 0, 2*mmsize
    73   lea                diffq, [diffq+diff_strideq*2]
    74   add                predq, pred_str
    75   add                 srcq, src_strideq
    76   dec                rowsd
    77   jg .loop_32
    78   RET
    80 .case_16:
    81   mov             pred_str, pred_stridemp
    82 .loop_16:
    83   loop16 0, src_strideq, 0, pred_str, 0, diff_strideq*2
    84   lea                diffq, [diffq+diff_strideq*4]
    85   lea                predq, [predq+pred_str*2]
    86   lea                 srcq, [srcq+src_strideq*2]
    87   sub                rowsd, 2
    88   jg .loop_16
    89   RET
    91 %macro loop_h 0
    92   movh                  m0, [srcq]
    93   movh                  m2, [srcq+src_strideq]
    94   movh                  m1, [predq]
    95   movh                  m3, [predq+pred_str]
    96   punpcklbw             m0, m7
    97   punpcklbw             m1, m7
    98   punpcklbw             m2, m7
    99   punpcklbw             m3, m7
   100   psubw                 m0, m1
   101   psubw                 m2, m3
   102   mova             [diffq], m0
   103   mova [diffq+diff_strideq*2], m2
   104 %endmacro
   106 .case_8:
   107   mov             pred_str, pred_stridemp
   108 .loop_8:
   109   loop_h
   110   lea                diffq, [diffq+diff_strideq*4]
   111   lea                 srcq, [srcq+src_strideq*2]
   112   lea                predq, [predq+pred_str*2]
   113   sub                rowsd, 2
   114   jg .loop_8
   115   RET
   117 INIT_MMX
   118 .case_4:
   119   mov             pred_str, pred_stridemp
   120 .loop_4:
   121   loop_h
   122   lea                diffq, [diffq+diff_strideq*4]
   123   lea                 srcq, [srcq+src_strideq*2]
   124   lea                predq, [predq+pred_str*2]
   125   sub                rowsd, 2
   126   jg .loop_4
   127   RET

mercurial