media/libvpx/vp8/common/x86/dequantize_mmx.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    15 ;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
       ; Element-wise dequantization of 16 shorts: dq[i] = low 16 bits of (sq[i] * q[i]).
       ; The data is handled as four 8-byte groups (4 words each); every group is
       ; loaded, multiplied and stored before the next group is read.
       ; No emms is issued in this routine, so MMX state is left in use on return.
    16 global sym(vp8_dequantize_b_impl_mmx) PRIVATE
    17 sym(vp8_dequantize_b_impl_mmx):
    18     push        rbp
    19     mov         rbp, rsp
       ; SHADOW_ARGS_TO_STACK / UNSHADOW_ARGS / arg() come from x86_abi_support.asm (not shown here).
       ; NOTE(review): presumably they make the 3 arguments addressable as arg(0..2) -- confirm there.
    20     SHADOW_ARGS_TO_STACK 3
    21     push        rsi                         ; rsi and rdi are overwritten with sq/dq below
    22     push        rdi                         ; both are restored in the epilog
    23     ; end prolog
    25         mov       rsi, arg(0) ;sq  (source coefficients)
    26         mov       rdi, arg(1) ;dq  (destination)
    27         mov       rax, arg(2) ;q   (per-coefficient multipliers)
    29         movq      mm1, [rsi]              ; mm1 = sq[0..3]
    30         pmullw    mm1, [rax+0]            ; mm1 *= q[0..3], keeping the low 16 bits of each product
    31         movq      [rdi], mm1              ; dq[0..3] = mm1
    33         movq      mm1, [rsi+8]            ; mm1 = sq[4..7]
    34         pmullw    mm1, [rax+8]            ; mm1 *= q[4..7]
    35         movq      [rdi+8], mm1            ; dq[4..7] = mm1
    37         movq      mm1, [rsi+16]           ; mm1 = sq[8..11]
    38         pmullw    mm1, [rax+16]            ; mm1 *= q[8..11]
    39         movq      [rdi+16], mm1           ; dq[8..11] = mm1
    41         movq      mm1, [rsi+24]           ; mm1 = sq[12..15]
    42         pmullw    mm1, [rax+24]            ; mm1 *= q[12..15]
    43         movq      [rdi+24], mm1           ; dq[12..15] = mm1
    45     ; begin epilog (undoes the prolog in reverse order)
    46     pop rdi
    47     pop rsi
    48     UNSHADOW_ARGS
    49     pop         rbp
    50     ret
    53 ;void vp8_dequant_idct_add_mmx(
    54 ;short *input,            0  16 quantized coefficients; all 16 are overwritten with zero
    55 ;short *dq,               1  16 dequantization multipliers
    56 ;unsigned char *dest,     2  4 rows of 4 bytes, read-modify-written in place
    57 ;int stride)              3  byte distance between consecutive dest rows
       ;
       ; Steps, in order:
       ;   1. mm0..mm3 = input[0..15] * dq[0..15] (low 16 bits of each product).
       ;   2. input[0..15] is cleared.
       ;   3. First butterfly pass across mm0..mm3, then a 4x4 word transpose.
       ;   4. Second butterfly pass with rounding (x + 4) >> 3, then a second transpose.
       ;   5. Each result row is added to 4 dest bytes and stored back clamped to 0..255.
       ;
       ; Two-digit tags in the transpose comments name one word as (row)(column) and list
       ; the four words of a register from the highest word down to the lowest.
       ; No emms is issued in this routine, so MMX state is left in use on return.
    58 global sym(vp8_dequant_idct_add_mmx) PRIVATE
    59 sym(vp8_dequant_idct_add_mmx):
    60     push        rbp
    61     mov         rbp, rsp
       ; SHADOW_ARGS_TO_STACK, GET_GOT, RESTORE_GOT, UNSHADOW_ARGS, arg() and GLOBAL() come from
       ; x86_abi_support.asm (not shown here).
       ; NOTE(review): presumably they make arg(0..3) addressable and set up the base that
       ; GLOBAL() needs in position-independent builds -- confirm against that file.
    62     SHADOW_ARGS_TO_STACK 4
    63     GET_GOT     rbx
    64     push        rdi                             ; rdi is overwritten with stride below; restored in the epilog
    65     ; end prolog
    67         mov         rax,    arg(0) ;input
    68         mov         rdx,    arg(1) ;dq
    71         movq        mm0,    [rax   ]            ; mm0 = input[0..3]
    72         pmullw      mm0,    [rdx]               ; mm0 *= dq[0..3]   -> "ip0"
    74         movq        mm1,    [rax +8]            ; mm1 = input[4..7]
    75         pmullw      mm1,    [rdx +8]            ; mm1 *= dq[4..7]   -> "ip1"
    77         movq        mm2,    [rax+16]            ; mm2 = input[8..11]
    78         pmullw      mm2,    [rdx+16]            ; mm2 *= dq[8..11]  -> "ip2"
    80         movq        mm3,    [rax+24]            ; mm3 = input[12..15]
    81         pmullw      mm3,    [rdx+24]            ; mm3 *= dq[12..15] -> "ip3"
    83         mov         rdx,    arg(2) ;dest  (dq pointer is no longer needed)
    85         pxor        mm7,    mm7                 ; mm7 = 0
    88         movq        [rax],   mm7                ; clear input[0..3]
    89         movq        [rax+8], mm7                ; clear input[4..7]
    91         movq        [rax+16],mm7                ; clear input[8..11]
    92         movq        [rax+24],mm7                ; clear input[12..15]
    95         movsxd      rdi,            dword ptr arg(3) ;stride, sign-extended into rdi
       ; ---- first butterfly pass: combines mm0..mm3 lane by lane ----
    97         psubw       mm0,            mm2             ; b1 = ip0 - ip2
    98         paddw       mm2,            mm2             ; mm2 = 2 * ip2
   100         movq        mm5,            mm1             ; mm5 = ip1
   101         paddw       mm2,            mm0             ; a1 = 2*ip2 + (ip0 - ip2) = ip0 + ip2
   103         pmulhw      mm5,            [GLOBAL(x_s1sqr2)]; signed high multiply; 0x8A8C is negative as a word, so this is ip1*(0x8A8C - 0x10000) >> 16
   104         paddw       mm5,            mm1             ; adding ip1 back gives ip1 * 0x8A8C >> 16 = ip1 * sin(pi/8) * sqrt(2)
   106         movq        mm7,            mm3             ; mm7 = ip3
   107         pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)]; mm7 = ip3 * (cos(pi/8)*sqrt(2) - 1)
   109         paddw       mm7,            mm3             ; + ip3 restores the 1: ip3 * cos(pi/8) * sqrt(2)
   110         psubw       mm7,            mm5             ; c1 = ip3*cos(pi/8)*sqrt(2) - ip1*sin(pi/8)*sqrt(2)
   112         movq        mm5,            mm1             ; mm5 = ip1
   113         movq        mm4,            mm3             ; mm4 = ip3 (kept for the add-back below)
   115         pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
   116         paddw       mm5,            mm1             ; mm5 = ip1 * cos(pi/8) * sqrt(2)
   118         pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
   119         paddw       mm3,            mm4             ; mm3 = ip3 * sin(pi/8) * sqrt(2)
   121         paddw       mm3,            mm5             ; d1 = ip1*cos(pi/8)*sqrt(2) + ip3*sin(pi/8)*sqrt(2)
   122         movq        mm6,            mm2             ; a1
   124         movq        mm4,            mm0             ; b1
   125         paddw       mm2,            mm3             ; mm2 = a1 + d1 -> output 0
   127         paddw       mm4,            mm7             ; mm4 = b1 + c1 -> output 2 (the transpose below places it third)
   128         psubw       mm0,            mm7             ; mm0 = b1 - c1 -> output 1 (the transpose below places it second)
   130         psubw       mm6,            mm3             ; mm6 = a1 - d1 -> output 3
       ; ---- 4x4 word transpose of outputs 0..3 (mm2, mm0, mm4, mm6) into mm0, mm1, mm2, mm3 ----
   132         movq        mm1,            mm2             ; 03 02 01 00
   133         movq        mm3,            mm4             ; 23 22 21 20
   135         punpcklwd   mm1,            mm0             ; 11 01 10 00
   136         punpckhwd   mm2,            mm0             ; 13 03 12 02
   138         punpcklwd   mm3,            mm6             ; 31 21 30 20
   139         punpckhwd   mm4,            mm6             ; 33 23 32 22
   141         movq        mm0,            mm1             ; 11 01 10 00
   142         movq        mm5,            mm2             ; 13 03 12 02
   144         punpckldq   mm0,            mm3             ; 30 20 10 00
   145         punpckhdq   mm1,            mm3             ; 31 21 11 01
   147         punpckldq   mm2,            mm4             ; 32 22 12 02
   148         punpckhdq   mm5,            mm4             ; 33 23 13 03
   150         movq        mm3,            mm5             ; 33 23 13 03
       ; ---- second butterfly pass: same arithmetic on the transposed data, plus rounding ----
   152         psubw       mm0,            mm2             ; b1 = ip0 - ip2
   153         paddw       mm2,            mm2             ; mm2 = 2 * ip2
   155         movq        mm5,            mm1             ; mm5 = ip1
   156         paddw       mm2,            mm0             ; a1 = ip0 + ip2
   158         pmulhw      mm5,            [GLOBAL(x_s1sqr2)]; signed high multiply by the (negative) 0x8A8C word
   159         paddw       mm5,            mm1             ; add-back: ip1 * sin(pi/8) * sqrt(2)
   161         movq        mm7,            mm3             ; mm7 = ip3
   162         pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)]; mm7 = ip3 * (cos(pi/8)*sqrt(2) - 1)
   164         paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
   165         psubw       mm7,            mm5             ; c1 = ip3*cos(pi/8)*sqrt(2) - ip1*sin(pi/8)*sqrt(2)
   167         movq        mm5,            mm1             ; mm5 = ip1
   168         movq        mm4,            mm3             ; mm4 = ip3 (kept for the add-back below)
   170         pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
   171         paddw       mm5,            mm1             ; mm5 = ip1 * cos(pi/8) * sqrt(2)
   173         pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
   174         paddw       mm3,            mm4             ; mm3 = ip3 * sin(pi/8) * sqrt(2)
   176         paddw       mm3,            mm5             ; d1
   177         paddw       mm0,            [GLOBAL(fours)] ; b1 += 4: every output is a1 +/- d1 or b1 +/- c1,
   179         paddw       mm2,            [GLOBAL(fours)] ; a1 += 4: so this biases all four outputs by 4 before the >> 3
   180         movq        mm6,            mm2             ; a1
   182         movq        mm4,            mm0             ; b1
   183         paddw       mm2,            mm3             ; mm2 = a1 + d1 -> output 0
   185         paddw       mm4,            mm7             ; mm4 = b1 + c1 -> output 2 (the transpose below places it third)
   186         psubw       mm0,            mm7             ; mm0 = b1 - c1 -> output 1 (the transpose below places it second)
   188         psubw       mm6,            mm3             ; mm6 = a1 - d1 -> output 3
   189         psraw       mm2,            3               ; arithmetic >> 3 completes (x + 4) >> 3
   191         psraw       mm0,            3
   192         psraw       mm4,            3
   194         psraw       mm6,            3
       ; ---- second transpose: afterwards mm0, mm1, mm2, mm5 each hold the 4 words for one dest row ----
   196         movq        mm1,            mm2             ; 03 02 01 00
   197         movq        mm3,            mm4             ; 23 22 21 20
   199         punpcklwd   mm1,            mm0             ; 11 01 10 00
   200         punpckhwd   mm2,            mm0             ; 13 03 12 02
   202         punpcklwd   mm3,            mm6             ; 31 21 30 20
   203         punpckhwd   mm4,            mm6             ; 33 23 32 22
   205         movq        mm0,            mm1             ; 11 01 10 00
   206         movq        mm5,            mm2             ; 13 03 12 02
   208         punpckldq   mm0,            mm3             ; 30 20 10 00
   209         punpckhdq   mm1,            mm3             ; 31 21 11 01
   211         punpckldq   mm2,            mm4             ; 32 22 12 02
   212         punpckhdq   mm5,            mm4             ; 33 23 13 03
       ; ---- add each result row to dest and store it back as unsigned bytes ----
   214         pxor        mm7,            mm7             ; mm7 = 0: zero source for the byte->word unpack and the pack
   216         movd        mm4,            [rdx]           ; 4 bytes at dest
   217         punpcklbw   mm4,            mm7             ; zero-extend the 4 bytes to 4 words
   218         paddsw      mm0,            mm4             ; signed-saturating add of the result words
   219         packuswb    mm0,            mm7             ; clamp each word to 0..255 and pack to bytes
   220         movd        [rdx],          mm0             ; store 4 bytes at dest
   222         movd        mm4,            [rdx+rdi]       ; 4 bytes at dest + stride
   223         punpcklbw   mm4,            mm7
   224         paddsw      mm1,            mm4
   225         packuswb    mm1,            mm7
   226         movd        [rdx+rdi],      mm1
   228         movd        mm4,            [rdx+2*rdi]     ; 4 bytes at dest + 2*stride
   229         punpcklbw   mm4,            mm7
   230         paddsw      mm2,            mm4
   231         packuswb    mm2,            mm7
   232         movd        [rdx+rdi*2],    mm2
   234         add         rdx,            rdi             ; rdx = dest + stride, so [rdx+2*rdi] below is dest + 3*stride
   236         movd        mm4,            [rdx+2*rdi]     ; 4 bytes at dest + 3*stride
   237         punpcklbw   mm4,            mm7
   238         paddsw      mm5,            mm4
   239         packuswb    mm5,            mm7
   240         movd        [rdx+rdi*2],    mm5
   242     ; begin epilog (undoes the prolog in reverse order)
   243     pop rdi
   244     RESTORE_GOT
   245     UNSHADOW_ARGS
   246     pop         rbp
   247     ret
   249 SECTION_RODATA
       ; Read-only constants used by vp8_dequant_idct_add_mmx. Each value is repeated four
       ; times so that one 8-byte MMX memory operand applies it to all four word lanes.
   250 align 16
   251 x_s1sqr2:
   252     times 4 dw 0x8A8C               ; 35468 ~= sin(pi/8) * sqrt(2) * 65536; negative as a signed word, so users add the multiplicand back after pmulhw
   253 align 16
   254 x_c1sqr2less1:
   255     times 4 dw 0x4E7B               ; 20091 ~= (cos(pi/8) * sqrt(2) - 1) * 65536; users add the multiplicand back after pmulhw to restore the 1
   256 align 16
   257 fours:
   258     times 4 dw 0x0004               ; rounding bias added before the arithmetic shift right by 3

mercurial