The Tor Browser: media/libvpx/vp8/common/x86/postproc_mmx.asm

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirement
documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

1 ;

     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 ;

     4 ;  Use of this source code is governed by a BSD-style license

     5 ;  that can be found in the LICENSE file in the root of the source

     6 ;  tree. An additional intellectual property rights grant can be found

     7 ;  in the file PATENTS.  All contributing project authors may

     8 ;  be found in the AUTHORS file in the root of the source tree.

9 ;

    12 %include "vpx_ports/x86_abi_support.asm"

    ; Filter weight and the matching shift amount (128 == 1 << 7).
    ; Neither name is referenced in the portion of the file shown here.
    14 %define VP8_FILTER_WEIGHT 128

    15 %define VP8_FILTER_SHIFT  7

    ;-----------------------------------------------------------------------
    ; void vp8_mbpost_proc_down_mmx(unsigned char *dst,
    ;                               int pitch, int rows, int cols, int flimit)
    ;
    ; Vertical ("down") post-processing filter, applied to one 4-pixel-wide
    ; column strip per outer iteration.
    ;
    ; For each pixel a running sum and sum of squares are maintained over a
    ; 15-row window centred on the pixel (rows r-7 .. r+7).  When
    ;         15 * sumsq - sum * sum  <  flimit
    ; the pixel is replaced by (pixel + sum + rv) >> 4, where rv is read from
    ; vp8_rv at byte offset 2 * (r & 127); otherwise the pixel is kept.
    ;
    ; Side effects visible in this routine:
    ;   * the 8 rows above row 0 and the 8 rows below row rows-1 are
    ;     overwritten (8 bytes per row for every strip);
    ;   * the stacked copies of dst, rows and cols are modified in place;
    ;   * mm0-mm7 are used and no emms is executed here.
    ;
    ; Locals (relative to rsp after the stack has been aligned):
    ;   [rsp +   0]  d[]     ring buffer of filtered rows, indexed as 16
    ;                        slots of 4 bytes (128 bytes are reserved)
    ;   [rsp + 128]  flimit2 flimit replicated into both dwords of a qword
    ;
    ; Register roles inside the loops:
    ;   rax = pitch (temporarily -pitch while filling the upper border)
    ;   rsi = row leaving the window  (s + (r-8)*pitch), also the output row
    ;   rdi = row entering the window (s + (r+7)*pitch)
    ;   rdx = r, row counter          rcx = scratch / ring-buffer index
    ;   mm0 = 0, mm5 = sum (4 words), mm6/mm7 = sumsq (2 dwords each)
    ;   r8  = address of vp8_rv (64-bit builds only)
    ;-----------------------------------------------------------------------
extern sym(vp8_rv)
global sym(vp8_mbpost_proc_down_mmx) PRIVATE
sym(vp8_mbpost_proc_down_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 136

    ; ring buffer d[] at [rsp]
    ; create flimit2 at [rsp+128]
    mov         eax, dword ptr arg(4) ;flimit
    mov         [rsp+128], eax
    mov         [rsp+128+4], eax
%define flimit2 [rsp+128]

%if ABI_IS_32BIT=0
    lea         r8,       [GLOBAL(sym(vp8_rv))]
%endif

    ;rows += 8;  the row loop runs 8 extra iterations to flush the ring buffer
    add         dword ptr arg(2), 8

    ;for(c=0; c<cols; c+=4)
.loop_col:
            mov         rsi,        arg(0)  ;s
            pxor        mm0,        mm0     ;mm0 = 0, used for unpacking

            movsxd      rax,        dword ptr arg(1) ;pitch

            ; this copies the last row down into the border 8 rows
            ; rows is an int: load it as a sign-extended dword, exactly as
            ; pitch is loaded above, instead of reading the whole slot.
            mov         rdi,        rsi
            movsxd      rdx,        dword ptr arg(2)
            sub         rdx,        9                           ; (rows+8)-9 = last row index
            imul        rdx,        rax
            lea         rdi,        [rdi+rdx]
            movq        mm1,        QWORD ptr[rdi]              ; last row
            mov         rcx,        8
.init_borderd:                                                   ; initialize lower border
            lea         rdi,        [rdi + rax]
            movq        [rdi],      mm1

            dec         rcx
            jne         .init_borderd

            neg         rax                                     ; rax = -pitch

            ; this copies the first row up into the border 8 rows
            mov         rdi,        rsi
            movq        mm1,        QWORD ptr[rdi]              ; first row
            mov         rcx,        8
.init_border:                                                    ; initialize upper border
            lea         rdi,        [rdi + rax]
            movq        [rdi],      mm1

            dec         rcx
            jne         .init_border

            lea         rsi,        [rsi + rax*8]               ; rsi = &s[-pitch*8]
            neg         rax                                     ; rax = +pitch again

            pxor        mm5,        mm5                         ; sum   = 0
            pxor        mm6,        mm6                         ; sumsq = 0 (pixels 0,1)

            pxor        mm7,        mm7                         ; sumsq = 0 (pixels 2,3)
            mov         rdi,        rsi

            mov         rcx,        15

            ; prime sum / sumsq with the 15 rows s[-8*pitch] .. s[6*pitch];
            ; rdi is left pointing at s[7*pitch]
.loop_initvar:
            movd        mm1,        DWORD PTR [rdi]
            punpcklbw   mm1,        mm0                         ; 4 bytes -> 4 words

            paddw       mm5,        mm1                         ; sum += x
            pmullw      mm1,        mm1                         ; x*x (fits in 16 bits)

            movq        mm2,        mm1
            punpcklwd   mm1,        mm0                         ; squares 0,1 as dwords

            punpckhwd   mm2,        mm0                         ; squares 2,3 as dwords
            paddd       mm6,        mm1

            paddd       mm7,        mm2
            lea         rdi,        [rdi+rax]

            dec         rcx
            jne         .loop_initvar

            xor         rdx,        rdx                         ; r = 0
.loop_row:
            movd        mm1,        DWORD PTR [rsi]     ; [s-pitch*8]  row leaving the window
            movd        mm2,        DWORD PTR [rdi]     ; [s+pitch*7]  row entering the window

            punpcklbw   mm1,        mm0
            punpcklbw   mm2,        mm0

            paddw       mm5,        mm2                 ; sum += new
            psubw       mm5,        mm1                 ; sum -= old

            pmullw      mm2,        mm2
            movq        mm4,        mm2

            punpcklwd   mm2,        mm0
            punpckhwd   mm4,        mm0

            paddd       mm6,        mm2                 ; sumsq += new*new
            paddd       mm7,        mm4

            pmullw      mm1,        mm1
            movq        mm2,        mm1

            punpcklwd   mm1,        mm0
            psubd       mm6,        mm1                 ; sumsq -= old*old

            punpckhwd   mm2,        mm0
            psubd       mm7,        mm2

            ; mm3 = 15 * sumsq (pixels 0,1), computed as (sumsq << 4) - sumsq
            movq        mm3,        mm6
            pslld       mm3,        4

            psubd       mm3,        mm6
            movq        mm1,        mm5

            ; sum*sum as 32-bit values: low halves from pmullw, high halves
            ; from pmulhw, interleaved into dwords
            movq        mm4,        mm5
            pmullw      mm1,        mm1

            pmulhw      mm4,        mm4
            movq        mm2,        mm1

            punpcklwd   mm1,        mm4
            punpckhwd   mm2,        mm4

            ; mm4 = 15 * sumsq (pixels 2,3)
            movq        mm4,        mm7
            pslld       mm4,        4

            psubd       mm4,        mm7

            psubd       mm3,        mm1                 ; 15*sumsq - sum*sum
            psubd       mm4,        mm2

            psubd       mm3,        flimit2
            psubd       mm4,        flimit2

            psrad       mm3,        31                  ; all ones where value < flimit
            psrad       mm4,        31

            packssdw    mm3,        mm4                 ; dword masks -> word masks
            packsswb    mm3,        mm0                 ; word masks  -> byte masks

            movd        mm1,        DWORD PTR [rsi+rax*8]   ; centre row s[r*pitch]

            movq        mm2,        mm1                 ; keep the unfiltered bytes
            punpcklbw   mm1,        mm0

            paddw       mm1,        mm5                 ; pixel + sum
            mov         rcx,        rdx

            and         rcx,        127
            ; NOTE(review): vp8_rv is indexed in 2-byte steps, so it is
            ; presumably an array of 16-bit values - confirm against its
            ; definition.
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
            push        rax
            lea         rax,        [GLOBAL(sym(vp8_rv))]
            movq        mm4,        [rax + rcx*2] ;vp8_rv[rcx*2]
            pop         rax
%elif ABI_IS_32BIT=0
            movq        mm4,        [r8 + rcx*2] ;vp8_rv[rcx*2]
%else
            movq        mm4,        [sym(vp8_rv) + rcx*2]
%endif
            paddw       mm1,        mm4
            psraw       mm1,        4                   ; (pixel + sum + rv) >> 4

            packuswb    mm1,        mm0
            pand        mm1,        mm3                 ; filtered where mask set

            pandn       mm3,        mm2                 ; original elsewhere
            por         mm1,        mm3

            and         rcx,        15
            movd        DWORD PTR   [rsp+rcx*4], mm1 ;d[r & 15] = result for row r

            ; Write back the result computed 8 iterations ago, i.e. for the
            ; row that has just left the window.  For r < 8 that slot has not
            ; been written yet in this strip, and the store lands in the
            ; upper border rows.
            mov         rcx,        rdx
            sub         rcx,        8

            and         rcx,        15
            movd        mm1,        DWORD PTR [rsp+rcx*4] ;d[(r - 8) & 15]

            movd        [rsi],      mm1
            lea         rsi,        [rsi+rax]

            lea         rdi,        [rdi+rax]
            add         rdx,        1

            cmp         edx,        dword arg(2) ;rows
            jl          .loop_row

        ; s += 4, done at full pointer width: a dword add on the stacked
        ; pointer would drop the carry into its upper half on 64-bit builds.
        ; rsi is free here; it is reloaded at .loop_col and restored in the
        ; epilog.
        mov         rsi,        arg(0)
        add         rsi,        4
        mov         arg(0),     rsi
        sub         dword arg(3), 4 ; cols -= 4
        cmp         dword arg(3), 0
        jg          .loop_col

    add         rsp, 136
    pop         rsp                 ; undo ALIGN_STACK

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef flimit2

   ;-----------------------------------------------------------------------
   ; void vp8_plane_add_noise_mmx(unsigned char *Start, unsigned char *noise,
   ;                              unsigned char blackclamp[16],
   ;                              unsigned char whiteclamp[16],
   ;                              unsigned char bothclamp[16],
   ;                              unsigned int Width, unsigned int Height,
   ;                              int Pitch)
   ;
   ; Adds noise to an image plane in place, one row per outer iteration.
   ; Every row starts reading the noise buffer at offset (rand() & 0xff).
   ; Pixels are handled 8 at a time: each is clamped (subtract blackclamp,
   ; add bothclamp, subtract whiteclamp, all saturating) and then the noise
   ; byte is added with wrap-around.
   ;
   ; Only blackclamp is loaded; the white and both vectors are reached at
   ; blackclamp+16 and blackclamp+32, so the three vectors have to be stored
   ; back to back in that order.  Arguments 3 and 4 are never read.
   ;
   ; Both loops test their condition at the bottom, so one row and one
   ; 8-byte group are always processed.  The stacked Start and Height are
   ; modified.  mm1 and mm2 are used and no emms is executed here.
   ;-----------------------------------------------------------------------
extern sym(rand)
global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.next_row:
    call        sym(rand) WRT_PLT
    and         rax, 0xff               ; row's starting offset into noise[]

    ; Everything is (re)loaded after the call because rand() may clobber
    ; the scratch registers, rdx in particular.
    mov         rdi, arg(1)             ; noise
    add         rdi, rax                ; rdi = noise + (rand() & 0xff)
    mov         rsi, arg(0)             ; rsi = current row of the plane
    mov         rdx, arg(2)             ; rdx = blackclamp (white/both follow)
    movsxd      rcx, dword arg(5)       ; rcx = Width
    xor         rax, rax                ; rax = byte offset within the row

.next_qword:
    movq        mm2, [rdi+rax]          ; 8 noise bytes
    movq        mm1, [rsi+rax]          ; 8 source pixels

    ; squeeze the pixels into a range that adding the noise cannot leave
    psubusb     mm1, [rdx]              ; blackclamp
    paddusb     mm1, [rdx+32]           ; bothclamp
    psubusb     mm1, [rdx+16]           ; whiteclamp

    paddb       mm1, mm2                ; apply the noise
    movq        [rsi+rax], mm1          ; write the pixels back

    add         rax, 8                  ; advance to the next 8 pixels
    cmp         rax, rcx
    jl          .next_qword

    movsxd      rax, dword arg(7)       ; Pitch
    add         arg(0), rax             ; Start += Pitch
    sub         dword arg(6), 1         ; Height -= 1
    jg          .next_row

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

   304 SECTION_RODATA
; Read-only constant tables, aligned to 16 bytes.
; Neither table is referenced by the two routines visible in this listing.

   305 align 16

; Blur: 48 words laid out as 16 x 16, 8 x 64, 16 x 16, 8 x 0.
   306 Blur:

   307     times 16 dw 16

   308     times  8 dw 64

   309     times 16 dw 16

   310     times  8 dw  0

; rd: four words of 0x40 (64).
   312 rd:

   313     times 4 dw 0x40

The Tor Browser / file revision

media/libvpx/vp8/common/x86/postproc_mmx.asm@ac0c01689b40

media/libvpx/vp8/common/x86/postproc_mmx.asm