media/libvpx/vp8/encoder/x86/quantize_mmx.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    14 ;int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
    15 ;                           short *qcoeff_ptr,short *dequant_ptr,
    16 ;                           short *scan_mask, short *round_ptr,
    17 ;                           short *quant_ptr, short *dqcoeff_ptr);
       ;
       ; Quantizes and dequantizes 16 16-bit coefficients, four at a time (one
       ; 64-bit MMX register per group), then derives the return value from
       ; which quantized coefficients are non-zero.
       ;
       ; Per 16-bit lane i:
       ;   a          = |coeff[i]|, forced to 0 when zbin[i] > a (signed compare)
       ;   q          = ((a + round[i]) * quant[i]) >> 16
       ;                (16-bit wrapping add, unsigned high multiply)
       ;   qcoeff[i]  = q with the sign of coeff[i] restored
       ;   dqcoeff[i] = low 16 bits of qcoeff[i] * dequant[i]
       ;
       ; Return value (rax):
       ;   S = low 16 bits of the sum of scan_mask[i] over all i with
       ;       qcoeff[i] != 0
       ;   returns 0 when S == 0, otherwise 1 + index of the highest set bit of S.
       ;   NOTE(review): presumably each scan_mask entry is a single bit marking
       ;   the coefficient's scan position, which makes the result the
       ;   end-of-block count -- confirm against the caller.
       ;
       ; Register roles:
       ;   rsi  coeff_ptr during quantization, qcoeff_ptr during the mask sum
       ;   rdi  qcoeff_ptr during quantization, scan_mask during the mask sum
       ;   rdx  quant_ptr, later scratch for the zero-test mask
       ;   rcx  round_ptr, later the 16-bit mask sum S
       ;   rax  scratch pointer (zbin/dequant/dqcoeff), finally the return value
       ;   rsi and rdi are saved in the prolog and restored in the epilog.
       ;
       ; NOTE(review): no emms is executed before returning; presumably the
       ; caller is responsible for resetting the MMX/x87 state -- confirm.
    18 global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE
    19 sym(vp8_fast_quantize_b_impl_mmx):
    20     push        rbp
    21     mov         rbp, rsp
    22     SHADOW_ARGS_TO_STACK 8
    23     push rsi
    24     push rdi
    25     ; end prolog
               ; first 4 coefficients (byte offset 0)
    28         mov             rsi,        arg(0) ;coeff_ptr
    29         movq            mm0,        [rsi]
    31         mov             rax,        arg(1) ;zbin_ptr
    32         movq            mm1,        [rax]
    34         movq            mm3,        mm0
    35         psraw           mm0,        15          ; mm0 = per-word sign mask (0 or -1)
    37         pxor            mm3,        mm0
    38         psubw           mm3,        mm0         ; abs
    40         movq            mm2,        mm3
    41         pcmpgtw         mm1,        mm2         ; mm1 = 0xffff where zbin > abs
    43         pandn           mm1,        mm2         ; keep abs only where zbin <= abs
    44         movq            mm3,        mm1
    46         mov             rdx,        arg(6) ;quant_ptr (kept in rdx for all groups)
    47         movq            mm1,        [rdx]
    49         mov             rcx,        arg(5) ;round_ptr (kept in rcx for all groups)
    50         movq            mm2,        [rcx]
    52         paddw           mm3,        mm2
    53         pmulhuw         mm3,        mm1         ; high 16 bits of unsigned product
    55         pxor            mm3,        mm0
    56         psubw           mm3,        mm0     ;gain the sign back
    58         mov             rdi,        arg(2) ;qcoeff_ptr (kept in rdi for all groups)
    61         movq            [rdi],      mm3
    63         mov             rax,        arg(3) ;dequant_ptr
    64         movq            mm2,        [rax]
    66         pmullw          mm3,        mm2         ; low 16 bits of qcoeff * dequant
    67         mov             rax,        arg(7) ;dqcoeff_ptr
    69         movq            [rax],      mm3
    71         ; next 4 coefficients (byte offset 8)
    72         movq            mm4,        [rsi+8]
    74         mov             rax,        arg(1) ;zbin_ptr
    75         movq            mm5,        [rax+8]
    77         movq            mm7,        mm4
    78         psraw           mm4,        15          ; mm4 = per-word sign mask
    80         pxor            mm7,        mm4
    81         psubw           mm7,        mm4         ; abs
    83         movq            mm6,        mm7
    84         pcmpgtw         mm5,        mm6         ; mm5 = 0xffff where zbin > abs
    86         pandn           mm5,        mm6         ; keep abs only where zbin <= abs
    87         movq            mm7,        mm5
    89         movq            mm5,        [rdx+8]     ; quant
    90         movq            mm6,        [rcx+8]     ; round
    92         paddw           mm7,        mm6
    93         pmulhuw         mm7,        mm5
    95         pxor            mm7,        mm4
    96         psubw           mm7,        mm4;gain the sign back
   101         movq            [rdi+8],    mm7         ; rdi still holds qcoeff_ptr
   103         mov             rax,        arg(3) ;dequant_ptr
   104         movq            mm6,        [rax+8]
   106         pmullw          mm7,        mm6
   107         mov             rax,        arg(7) ;dqcoeff_ptr
   109         movq            [rax+8],    mm7
   112                 ; next 4 coefficients (byte offset 16)
   113         movq            mm4,        [rsi+16]
   115         mov             rax,        arg(1) ;zbin_ptr
   116         movq            mm5,        [rax+16]
   118         movq            mm7,        mm4
   119         psraw           mm4,        15          ; mm4 = per-word sign mask
   121         pxor            mm7,        mm4
   122         psubw           mm7,        mm4         ; abs
   124         movq            mm6,        mm7
   125         pcmpgtw         mm5,        mm6         ; mm5 = 0xffff where zbin > abs
   127         pandn           mm5,        mm6         ; keep abs only where zbin <= abs
   128         movq            mm7,        mm5
   130         movq            mm5,        [rdx+16]    ; quant
   131         movq            mm6,        [rcx+16]    ; round
   133         paddw           mm7,        mm6
   134         pmulhuw         mm7,        mm5
   136         pxor            mm7,        mm4
   137         psubw           mm7,        mm4;gain the sign back
   142         movq            [rdi+16],   mm7         ; rdi still holds qcoeff_ptr
   144         mov             rax,        arg(3) ;dequant_ptr
   145         movq            mm6,        [rax+16]
   147         pmullw          mm7,        mm6
   148         mov             rax,        arg(7) ;dqcoeff_ptr
   150         movq            [rax+16],   mm7
   153                 ; last 4 coefficients (byte offset 24)
   154         movq            mm4,        [rsi+24]
   156         mov             rax,        arg(1) ;zbin_ptr
   157         movq            mm5,        [rax+24]
   159         movq            mm7,        mm4
   160         psraw           mm4,        15          ; mm4 = per-word sign mask
   162         pxor            mm7,        mm4
   163         psubw           mm7,        mm4         ; abs
   165         movq            mm6,        mm7
   166         pcmpgtw         mm5,        mm6         ; mm5 = 0xffff where zbin > abs
   168         pandn           mm5,        mm6         ; keep abs only where zbin <= abs
   169         movq            mm7,        mm5
   171         movq            mm5,        [rdx+24]    ; quant
   172         movq            mm6,        [rcx+24]    ; round
   174         paddw           mm7,        mm6
   175         pmulhuw         mm7,        mm5
   177         pxor            mm7,        mm4
   178         psubw           mm7,        mm4;gain the sign back
   183         movq            [rdi+24],   mm7         ; rdi still holds qcoeff_ptr
   185         mov             rax,        arg(3) ;dequant_ptr
   186         movq            mm6,        [rax+24]
   188         pmullw          mm7,        mm6
   189         mov             rax,        arg(7) ;dqcoeff_ptr
   191         movq            [rax+24],   mm7
               ; Sum scan_mask[i] over every i whose stored qcoeff[i] is non-zero.
   195         mov             rdi,        arg(4) ;scan_mask
   196         mov             rsi,        arg(2) ;qcoeff_ptr
   199         pxor            mm7,        mm7         ; mm7 = 0, compare operand
   201         movq            mm0,        [rsi]
   202         movq            mm1,        [rsi+8]
   204         movq            mm2,        [rdi]
   205         movq            mm3,        [rdi+8];
   207         pcmpeqw         mm0,        mm7         ; 0xffff where qcoeff == 0
   208         pcmpeqw         mm1,        mm7
   210         pcmpeqw         mm6,        mm6         ; mm6 = all ones (reused below)
   211         pxor            mm0,        mm6         ; invert: 0xffff where qcoeff != 0
   213         pxor            mm1,        mm6
   214         psrlw           mm0,        15          ; 1 where qcoeff != 0, else 0
   216         psrlw           mm1,        15
   217         pmaddwd         mm0,        mm2         ; flag * scan_mask, adjacent pairs summed
   219         pmaddwd         mm1,        mm3
   220         movq            mm5,        mm0         ; mm5 = running sum (two dwords)
   222         paddd           mm5,        mm1
   224         movq            mm0,        [rsi+16]
   225         movq            mm1,        [rsi+24]
   227         movq            mm2,        [rdi+16]
   228         movq            mm3,        [rdi+24];
   230         pcmpeqw         mm0,        mm7
   231         pcmpeqw         mm1,        mm7
   234         pxor            mm0,        mm6         ; mm6 is still all ones
   236         pxor            mm1,        mm6
   237         psrlw           mm0,        15
   239         psrlw           mm1,        15
   240         pmaddwd         mm0,        mm2
   242         pmaddwd         mm1,        mm3
   243         paddd           mm5,        mm0
   245         paddd           mm5,        mm1
   246         movq            mm0,        mm5
   248         psrlq           mm5,        32
   249         paddd           mm0,        mm5         ; low dword = sum of both dword lanes
   251         ; eob adjustment begins here
   252         movq            rcx,        mm0
   253         and             rcx,        0xffff      ; rcx = S, the 16-bit mask sum
   255         xor             rdx,        rdx
   256         sub             rdx,        rcx ; rdx=-rcx
   258         bsr             rax,        rcx         ; highest set bit; result unusable if rcx == 0
   259         inc             rax
   261         sar             rdx,        31          ; all ones if rcx != 0, else 0
   262         and             rax,        rdx         ; forces 0 when rcx == 0
   263         ; Substitute the sse assembly for the old mmx mixed assembly/C. The
   264         ; following is kept as reference
   265         ;    movq            rcx,        mm0
   266         ;    bsr             rax,        rcx
   267         ;
   268         ;    mov             eob,        rax
   269         ;    mov             eee,        rcx
   270         ;
   271         ;if(eee==0)
   272         ;{
   273         ;    eob=-1;
   274         ;}
   275         ;else if(eee<0)
   276         ;{
   277         ;    eob=15;
   278         ;}
   279         ;d->eob = eob+1;
   281     ; begin epilog
   282     pop rdi
   283     pop rsi
   284     UNSHADOW_ARGS
   285     pop         rbp
   286     ret

mercurial