media/libvpx/vp9/encoder/x86/vp9_error_sse2.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701, complying with the disk-avoidance requirements
documented at https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11 %include "third_party/x86inc/x86inc.asm"
michael@0 12
michael@0 13 SECTION .text
michael@0 14
michael@0 15 ; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
michael@0 16 ; int64_t *ssz)
;
; Returns the sum over the block of (dqcoeff[i] - coeff[i])^2 and stores the
; sum of coeff[i]^2 through ssz. Both sums are accumulated in 64 bits.
;
; Register roles:
;   uqcq / dqcq  - coeff / dqcoeff pointers, moved to the end of each array
;   sizeq        - negated element index, counts up towards zero
;   m4           - error accumulator (two 64-bit lanes)
;   m6           - ssz accumulator (two 64-bit lanes)
;   m5           - all-zero register used to widen dwords to qwords
;   m0-m3, m7    - scratch
;
; NOTE(review): the loop tests its exit condition only after the first pass
; and reads two vectors from each array per pass, so it looks like block_size
; must be a positive multiple of that step - confirm with callers.
; NOTE(review): mova is presumably the aligned x86inc move, which would
; require both arrays to be mmsize-aligned - confirm.
michael@0 17
michael@0 18 INIT_XMM sse2
; presumably per x86inc: 3 arguments loaded, 3 GPRs, 8 vector registers -
; TODO confirm. ssz is named here but is not one of the 3 loaded arguments;
; the x86-32 path below fetches it by hand from sszm.
michael@0 19 cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
michael@0 20 pxor m4, m4 ; sse accumulator: sum of (dqc - uqc)^2
michael@0 21 pxor m6, m6 ; ssz accumulator: sum of uqc^2
michael@0 22 pxor m5, m5 ; dedicated zero register
; move both pointers one-past-the-end and index them with a negative element
; offset, so the single add/jl pair at the bottom both advances and
; terminates the loop
michael@0 23 lea uqcq, [uqcq+sizeq*2]
michael@0 24 lea dqcq, [dqcq+sizeq*2]
michael@0 25 neg sizeq
michael@0 26 .loop:
; load two vectors from each array; the *2 scales the element index to bytes
michael@0 27 mova m2, [uqcq+sizeq*2]
michael@0 28 mova m0, [dqcq+sizeq*2]
michael@0 29 mova m3, [uqcq+sizeq*2+mmsize]
michael@0 30 mova m1, [dqcq+sizeq*2+mmsize]
michael@0 31 psubw m0, m2 ; m0 = dqc - uqc (first vector)
michael@0 32 psubw m1, m3 ; m1 = dqc - uqc (second vector)
michael@0 33 ; individual errors are max. 15bit+sign, so squares are 30bit, and
michael@0 34 ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
michael@0 35 pmaddwd m0, m0 ; squared errors, adjacent pairs summed into dwords
michael@0 36 pmaddwd m1, m1
michael@0 37 pmaddwd m2, m2 ; squared uqc values, adjacent pairs summed into dwords
michael@0 38 pmaddwd m3, m3
michael@0 39 ; accumulate in 64bit
; interleaving each dword with a zero dword from m5 zero-extends it to a
; qword; m0/m1 feed the error sum in m4, m2/m3 feed the ssz sum in m6.
; NOTE(review): the three-operand punpckldq form is presumably expanded by
; x86inc into a copy to m7 followed by the two-operand unpack - confirm.
michael@0 40 punpckldq m7, m0, m5
michael@0 41 punpckhdq m0, m5
michael@0 42 paddq m4, m7
michael@0 43 punpckldq m7, m1, m5
michael@0 44 paddq m4, m0
michael@0 45 punpckhdq m1, m5
michael@0 46 paddq m4, m7
michael@0 47 punpckldq m7, m2, m5
michael@0 48 paddq m4, m1
michael@0 49 punpckhdq m2, m5
michael@0 50 paddq m6, m7
michael@0 51 punpckldq m7, m3, m5
michael@0 52 paddq m6, m2
michael@0 53 punpckhdq m3, m5
michael@0 54 paddq m6, m7
michael@0 55 paddq m6, m3
michael@0 56 add sizeq, mmsize ; advance by mmsize elements, i.e. the 2*mmsize bytes just read
michael@0 57 jl .loop ; keep going while the index is still negative
michael@0 58
michael@0 59 ; accumulate horizontally and store in return value
; movhlps copies the high qword of its source into the low qword of its
; destination, so after the adds the low qword of m4 / m6 holds each total;
; only those low qwords are read below. m5 stops being the zero register here.
michael@0 60 movhlps m5, m4
michael@0 61 movhlps m7, m6
michael@0 62 paddq m4, m5
michael@0 63 paddq m6, m7
michael@0 64 %if ARCH_X86_64
michael@0 65 movq rax, m4 ; return value = total squared error
michael@0 66 movq [sszq], m6 ; *ssz = total of squared uqc values
michael@0 67 %else
michael@0 68 mov eax, sszm ; eax = ssz pointer (scratch use before it holds the result)
michael@0 69 pshufd m5, m4, 0x1 ; low dword of m5 = bits 63:32 of the error total
michael@0 70 movq [eax], m6 ; *ssz = total of squared uqc values
michael@0 71 movd eax, m4 ; low 32 bits of the 64-bit return value
michael@0 72 movd edx, m5 ; high 32 bits of the 64-bit return value
michael@0 73 %endif
michael@0 74 RET

mercurial