Thu, 15 Jan 2015 15:59:08 +0100
Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701, complying with the disk-avoidance requirements documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | ; |
michael@0 | 2 | ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
michael@0 | 3 | ; |
michael@0 | 4 | ; Use of this source code is governed by a BSD-style license |
michael@0 | 5 | ; that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | ; tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | ; in the file PATENTS. All contributing project authors may |
michael@0 | 8 | ; be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | ; |
michael@0 | 10 | |
michael@0 | 11 | %include "third_party/x86inc/x86inc.asm" |
michael@0 | 12 | |
; Read-only constants.
; pw_1 is eight 16-bit words, each equal to 1; QUANTIZE_FN subtracts it from
; the zbin vector before the "greater than" comparisons.
michael@0 | 13 | SECTION_RODATA |
michael@0 | 14 | pw_1: times 8 dw 1 |
michael@0 | 15 | |
; Code section: the macro definition and its instantiations follow.
michael@0 | 16 | SECTION .text |
michael@0 | 17 | |
;------------------------------------------------------------------------------
; QUANTIZE_FN name_suffix, num_gprs
;
;   %1  function name suffix; the value "b_32x32" additionally enables the
;       32x32-specific code paths guarded by %ifidn below
;   %2  number of general-purpose registers requested from cglobal
;
; Expands to a cglobal function quantize_<%1> whose arguments are, in order:
;   coeff, ncoeff, skip, zbin, round, quant, shift, qcoeff, dqcoeff, dequant,
;   zbin_oq, eob, scan, iscan
;
; Behaviour:
;   * skip != 0 : qcoeff[] and dqcoeff[] are zero-filled and 0 is stored to
;                 the 16-bit word at eob.
;   * skip == 0 : for each 16-bit coefficient c
;                   mask   = abs(c) > zbin + zbin_oq - 1
;                   t      = sat(abs(c) + round)
;                   q      = (((t * quant) >> 16) + t) * shift >> 16
;                   qcoeff = sign(c) * q, forced to 0 where mask is clear
;                   dqcoeff= qcoeff * dequant
;                 and the 16-bit word at eob receives the maximum of
;                 iscan[i] + 1 over all i whose dqcoeff is non-zero.
;   * b_32x32   : zbin and round are halved with rounding, shift is doubled,
;                 dqcoeff is (abs(qcoeff) * dequant) >> 1 with the sign put
;                 back, and a loop iteration in which no coefficient passes
;                 the zbin test just stores zeros.
;
; The first 8 coefficients use the zbin/round/quant/shift/dequant vectors as
; loaded; after that punpckhqdq replicates the upper 64 bits of each vector so
; every later coefficient uses those values.
;
; Each iteration handles mmsize (16) coefficients with aligned mova accesses;
; presumably callers pass ncoeff as a multiple of 16 and 16-byte aligned
; buffers - TODO confirm against callers.
; Uses xmm registers m0-m14.
;------------------------------------------------------------------------------
michael@0 | 18 | %macro QUANTIZE_FN 2 |
michael@0 | 19 | cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
michael@0 | 20 | shift, qcoeff, dqcoeff, dequant, zbin_oq, \ |
michael@0 | 21 | eob, scan, iscan |
michael@0 | 22 | cmp dword skipm, 0 |
michael@0 | 23 | jne .blank |
michael@0 | 24 | |
michael@0 | 25 | ; actual quantize loop - setup pointers, rounders, etc. |
michael@0 | 26 | movifnidn coeffq, coeffmp |
michael@0 | 27 | movifnidn ncoeffq, ncoeffmp |
michael@0 | 28 | mov r2, dequantmp |
michael@0 | 29 | movifnidn zbinq, zbinmp |
michael@0 | 30 | movifnidn roundq, roundmp |
michael@0 | 31 | movifnidn quantq, quantmp |
michael@0 | 32 | movd m4, dword zbin_oqm ; m4 = zbin_oq |
michael@0 | 33 | mova m0, [zbinq] ; m0 = zbin |
michael@0 | 34 | punpcklwd m4, m4 |
michael@0 | 35 | mova m1, [roundq] ; m1 = round |
michael@0 | 36 | pshufd m4, m4, 0 |
michael@0 | 37 | mova m2, [quantq] ; m2 = quant |
michael@0 | 38 | paddw m0, m4 ; m0 = zbin + zbin_oq |
michael@0 | 39 | %ifidn %1, b_32x32 |
michael@0 | 40 | pcmpeqw m5, m5 |
michael@0 | 41 | psrlw m5, 15 |
michael@0 | 42 | paddw m0, m5 |
michael@0 | 43 | paddw m1, m5 |
michael@0 | 44 | psrlw m0, 1 ; m0 = (m0 + 1) / 2 |
michael@0 | 45 | psrlw m1, 1 ; m1 = (m1 + 1) / 2 |
michael@0 | 46 | %endif |
michael@0 | 47 | mova m3, [r2q] ; m3 = dequant |
  ; zbin - 1, so that the signed "greater than" compares below act as ">= zbin"
michael@0 | 48 | psubw m0, [pw_1] |
michael@0 | 49 | mov r2, shiftmp |
michael@0 | 50 | mov r3, qcoeffmp |
michael@0 | 51 | mova m4, [r2] ; m4 = shift |
michael@0 | 52 | mov r4, dqcoeffmp |
michael@0 | 53 | mov r5, iscanmp |
michael@0 | 54 | %ifidn %1, b_32x32 |
michael@0 | 55 | psllw m4, 1 |
michael@0 | 56 | %endif |
michael@0 | 57 | pxor m5, m5 ; m5 = dedicated zero |
  ; Re-map register names: r2 becomes scratch (d1), r3/r4/r5 now hold the
  ; qcoeff/dqcoeff/iscan pointers loaded above.
michael@0 | 58 | DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob |
  ; Point every array at its end and count ncoeff up from -ncoeff to 0, so a
  ; single add both advances the index and sets the loop-exit flags.
michael@0 | 59 | lea coeffq, [ coeffq+ncoeffq*2] |
michael@0 | 60 | lea iscanq, [ iscanq+ncoeffq*2] |
michael@0 | 61 | lea qcoeffq, [ qcoeffq+ncoeffq*2] |
michael@0 | 62 | lea dqcoeffq, [dqcoeffq+ncoeffq*2] |
michael@0 | 63 | neg ncoeffq |
michael@0 | 64 | |
michael@0 | 65 | ; get DC and first 15 AC coeffs |
michael@0 | 66 | mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] |
michael@0 | 67 | mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] |
michael@0 | 68 | pabsw m6, m9 ; m6 = abs(m9) |
michael@0 | 69 | pabsw m11, m10 ; m11 = abs(m10) |
michael@0 | 70 | pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin |
michael@0 | 71 | punpckhqdq m0, m0 |
michael@0 | 72 | pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin |
michael@0 | 73 | paddsw m6, m1 ; m6 += round |
michael@0 | 74 | punpckhqdq m1, m1 |
michael@0 | 75 | paddsw m11, m1 ; m11 += round |
michael@0 | 76 | pmulhw m8, m6, m2 ; m8 = m6*q>>16 |
michael@0 | 77 | punpckhqdq m2, m2 |
michael@0 | 78 | pmulhw m13, m11, m2 ; m13 = m11*q>>16 |
michael@0 | 79 | paddw m8, m6 ; m8 += m6 |
michael@0 | 80 | paddw m13, m11 ; m13 += m11 |
michael@0 | 81 | pmulhw m8, m4 ; m8 = m8*qsh>>16 |
michael@0 | 82 | punpckhqdq m4, m4 |
michael@0 | 83 | pmulhw m13, m4 ; m13 = m13*qsh>>16 |
michael@0 | 84 | psignw m8, m9 ; m8 = reinsert sign |
michael@0 | 85 | psignw m13, m10 ; m13 = reinsert sign |
michael@0 | 86 | pand m8, m7 |
michael@0 | 87 | pand m13, m12 |
michael@0 | 88 | mova [qcoeffq+ncoeffq*2+ 0], m8 |
michael@0 | 89 | mova [qcoeffq+ncoeffq*2+16], m13 |
michael@0 | 90 | %ifidn %1, b_32x32 |
michael@0 | 91 | pabsw m8, m8 |
michael@0 | 92 | pabsw m13, m13 |
michael@0 | 93 | %endif |
michael@0 | 94 | pmullw m8, m3 ; dqc[i] = qc[i] * q |
michael@0 | 95 | punpckhqdq m3, m3 |
michael@0 | 96 | pmullw m13, m3 ; dqc[i] = qc[i] * q |
michael@0 | 97 | %ifidn %1, b_32x32 |
michael@0 | 98 | psrlw m8, 1 |
michael@0 | 99 | psrlw m13, 1 |
michael@0 | 100 | psignw m8, m9 |
michael@0 | 101 | psignw m13, m10 |
michael@0 | 102 | %endif |
michael@0 | 103 | mova [dqcoeffq+ncoeffq*2+ 0], m8 |
michael@0 | 104 | mova [dqcoeffq+ncoeffq*2+16], m13 |
  ; eob candidates: m7/m12 are -1 where the zbin test passed, so subtracting
  ; them adds 1 to iscan[i]; pandn then keeps only lanes whose dqcoeff != 0.
michael@0 | 105 | pcmpeqw m8, m5 ; m8 = c[i] == 0 |
michael@0 | 106 | pcmpeqw m13, m5 ; m13 = c[i] == 0 |
michael@0 | 107 | mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] |
michael@0 | 108 | mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] |
michael@0 | 109 | psubw m6, m7 ; m6 = scan[i] + 1 |
michael@0 | 110 | psubw m11, m12 ; m11 = scan[i] + 1 |
michael@0 | 111 | pandn m8, m6 ; m8 = max(eob) |
michael@0 | 112 | pandn m13, m11 ; m13 = max(eob) |
michael@0 | 113 | pmaxsw m8, m13 |
michael@0 | 114 | add ncoeffq, mmsize |
michael@0 | 115 | jz .accumulate_eob |
michael@0 | 116 | |
  ; Remaining coefficients: same computation, but m0-m4 now hold the replicated
  ; upper halves of the parameter vectors and m8 carries the running eob max.
michael@0 | 117 | .ac_only_loop: |
michael@0 | 118 | mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] |
michael@0 | 119 | mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] |
michael@0 | 120 | pabsw m6, m9 ; m6 = abs(m9) |
michael@0 | 121 | pabsw m11, m10 ; m11 = abs(m10) |
michael@0 | 122 | pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin |
michael@0 | 123 | pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin |
michael@0 | 124 | %ifidn %1, b_32x32 |
  ; 32x32 only: if no lane passed the zbin test, take the zero-store shortcut
michael@0 | 125 | pmovmskb r6, m7 |
michael@0 | 126 | pmovmskb r2, m12 |
michael@0 | 127 | or r6, r2 |
michael@0 | 128 | jz .skip_iter |
michael@0 | 129 | %endif |
michael@0 | 130 | paddsw m6, m1 ; m6 += round |
michael@0 | 131 | paddsw m11, m1 ; m11 += round |
michael@0 | 132 | pmulhw m14, m6, m2 ; m14 = m6*q>>16 |
michael@0 | 133 | pmulhw m13, m11, m2 ; m13 = m11*q>>16 |
michael@0 | 134 | paddw m14, m6 ; m14 += m6 |
michael@0 | 135 | paddw m13, m11 ; m13 += m11 |
michael@0 | 136 | pmulhw m14, m4 ; m14 = m14*qsh>>16 |
michael@0 | 137 | pmulhw m13, m4 ; m13 = m13*qsh>>16 |
michael@0 | 138 | psignw m14, m9 ; m14 = reinsert sign |
michael@0 | 139 | psignw m13, m10 ; m13 = reinsert sign |
michael@0 | 140 | pand m14, m7 |
michael@0 | 141 | pand m13, m12 |
michael@0 | 142 | mova [qcoeffq+ncoeffq*2+ 0], m14 |
michael@0 | 143 | mova [qcoeffq+ncoeffq*2+16], m13 |
michael@0 | 144 | %ifidn %1, b_32x32 |
michael@0 | 145 | pabsw m14, m14 |
michael@0 | 146 | pabsw m13, m13 |
michael@0 | 147 | %endif |
michael@0 | 148 | pmullw m14, m3 ; dqc[i] = qc[i] * q |
michael@0 | 149 | pmullw m13, m3 ; dqc[i] = qc[i] * q |
michael@0 | 150 | %ifidn %1, b_32x32 |
michael@0 | 151 | psrlw m14, 1 |
michael@0 | 152 | psrlw m13, 1 |
michael@0 | 153 | psignw m14, m9 |
michael@0 | 154 | psignw m13, m10 |
michael@0 | 155 | %endif |
michael@0 | 156 | mova [dqcoeffq+ncoeffq*2+ 0], m14 |
michael@0 | 157 | mova [dqcoeffq+ncoeffq*2+16], m13 |
michael@0 | 158 | pcmpeqw m14, m5 ; m14 = c[i] == 0 |
michael@0 | 159 | pcmpeqw m13, m5 ; m13 = c[i] == 0 |
michael@0 | 160 | mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] |
michael@0 | 161 | mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] |
michael@0 | 162 | psubw m6, m7 ; m6 = scan[i] + 1 |
michael@0 | 163 | psubw m11, m12 ; m11 = scan[i] + 1 |
michael@0 | 164 | pandn m14, m6 ; m14 = max(eob) |
michael@0 | 165 | pandn m13, m11 ; m13 = max(eob) |
michael@0 | 166 | pmaxsw m8, m14 |
michael@0 | 167 | pmaxsw m8, m13 |
michael@0 | 168 | add ncoeffq, mmsize |
michael@0 | 169 | jl .ac_only_loop |
michael@0 | 170 | |
michael@0 | 171 | %ifidn %1, b_32x32 |
michael@0 | 172 | jmp .accumulate_eob |
  ; No coefficient in this group of 16 passed the zbin test: store zeros and
  ; leave the running eob maximum (m8) untouched.
michael@0 | 173 | .skip_iter: |
michael@0 | 174 | mova [qcoeffq+ncoeffq*2+ 0], m5 |
michael@0 | 175 | mova [qcoeffq+ncoeffq*2+16], m5 |
michael@0 | 176 | mova [dqcoeffq+ncoeffq*2+ 0], m5 |
michael@0 | 177 | mova [dqcoeffq+ncoeffq*2+16], m5 |
michael@0 | 178 | add ncoeffq, mmsize |
michael@0 | 179 | jl .ac_only_loop |
michael@0 | 180 | %endif |
michael@0 | 181 | |
michael@0 | 182 | .accumulate_eob: |
michael@0 | 183 | ; horizontally accumulate/max eobs and write into [eob] memory pointer |
michael@0 | 184 | mov r2, eobmp |
michael@0 | 185 | pshufd m7, m8, 0xe |
michael@0 | 186 | pmaxsw m8, m7 |
michael@0 | 187 | pshuflw m7, m8, 0xe |
michael@0 | 188 | pmaxsw m8, m7 |
michael@0 | 189 | pshuflw m7, m8, 0x1 |
michael@0 | 190 | pmaxsw m8, m7 |
  ; Store lane 0 via a general-purpose register. The memory-destination form
  ; "pextrw [mem], xmm, imm8" is an SSE4.1 encoding and would fault on CPUs
  ; that only support SSSE3; the register-destination form is SSE2.
  ; r1 (ncoeff) is not read any more at this point, so it is free as scratch.
  pextrw r1d, m8, 0
  mov [r2], r1w
michael@0 | 192 | RET |
michael@0 | 193 | |
michael@0 | 194 | ; skip-block, i.e. just write all zeroes |
michael@0 | 195 | .blank: |
michael@0 | 196 | mov r0, dqcoeffmp |
michael@0 | 197 | movifnidn ncoeffq, ncoeffmp |
michael@0 | 198 | mov r2, qcoeffmp |
michael@0 | 199 | mov r3, eobmp |
michael@0 | 200 | DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob |
michael@0 | 201 | lea dqcoeffq, [dqcoeffq+ncoeffq*2] |
michael@0 | 202 | lea qcoeffq, [ qcoeffq+ncoeffq*2] |
michael@0 | 203 | neg ncoeffq |
michael@0 | 204 | pxor m7, m7 |
michael@0 | 205 | .blank_loop: |
michael@0 | 206 | mova [dqcoeffq+ncoeffq*2+ 0], m7 |
michael@0 | 207 | mova [dqcoeffq+ncoeffq*2+16], m7 |
michael@0 | 208 | mova [qcoeffq+ncoeffq*2+ 0], m7 |
michael@0 | 209 | mova [qcoeffq+ncoeffq*2+16], m7 |
michael@0 | 210 | add ncoeffq, mmsize |
michael@0 | 211 | jl .blank_loop |
michael@0 | 212 | mov word [eobq], 0 |
michael@0 | 213 | RET |
michael@0 | 214 | %endmacro |
michael@0 | 215 | |
; Instantiate the quantizer twice with XMM registers and the ssse3 cpu flag.
; The b_32x32 variant requests 7 general-purpose registers instead of 6
; because its loop additionally uses r6 (pmovmskb result) for the
; "nothing above zbin" early-out.
michael@0 | 216 | INIT_XMM ssse3 |
michael@0 | 217 | QUANTIZE_FN b, 6 |
michael@0 | 218 | QUANTIZE_FN b_32x32, 7 |