Thu, 15 Jan 2015 15:59:08 +0100
Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | ; |
michael@0 | 2 | ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
michael@0 | 3 | ; |
michael@0 | 4 | ; Use of this source code is governed by a BSD-style license |
michael@0 | 5 | ; that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | ; tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | ; in the file PATENTS. All contributing project authors may |
michael@0 | 8 | ; be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | ; |
michael@0 | 10 | |
michael@0 | 11 | |
michael@0 | 12 | EXPORT |vp8_short_fdct4x4_neon| |
michael@0 | 13 | EXPORT |vp8_short_fdct8x4_neon| |
michael@0 | 14 | |
michael@0 | 15 | ARM |
michael@0 | 16 | REQUIRE8 |
michael@0 | 17 | PRESERVE8 |
michael@0 | 18 | |
michael@0 | 19 | AREA ||.text||, CODE, READONLY, ALIGN=4 |
michael@0 | 20 | |
michael@0 | 21 | |
michael@0 | 22 | ALIGN 16 ; enable use of @128 bit aligned loads |
michael@0 | 23 | coeff |
michael@0 | 24 | DCW 5352, 5352, 5352, 5352 |
michael@0 | 25 | DCW 2217, 2217, 2217, 2217 |
michael@0 | 26 | DCD 14500, 14500, 14500, 14500 |
michael@0 | 27 | DCD 7500, 7500, 7500, 7500 |
michael@0 | 28 | DCD 12000, 12000, 12000, 12000 |
michael@0 | 29 | DCD 51000, 51000, 51000, 51000 |
michael@0 | 30 | |
michael@0 | 31 | ;void vp8_short_fdct4x4_c(short *input, short *output, int pitch) |
michael@0 | 32 | |vp8_short_fdct4x4_neon| PROC |
michael@0 | 33 | |
michael@0 | 34 | ; Part one |
michael@0 | 35 | vld1.16 {d0}, [r0@64], r2 |
michael@0 | 36 | adr r12, coeff |
michael@0 | 37 | vld1.16 {d1}, [r0@64], r2 |
michael@0 | 38 | vld1.16 {q8}, [r12@128]! ; d16=5352, d17=2217 |
michael@0 | 39 | vld1.16 {d2}, [r0@64], r2 |
michael@0 | 40 | vld1.32 {q9, q10}, [r12@128]! ; q9=14500, q10=7500 |
michael@0 | 41 | vld1.16 {d3}, [r0@64], r2 |
michael@0 | 42 | |
michael@0 | 43 | ; transpose d0=ip[0], d1=ip[1], d2=ip[2], d3=ip[3] |
michael@0 | 44 | vtrn.32 d0, d2 |
michael@0 | 45 | vtrn.32 d1, d3 |
michael@0 | 46 | vld1.32 {q11,q12}, [r12@128] ; q11=12000, q12=51000 |
michael@0 | 47 | vtrn.16 d0, d1 |
michael@0 | 48 | vtrn.16 d2, d3 |
michael@0 | 49 | |
michael@0 | 50 | vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[3] |
michael@0 | 51 | vadd.s16 d5, d1, d2 ; b1 = ip[1] + ip[2] |
michael@0 | 52 | vsub.s16 d6, d1, d2 ; c1 = ip[1] - ip[2] |
michael@0 | 53 | vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[3] |
michael@0 | 54 | |
michael@0 | 55 | vshl.s16 q2, q2, #3 ; (a1, b1) << 3 |
michael@0 | 56 | vshl.s16 q3, q3, #3 ; (c1, d1) << 3 |
michael@0 | 57 | |
michael@0 | 58 | vadd.s16 d0, d4, d5 ; op[0] = a1 + b1 |
michael@0 | 59 | vsub.s16 d2, d4, d5 ; op[2] = a1 - b1 |
michael@0 | 60 | |
michael@0 | 61 | vmlal.s16 q9, d7, d16 ; d1*5352 + 14500 |
michael@0 | 62 | vmlal.s16 q10, d7, d17 ; d1*2217 + 7500 |
michael@0 | 63 | vmlal.s16 q9, d6, d17 ; c1*2217 + d1*5352 + 14500 |
michael@0 | 64 | vmlsl.s16 q10, d6, d16 ; d1*2217 - c1*5352 + 7500 |
michael@0 | 65 | |
michael@0 | 66 | vshrn.s32 d1, q9, #12 ; op[1] = (c1*2217 + d1*5352 + 14500)>>12 |
michael@0 | 67 | vshrn.s32 d3, q10, #12 ; op[3] = (d1*2217 - c1*5352 + 7500)>>12 |
michael@0 | 68 | |
michael@0 | 69 | |
michael@0 | 70 | ; Part two |
michael@0 | 71 | |
michael@0 | 72 | ; transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12] |
michael@0 | 73 | vtrn.32 d0, d2 |
michael@0 | 74 | vtrn.32 d1, d3 |
michael@0 | 75 | vtrn.16 d0, d1 |
michael@0 | 76 | vtrn.16 d2, d3 |
michael@0 | 77 | |
michael@0 | 78 | vmov.s16 d26, #7 |
michael@0 | 79 | |
michael@0 | 80 | vadd.s16 d4, d0, d3 ; a1 = ip[0] + ip[12] |
michael@0 | 81 | vadd.s16 d5, d1, d2 ; b1 = ip[4] + ip[8] |
michael@0 | 82 | vsub.s16 d6, d1, d2 ; c1 = ip[4] - ip[8] |
michael@0 | 83 | vadd.s16 d4, d4, d26 ; a1 + 7 |
michael@0 | 84 | vsub.s16 d7, d0, d3 ; d1 = ip[0] - ip[12] |
michael@0 | 85 | |
michael@0 | 86 | vadd.s16 d0, d4, d5 ; op[0] = a1 + b1 + 7 |
michael@0 | 87 | vsub.s16 d2, d4, d5 ; op[8] = a1 - b1 + 7 |
michael@0 | 88 | |
michael@0 | 89 | vmlal.s16 q11, d7, d16 ; d1*5352 + 12000 |
michael@0 | 90 | vmlal.s16 q12, d7, d17 ; d1*2217 + 51000 |
michael@0 | 91 | |
michael@0 | 92 | vceq.s16 d4, d7, #0 |
michael@0 | 93 | |
michael@0 | 94 | vshr.s16 d0, d0, #4 |
michael@0 | 95 | vshr.s16 d2, d2, #4 |
michael@0 | 96 | |
michael@0 | 97 | vmlal.s16 q11, d6, d17 ; c1*2217 + d1*5352 + 12000 |
michael@0 | 98 | vmlsl.s16 q12, d6, d16 ; d1*2217 - c1*5352 + 51000 |
michael@0 | 99 | |
michael@0 | 100 | vmvn d4, d4 |
michael@0 | 101 | vshrn.s32 d1, q11, #16 ; op[4] = (c1*2217 + d1*5352 + 12000)>>16 |
michael@0 | 102 | vsub.s16 d1, d1, d4 ; op[4] += (d1!=0) |
michael@0 | 103 | vshrn.s32 d3, q12, #16 ; op[12]= (d1*2217 - c1*5352 + 51000)>>16 |
michael@0 | 104 | |
michael@0 | 105 | vst1.16 {q0, q1}, [r1@128] |
michael@0 | 106 | |
michael@0 | 107 | bx lr |
michael@0 | 108 | |
michael@0 | 109 | ENDP |
michael@0 | 110 | |
michael@0 | 111 | ;void vp8_short_fdct8x4_c(short *input, short *output, int pitch) |
;void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
;
; NEON 8x4 forward DCT: processes two adjacent 4x4 blocks (A and B)
; side-by-side, one in each half of the q registers.
;   r0 = input  (rows read with stride r2, @128-bit aligned)
;   r1 = output (32 shorts written: block A then block B)
;   r2 = pitch  (row stride in bytes)
; Clobbers r12 and NEON registers q0-q3, q8-q15.
|vp8_short_fdct8x4_neon| PROC

    ; Part one: 1-D transform on rows, results kept at <<3 precision.

    vld1.16         {q0}, [r0@128], r2
    adr             r12, coeff
    vld1.16         {q1}, [r0@128], r2
    vld1.16         {q8}, [r12@128]!        ; d16=5352,  d17=2217
    vld1.16         {q2}, [r0@128], r2
    vld1.32         {q9, q10}, [r12@128]!   ; q9=14500, q10=7500
    vld1.16         {q3}, [r0@128], r2

    ; transpose q0=ip[0], q1=ip[1], q2=ip[2], q3=ip[3]
    vtrn.32         q0, q2          ; [A0|B0]
    vtrn.32         q1, q3          ; [A1|B1]
    vtrn.16         q0, q1          ; [A2|B2]
    vtrn.16         q2, q3          ; [A3|B3]

    vadd.s16        q11, q0, q3     ; a1 = ip[0] + ip[3]
    vadd.s16        q12, q1, q2     ; b1 = ip[1] + ip[2]
    vsub.s16        q13, q1, q2     ; c1 = ip[1] - ip[2]
    vsub.s16        q14, q0, q3     ; d1 = ip[0] - ip[3]

    vshl.s16        q11, q11, #3    ; a1 << 3
    vshl.s16        q12, q12, #3    ; b1 << 3
    vshl.s16        q13, q13, #3    ; c1 << 3
    vshl.s16        q14, q14, #3    ; d1 << 3

    vadd.s16        q0, q11, q12    ; [A0 | B0] = a1 + b1
    vsub.s16        q2, q11, q12    ; [A2 | B2] = a1 - b1

    ; duplicate the rounding biases so A (low d) and B (high d) halves
    ; each get an independent 32-bit accumulator
    vmov.s16        q11, q9         ; 14500
    vmov.s16        q12, q10        ; 7500

    vmlal.s16       q9, d28, d16    ; A[1] = d1*5352 + 14500
    vmlal.s16       q10, d28, d17   ; A[3] = d1*2217 + 7500
    vmlal.s16       q11, d29, d16   ; B[1] = d1*5352 + 14500
    vmlal.s16       q12, d29, d17   ; B[3] = d1*2217 + 7500

    vmlal.s16       q9, d26, d17    ; A[1] = c1*2217 + d1*5352 + 14500
    vmlsl.s16       q10, d26, d16   ; A[3] = d1*2217 - c1*5352 + 7500
    vmlal.s16       q11, d27, d17   ; B[1] = c1*2217 + d1*5352 + 14500
    vmlsl.s16       q12, d27, d16   ; B[3] = d1*2217 - c1*5352 + 7500

    vshrn.s32       d2, q9, #12     ; A[1] = (c1*2217 + d1*5352 + 14500)>>12
    vshrn.s32       d6, q10, #12    ; A[3] = (d1*2217 - c1*5352 +  7500)>>12
    vshrn.s32       d3, q11, #12    ; B[1] = (c1*2217 + d1*5352 + 14500)>>12
    vshrn.s32       d7, q12, #12    ; B[3] = (d1*2217 - c1*5352 +  7500)>>12


    ; Part two: 1-D transform on the columns of the Part-one result.
    vld1.32         {q9,q10}, [r12@128]     ; q9=12000, q10=51000

    ; transpose q0=ip[0], q1=ip[4], q2=ip[8], q3=ip[12]
    vtrn.32         q0, q2          ; q0=[A0 | B0]
    vtrn.32         q1, q3          ; q1=[A4 | B4]
    vtrn.16         q0, q1          ; q2=[A8 | B8]
    vtrn.16         q2, q3          ; q3=[A12|B12]

    vmov.s16        q15, #7         ; rounding bias for the >>4 below

    vadd.s16        q11, q0, q3     ; a1 = ip[0] + ip[12]
    vadd.s16        q12, q1, q2     ; b1 = ip[4] + ip[8]
    vadd.s16        q11, q11, q15   ; a1 + 7
    vsub.s16        q13, q1, q2     ; c1 = ip[4] - ip[8]
    vsub.s16        q14, q0, q3     ; d1 = ip[0] - ip[12]

    vadd.s16        q0, q11, q12    ; a1 + b1 + 7
    vsub.s16        q1, q11, q12    ; a1 - b1 + 7

    ; duplicate biases for independent A/B accumulators (as in Part one)
    vmov.s16        q11, q9         ; 12000
    vmov.s16        q12, q10        ; 51000

    vshr.s16        d0, d0, #4      ; A[0] = (a1 + b1 + 7)>>4
    vshr.s16        d4, d1, #4      ; B[0] = (a1 + b1 + 7)>>4
    vshr.s16        d2, d2, #4      ; A[8] = (a1 + b1 + 7)>>4
    vshr.s16        d6, d3, #4      ; B[8] = (a1 + b1 + 7)>>4


    vmlal.s16       q9, d28, d16    ; A[4]  = d1*5352 + 12000
    vmlal.s16       q10, d28, d17   ; A[12] = d1*2217 + 51000
    vmlal.s16       q11, d29, d16   ; B[4]  = d1*5352 + 12000
    vmlal.s16       q12, d29, d17   ; B[12] = d1*2217 + 51000

    vceq.s16        q14, q14, #0    ; mask: lanes where d1 == 0

    vmlal.s16       q9, d26, d17    ; A[4]  = c1*2217 + d1*5352 + 12000
    vmlsl.s16       q10, d26, d16   ; A[12] = d1*2217 - c1*5352 + 51000
    vmlal.s16       q11, d27, d17   ; B[4]  = c1*2217 + d1*5352 + 12000
    vmlsl.s16       q12, d27, d16   ; B[12] = d1*2217 - c1*5352 + 51000

    vmvn            q14, q14        ; invert mask: all-ones where d1 != 0

    vshrn.s32       d1, q9, #16     ; A[4] = (c1*2217 + d1*5352 + 12000)>>16
    vshrn.s32       d3, q10, #16    ; A[12]= (d1*2217 - c1*5352 + 51000)>>16
    vsub.s16        d1, d1, d28     ; A[4] += (d1!=0)  (subtracting -1)

    vshrn.s32       d5, q11, #16    ; B[4] = (c1*2217 + d1*5352 + 12000)>>16
    vshrn.s32       d7, q12, #16    ; B[12]= (d1*2217 - c1*5352 + 51000)>>16
    vsub.s16        d5, d5, d29     ; B[4] += (d1!=0)  (subtracting -1)

    vst1.16         {q0, q1}, [r1@128]!     ; block A
    vst1.16         {q2, q3}, [r1@128]!     ; block B

    bx              lr

    ENDP

    END
michael@0 | 221 |