media/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11 %include "third_party/x86inc/x86inc.asm"
michael@0 12
; Read-only word constants. Each label holds eight 16-bit words of one value.
; The dc_predictor_* routines in this file add them to the pixel sum right
; before the arithmetic right shift (pw_4 with >>3, pw_8 with >>4, pw_16 with
; >>5, pw_32 with >>6), so each is half of the divisor, i.e. a rounding bias.
michael@0 13 SECTION_RODATA
michael@0 14 pw_4: times 8 dw 4
michael@0 15 pw_8: times 8 dw 8
michael@0 16 pw_16: times 8 dw 16
michael@0 17 pw_32: times 8 dw 32
michael@0 18
michael@0 19 SECTION .text
michael@0 20
; dc_predictor_4x4(dst, stride, above, left)
; Fills a 4x4 block at dst (row pitch = stride) with a single value:
;   (sum(above[0..3]) + sum(left[0..3]) + 4) >> 3
; The fifth register (goffset) is scratch for the GET_GOT / GLOBAL /
; RESTORE_GOT macros, which are not defined in this file.
; NOTE(review): presumably they come from the included x86inc.asm and provide
; position-independent access to pw_4 -- confirm.
; NOTE(review): this routine is set up with INIT_MMX and no emms is visible
; here; presumably the caller is responsible for clearing MMX state -- confirm.
michael@0 21 INIT_MMX sse
michael@0 22 cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
michael@0 23 GET_GOT goffsetq
michael@0 24
; m1 = 0, used as the second operand of psadbw so the SAD is a plain byte sum.
michael@0 25 pxor m1, m1
; m0 = above[0..3] in the low dword, left[0..3] in the high dword.
michael@0 26 movd m0, [aboveq]
michael@0 27 punpckldq m0, [leftq]
; Sum of the eight bytes lands in the low word of m0.
michael@0 28 psadbw m0, m1
; Round and divide by 8.
michael@0 29 paddw m0, [GLOBAL(pw_4)]
michael@0 30 psraw m0, 3
; Broadcast word 0 to every word, then narrow words to bytes so every byte of
; m0 holds the DC value.
michael@0 31 pshufw m0, m0, 0x0
michael@0 32 packuswb m0, m0
; Store four rows of four bytes each.
michael@0 33 movd [dstq ], m0
michael@0 34 movd [dstq+strideq], m0
michael@0 35 lea dstq, [dstq+strideq*2]
michael@0 36 movd [dstq ], m0
michael@0 37 movd [dstq+strideq], m0
michael@0 38
michael@0 39 RESTORE_GOT
michael@0 40 RET
michael@0 41
; dc_predictor_8x8(dst, stride, above, left)
; Fills an 8x8 block at dst (row pitch = stride) with a single value:
;   (sum(above[0..7]) + sum(left[0..7]) + 8) >> 4
; goffset is scratch for GET_GOT / GLOBAL / RESTORE_GOT (macros not defined in
; this file; presumably PIC helpers from x86inc.asm -- confirm).
michael@0 42 INIT_MMX sse
michael@0 43 cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
michael@0 44 GET_GOT goffsetq
michael@0 45
; m1 = 0 for the psadbw byte sums; m0 = 8 bytes of above, m2 = 8 bytes of left.
michael@0 46 pxor m1, m1
michael@0 47 movq m0, [aboveq]
michael@0 48 movq m2, [leftq]
; above and left are fully loaded, so the argument registers are re-labelled:
; the third one now carries stride3 = 3 * stride.
; NOTE(review): DEFINE_ARGS semantics come from x86inc.asm -- confirm.
michael@0 49 DEFINE_ARGS dst, stride, stride3
michael@0 50 lea stride3q, [strideq*3]
; Byte sums of above and left, added together.
michael@0 51 psadbw m0, m1
michael@0 52 psadbw m2, m1
michael@0 53 paddw m0, m2
; Round and divide by 16.
michael@0 54 paddw m0, [GLOBAL(pw_8)]
michael@0 55 psraw m0, 4
; Broadcast word 0 to all words, then narrow to bytes: m0 = 8 copies of DC.
michael@0 56 pshufw m0, m0, 0x0
michael@0 57 packuswb m0, m0
; Rows 0..3.
michael@0 58 movq [dstq ], m0
michael@0 59 movq [dstq+strideq ], m0
michael@0 60 movq [dstq+strideq*2], m0
michael@0 61 movq [dstq+stride3q ], m0
; Advance four rows and write rows 4..7.
michael@0 62 lea dstq, [dstq+strideq*4]
michael@0 63 movq [dstq ], m0
michael@0 64 movq [dstq+strideq ], m0
michael@0 65 movq [dstq+strideq*2], m0
michael@0 66 movq [dstq+stride3q ], m0
michael@0 67
michael@0 68 RESTORE_GOT
michael@0 69 RET
michael@0 70
; dc_predictor_16x16(dst, stride, above, left)
; Fills a 16x16 block at dst (row pitch = stride) with a single value:
;   (sum(above[0..15]) + sum(left[0..15]) + 16) >> 5
; goffset is scratch for GET_GOT / GLOBAL / RESTORE_GOT (macros not defined in
; this file; presumably PIC helpers from x86inc.asm -- confirm).
; NOTE(review): all loads and stores use mova, which presumably requires
; 16-byte-aligned above, left and dst rows -- confirm against callers.
michael@0 71 INIT_XMM sse2
michael@0 72 cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
michael@0 73 GET_GOT goffsetq
michael@0 74
; m1 = 0 for the psadbw byte sums; m0 = 16 bytes of above, m2 = 16 of left.
michael@0 75 pxor m1, m1
michael@0 76 mova m0, [aboveq]
michael@0 77 mova m2, [leftq]
; Inputs are loaded; re-label the argument registers for the store loop:
; stride3 = 3 * stride, lines4 = number of 4-row groups left to write (4).
michael@0 78 DEFINE_ARGS dst, stride, stride3, lines4
michael@0 79 lea stride3q, [strideq*3]
michael@0 80 mov lines4d, 4
; psadbw on a 16-byte register yields one partial sum per 8-byte half.
michael@0 81 psadbw m0, m1
michael@0 82 psadbw m2, m1
michael@0 83 paddw m0, m2
; Fold the high-half partial sum onto the low half.
michael@0 84 movhlps m2, m0
michael@0 85 paddw m0, m2
; Round and divide by 32.
michael@0 86 paddw m0, [GLOBAL(pw_16)]
michael@0 87 psraw m0, 5
; Broadcast word 0 across the low four words, duplicate the low qword into the
; high qword, then narrow to bytes: m0 = 16 copies of DC.
michael@0 88 pshuflw m0, m0, 0x0
michael@0 89 punpcklqdq m0, m0
michael@0 90 packuswb m0, m0
; Each iteration writes four 16-byte rows; 4 iterations cover 16 rows.
michael@0 91 .loop:
michael@0 92 mova [dstq ], m0
michael@0 93 mova [dstq+strideq ], m0
michael@0 94 mova [dstq+strideq*2], m0
michael@0 95 mova [dstq+stride3q ], m0
michael@0 96 lea dstq, [dstq+strideq*4]
michael@0 97 dec lines4d
michael@0 98 jnz .loop
michael@0 99
michael@0 100 RESTORE_GOT
michael@0 101 REP_RET
michael@0 102
; dc_predictor_32x32(dst, stride, above, left)
; Fills a 32x32 block at dst (row pitch = stride) with a single value:
;   (sum(above[0..31]) + sum(left[0..31]) + 32) >> 6
; goffset is scratch for GET_GOT / GLOBAL / RESTORE_GOT (macros not defined in
; this file; presumably PIC helpers from x86inc.asm -- confirm).
; NOTE(review): all loads and stores use mova, which presumably requires
; 16-byte-aligned above, left and dst rows -- confirm against callers.
michael@0 103 INIT_XMM sse2
michael@0 104 cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
michael@0 105 GET_GOT goffsetq
michael@0 106
; m1 = 0 for the psadbw byte sums; m0/m2 = above[0..31], m3/m4 = left[0..31].
michael@0 107 pxor m1, m1
michael@0 108 mova m0, [aboveq]
michael@0 109 mova m2, [aboveq+16]
michael@0 110 mova m3, [leftq]
michael@0 111 mova m4, [leftq+16]
; Inputs are loaded; re-label the argument registers for the store loop:
; stride3 = 3 * stride, lines4 = number of 4-row groups left to write (8).
michael@0 112 DEFINE_ARGS dst, stride, stride3, lines4
michael@0 113 lea stride3q, [strideq*3]
michael@0 114 mov lines4d, 8
; Per-half byte sums of all four input vectors, accumulated into m0.
michael@0 115 psadbw m0, m1
michael@0 116 psadbw m2, m1
michael@0 117 psadbw m3, m1
michael@0 118 psadbw m4, m1
michael@0 119 paddw m0, m2
michael@0 120 paddw m0, m3
michael@0 121 paddw m0, m4
; Fold the high-half partial sum onto the low half.
michael@0 122 movhlps m2, m0
michael@0 123 paddw m0, m2
; Round and divide by 64.
michael@0 124 paddw m0, [GLOBAL(pw_32)]
michael@0 125 psraw m0, 6
; Broadcast word 0 across the low four words, duplicate the low qword into the
; high qword, then narrow to bytes: m0 = 16 copies of DC.
michael@0 126 pshuflw m0, m0, 0x0
michael@0 127 punpcklqdq m0, m0
michael@0 128 packuswb m0, m0
; Each iteration writes four 32-byte rows (two 16-byte stores per row);
; 8 iterations cover 32 rows.
michael@0 129 .loop:
michael@0 130 mova [dstq ], m0
michael@0 131 mova [dstq +16], m0
michael@0 132 mova [dstq+strideq ], m0
michael@0 133 mova [dstq+strideq +16], m0
michael@0 134 mova [dstq+strideq*2 ], m0
michael@0 135 mova [dstq+strideq*2+16], m0
michael@0 136 mova [dstq+stride3q ], m0
michael@0 137 mova [dstq+stride3q +16], m0
michael@0 138 lea dstq, [dstq+strideq*4]
michael@0 139 dec lines4d
michael@0 140 jnz .loop
michael@0 141
michael@0 142 RESTORE_GOT
michael@0 143 REP_RET
michael@0 144
; v_predictor_4x4(dst, stride, above)
; Vertical prediction: copies the four bytes at above[0..3] into each of the
; four rows of the block at dst (row pitch = stride).
michael@0 145 INIT_MMX sse
michael@0 146 cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
; m0 = above[0..3].
michael@0 147 movd m0, [aboveq]
; Rows 0 and 1.
michael@0 148 movd [dstq ], m0
michael@0 149 movd [dstq+strideq], m0
; Advance two rows; rows 2 and 3.
michael@0 150 lea dstq, [dstq+strideq*2]
michael@0 151 movd [dstq ], m0
michael@0 152 movd [dstq+strideq], m0
michael@0 153 RET
michael@0 154
; v_predictor_8x8(dst, stride, above)
; Vertical prediction: copies the eight bytes at above[0..7] into each of the
; eight rows of the block at dst (row pitch = stride).
michael@0 155 INIT_MMX sse
michael@0 156 cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above
; m0 = above[0..7].
michael@0 157 movq m0, [aboveq]
; above is loaded, so its register is re-labelled to hold stride3 = 3 * stride.
michael@0 158 DEFINE_ARGS dst, stride, stride3
michael@0 159 lea stride3q, [strideq*3]
; Rows 0..3.
michael@0 160 movq [dstq ], m0
michael@0 161 movq [dstq+strideq ], m0
michael@0 162 movq [dstq+strideq*2], m0
michael@0 163 movq [dstq+stride3q ], m0
; Advance four rows; rows 4..7.
michael@0 164 lea dstq, [dstq+strideq*4]
michael@0 165 movq [dstq ], m0
michael@0 166 movq [dstq+strideq ], m0
michael@0 167 movq [dstq+strideq*2], m0
michael@0 168 movq [dstq+stride3q ], m0
michael@0 169 RET
michael@0 170
; v_predictor_16x16(dst, stride, above)
; Vertical prediction: copies the sixteen bytes at above[0..15] into each of
; the sixteen rows of the block at dst (row pitch = stride).
; NOTE(review): mova presumably requires 16-byte-aligned above and dst rows --
; confirm against callers.
michael@0 171 INIT_XMM sse2
michael@0 172 cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above
; m0 = above[0..15].
michael@0 173 mova m0, [aboveq]
; above is loaded; re-label registers: stride3 = 3 * stride, nlines4 = number
; of 4-row groups left to write (4).
michael@0 174 DEFINE_ARGS dst, stride, stride3, nlines4
michael@0 175 lea stride3q, [strideq*3]
michael@0 176 mov nlines4d, 4
; Each iteration writes four rows; 4 iterations cover 16 rows.
michael@0 177 .loop:
michael@0 178 mova [dstq ], m0
michael@0 179 mova [dstq+strideq ], m0
michael@0 180 mova [dstq+strideq*2], m0
michael@0 181 mova [dstq+stride3q ], m0
michael@0 182 lea dstq, [dstq+strideq*4]
michael@0 183 dec nlines4d
michael@0 184 jnz .loop
michael@0 185 REP_RET
michael@0 186
; v_predictor_32x32(dst, stride, above)
; Vertical prediction: copies the 32 bytes at above[0..31] into each of the
; 32 rows of the block at dst (row pitch = stride).
; NOTE(review): mova presumably requires 16-byte-aligned above and dst rows --
; confirm against callers.
michael@0 187 INIT_XMM sse2
michael@0 188 cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
; m0 = above[0..15], m1 = above[16..31].
michael@0 189 mova m0, [aboveq]
michael@0 190 mova m1, [aboveq+16]
; above is loaded; re-label registers: stride3 = 3 * stride, nlines4 = number
; of 4-row groups left to write (8).
michael@0 191 DEFINE_ARGS dst, stride, stride3, nlines4
michael@0 192 lea stride3q, [strideq*3]
michael@0 193 mov nlines4d, 8
; Each iteration writes four rows, two 16-byte stores per row; 8 iterations
; cover 32 rows.
michael@0 194 .loop:
michael@0 195 mova [dstq ], m0
michael@0 196 mova [dstq +16], m1
michael@0 197 mova [dstq+strideq ], m0
michael@0 198 mova [dstq+strideq +16], m1
michael@0 199 mova [dstq+strideq*2 ], m0
michael@0 200 mova [dstq+strideq*2+16], m1
michael@0 201 mova [dstq+stride3q ], m0
michael@0 202 mova [dstq+stride3q +16], m1
michael@0 203 lea dstq, [dstq+strideq*4]
michael@0 204 dec nlines4d
michael@0 205 jnz .loop
michael@0 206 REP_RET
michael@0 207
; tm_predictor_4x4(dst, stride, above, left)
; For every row r and column c of the 4x4 block at dst (row pitch = stride):
;   dst[r][c] = saturate_u8(left[r] + above[c] - above[-1])
; The column term (above[c] - above[-1]) is computed once as 16-bit words; the
; loop then adds the broadcast left pixel and packs with unsigned saturation.
; Two rows are produced per iteration.
; NOTE(review): the movd loads from left fetch 4 bytes although only the first
; byte is used, so the last iteration reads a few bytes beyond left[3];
; presumably the caller's buffer is large enough -- confirm.
michael@0 208 INIT_MMX sse
michael@0 209 cglobal tm_predictor_4x4, 4, 4, 4, dst, stride, above, left
; m1 = 0, used to widen bytes to words.
michael@0 210 pxor m1, m1
; m2 starts at above[-1]; m0 = above[0..3].
michael@0 211 movd m2, [aboveq-1]
michael@0 212 movd m0, [aboveq]
michael@0 213 punpcklbw m2, m1
michael@0 214 punpcklbw m0, m1
; Broadcast word 0 (above[-1]) to all four words of m2.
michael@0 215 pshufw m2, m2, 0x0
; above is consumed; its register becomes the loop counter "line".
michael@0 216 DEFINE_ARGS dst, stride, line, left
; line runs -2, -1 and left is advanced by 4, so leftq+lineq*2 addresses
; left[0] and then left[2].
michael@0 217 mov lineq, -2
michael@0 218 add leftq, 4
; m0 = above[c] - above[-1] as words.
michael@0 219 psubw m0, m2
michael@0 220 .loop:
; Byte 0 of m2 is left[r], byte 0 of m3 is left[r+1].
michael@0 221 movd m2, [leftq+lineq*2]
michael@0 222 movd m3, [leftq+lineq*2+1]
michael@0 223 punpcklbw m2, m1
michael@0 224 punpcklbw m3, m1
; Broadcast each left pixel across its register.
michael@0 225 pshufw m2, m2, 0x0
michael@0 226 pshufw m3, m3, 0x0
; Add the per-column delta, then narrow to bytes with unsigned saturation.
michael@0 227 paddw m2, m0
michael@0 228 paddw m3, m0
michael@0 229 packuswb m2, m2
michael@0 230 packuswb m3, m3
; Store rows r and r+1, then advance dst by two rows.
michael@0 231 movd [dstq ], m2
michael@0 232 movd [dstq+strideq], m3
michael@0 233 lea dstq, [dstq+strideq*2]
michael@0 234 inc lineq
michael@0 235 jnz .loop
michael@0 236 REP_RET
michael@0 237
; tm_predictor_8x8(dst, stride, above, left)
; For every row r and column c of the 8x8 block at dst (row pitch = stride):
;   dst[r][c] = saturate_u8(left[r] + above[c] - above[-1])
; The column term is computed once as eight 16-bit words in m0. Each loop
; iteration builds two rows and packs them into one register (row r in the low
; 8 bytes, row r+1 in the high 8 bytes).
; NOTE(review): the movd loads from left fetch 4 bytes although only the first
; byte is used, so the last iteration reads a few bytes beyond left[7];
; presumably the caller's buffer is large enough -- confirm.
michael@0 238 INIT_XMM sse2
michael@0 239 cglobal tm_predictor_8x8, 4, 4, 4, dst, stride, above, left
; m1 = 0, used to widen bytes to words.
michael@0 240 pxor m1, m1
; m2 starts at above[-1]; m0 = above[0..7].
michael@0 241 movd m2, [aboveq-1]
michael@0 242 movq m0, [aboveq]
michael@0 243 punpcklbw m2, m1
michael@0 244 punpcklbw m0, m1
; Broadcast word 0 (above[-1]) over the low four words of m2 ...
michael@0 245 pshuflw m2, m2, 0x0
; above is consumed; its register becomes the loop counter "line".
michael@0 246 DEFINE_ARGS dst, stride, line, left
; line runs -4..-1 and left is advanced by 8, so leftq+lineq*2 walks
; left[0], left[2], left[4], left[6].
michael@0 247 mov lineq, -4
; ... and copy the low qword to the high qword: all eight words = above[-1].
michael@0 248 punpcklqdq m2, m2
michael@0 249 add leftq, 8
; m0 = above[c] - above[-1] as words.
michael@0 250 psubw m0, m2
michael@0 251 .loop:
; Byte 0 of m2 is left[r], byte 0 of m3 is left[r+1].
michael@0 252 movd m2, [leftq+lineq*2]
michael@0 253 movd m3, [leftq+lineq*2+1]
michael@0 254 punpcklbw m2, m1
michael@0 255 punpcklbw m3, m1
; Broadcast each left pixel to all eight words of its register.
michael@0 256 pshuflw m2, m2, 0x0
michael@0 257 pshuflw m3, m3, 0x0
michael@0 258 punpcklqdq m2, m2
michael@0 259 punpcklqdq m3, m3
; Add the per-column delta.
michael@0 260 paddw m2, m0
michael@0 261 paddw m3, m0
; Narrow both rows with unsigned saturation: low 8 bytes = row r,
; high 8 bytes = row r+1.
michael@0 262 packuswb m2, m3
michael@0 263 movq [dstq ], m2
michael@0 264 movhps [dstq+strideq], m2
michael@0 265 lea dstq, [dstq+strideq*2]
michael@0 266 inc lineq
michael@0 267 jnz .loop
michael@0 268 REP_RET
michael@0 269
; tm_predictor_16x16(dst, stride, above, left)
; For every row r and column c of the 16x16 block at dst (row pitch = stride):
;   dst[r][c] = saturate_u8(left[r] + above[c] - above[-1])
; The column term is computed once as words: m0 covers columns 0..7 and m4
; covers columns 8..15. Each loop iteration produces two full rows.
; NOTE(review): the three-operand forms (punpckhbw m4, m0, m1 / paddw m5, m2,
; m0) are presumably expanded by x86inc.asm into a register copy followed by
; the two-operand SSE2 instruction -- confirm.
; NOTE(review): mova presumably requires 16-byte-aligned above and dst rows --
; confirm against callers.
; NOTE(review): the movd loads from left fetch 4 bytes although only the first
; byte is used, so the last iteration reads a few bytes beyond left[15];
; presumably the caller's buffer is large enough -- confirm.
michael@0 270 INIT_XMM sse2
michael@0 271 cglobal tm_predictor_16x16, 4, 4, 7, dst, stride, above, left
; m1 = 0, used to widen bytes to words.
michael@0 272 pxor m1, m1
; m2 starts at above[-1]; m0 = above[0..15].
michael@0 273 movd m2, [aboveq-1]
michael@0 274 mova m0, [aboveq]
michael@0 275 punpcklbw m2, m1
; The high half must be widened into m4 before m0 is overwritten by the
; low-half widening on the next line.
michael@0 276 punpckhbw m4, m0, m1
michael@0 277 punpcklbw m0, m1
; Broadcast word 0 (above[-1]) over the low four words of m2 ...
michael@0 278 pshuflw m2, m2, 0x0
; above is consumed; its register becomes the loop counter "line".
michael@0 279 DEFINE_ARGS dst, stride, line, left
; line runs -8..-1 and left is advanced by 16, so leftq+lineq*2 walks the
; even indices left[0], left[2], ..., left[14].
michael@0 280 mov lineq, -8
; ... and copy the low qword to the high qword: all eight words = above[-1].
michael@0 281 punpcklqdq m2, m2
michael@0 282 add leftq, 16
; m0 / m4 = above[c] - above[-1] for columns 0..7 / 8..15.
michael@0 283 psubw m0, m2
michael@0 284 psubw m4, m2
michael@0 285 .loop:
; Byte 0 of m2 is left[r], byte 0 of m3 is left[r+1].
michael@0 286 movd m2, [leftq+lineq*2]
michael@0 287 movd m3, [leftq+lineq*2+1]
michael@0 288 punpcklbw m2, m1
michael@0 289 punpcklbw m3, m1
; Broadcast each left pixel to all eight words of its register.
michael@0 290 pshuflw m2, m2, 0x0
michael@0 291 pshuflw m3, m3, 0x0
michael@0 292 punpcklqdq m2, m2
michael@0 293 punpcklqdq m3, m3
; m5 / m6 = columns 0..7 of rows r / r+1; m2 / m3 become columns 8..15.
michael@0 294 paddw m5, m2, m0
michael@0 295 paddw m6, m3, m0
michael@0 296 paddw m2, m4
michael@0 297 paddw m3, m4
; Narrow with unsigned saturation into one 16-byte row per register.
michael@0 298 packuswb m5, m2
michael@0 299 packuswb m6, m3
michael@0 300 mova [dstq ], m5
michael@0 301 mova [dstq+strideq], m6
michael@0 302 lea dstq, [dstq+strideq*2]
michael@0 303 inc lineq
michael@0 304 jnz .loop
michael@0 305 REP_RET
michael@0 306
; tm_predictor_32x32(dst, stride, above, left)
; For every row r and column c of the 32x32 block at dst (row pitch = stride):
;   dst[r][c] = saturate_u8(left[r] + above[c] - above[-1])
; Only assembled when ARCH_X86_64 is set; the routine declares 10 vector
; registers and uses m8 and m9.
; The column term is computed once as words: m0 = columns 0..7, m3 = 8..15,
; m4 = 16..23, m5 = 24..31. Each loop iteration produces two full rows, with
; the arithmetic of the second row interleaved with the stores of the first.
; NOTE(review): the three-operand forms are presumably expanded by x86inc.asm
; into a register copy followed by the two-operand SSE2 instruction -- confirm.
; NOTE(review): mova presumably requires 16-byte-aligned above and dst rows --
; confirm against callers.
; NOTE(review): the movd loads from left fetch 4 bytes although only the first
; byte is used, so the last iteration reads a few bytes beyond left[31];
; presumably the caller's buffer is large enough -- confirm.
michael@0 307 %if ARCH_X86_64
michael@0 308 INIT_XMM sse2
michael@0 309 cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
; m1 = 0, used to widen bytes to words.
michael@0 310 pxor m1, m1
; m2 starts at above[-1]; m0 = above[0..15], m4 = above[16..31].
michael@0 311 movd m2, [aboveq-1]
michael@0 312 mova m0, [aboveq]
michael@0 313 mova m4, [aboveq+16]
michael@0 314 punpcklbw m2, m1
; Widen the high halves into m3 / m5 first, then widen the low halves in place.
michael@0 315 punpckhbw m3, m0, m1
michael@0 316 punpckhbw m5, m4, m1
michael@0 317 punpcklbw m0, m1
michael@0 318 punpcklbw m4, m1
; Broadcast word 0 (above[-1]) over the low four words of m2 ...
michael@0 319 pshuflw m2, m2, 0x0
; above is consumed; its register becomes the loop counter "line".
michael@0 320 DEFINE_ARGS dst, stride, line, left
; line runs -16..-1 and left is advanced by 32, so leftq+lineq*2 walks the
; even indices left[0], left[2], ..., left[30].
michael@0 321 mov lineq, -16
; ... and copy the low qword to the high qword: all eight words = above[-1].
michael@0 322 punpcklqdq m2, m2
michael@0 323 add leftq, 32
; Subtract above[-1] from all four column groups.
michael@0 324 psubw m0, m2
michael@0 325 psubw m3, m2
michael@0 326 psubw m4, m2
michael@0 327 psubw m5, m2
michael@0 328 .loop:
; Byte 0 of m2 is left[r], byte 0 of m6 is left[r+1].
michael@0 329 movd m2, [leftq+lineq*2]
michael@0 330 movd m6, [leftq+lineq*2+1]
michael@0 331 punpcklbw m2, m1
michael@0 332 punpcklbw m6, m1
; Broadcast each left pixel to all eight words of its register.
michael@0 333 pshuflw m2, m2, 0x0
michael@0 334 pshuflw m6, m6, 0x0
michael@0 335 punpcklqdq m2, m2
michael@0 336 punpcklqdq m6, m6
; Row r: m7 = cols 0..7, m8 = cols 8..15, m9 = cols 16..23, m2 = cols 24..31.
michael@0 337 paddw m7, m2, m0
michael@0 338 paddw m8, m2, m3
michael@0 339 paddw m9, m2, m4
michael@0 340 paddw m2, m5
; Narrow row r with unsigned saturation: m7 = cols 0..15, m9 = cols 16..31.
michael@0 341 packuswb m7, m8
michael@0 342 packuswb m9, m2
; Row r+1 starts here: m2 = cols 0..7, m8 = cols 8..15 (m2 and m8 are reused).
michael@0 343 paddw m2, m6, m0
michael@0 344 paddw m8, m6, m3
; m7 must be stored before it is reused for row r+1 columns 16..23.
michael@0 345 mova [dstq ], m7
michael@0 346 paddw m7, m6, m4
michael@0 347 paddw m6, m5
michael@0 348 mova [dstq +16], m9
; Narrow row r+1: m2 = cols 0..15, m7 = cols 16..31.
michael@0 349 packuswb m2, m8
michael@0 350 packuswb m7, m6
michael@0 351 mova [dstq+strideq ], m2
michael@0 352 mova [dstq+strideq+16], m7
michael@0 353 lea dstq, [dstq+strideq*2]
michael@0 354 inc lineq
michael@0 355 jnz .loop
michael@0 356 REP_RET
michael@0 357 %endif

mercurial