media/libvpx/vp9/common/x86/vp9_subpixel_8t_ssse3.asm

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Wed, 31 Dec 2014 06:09:35 +0100
changeset    0:6474c204b198
permissions  -rw-r--r--

Cloned from upstream origin tor-browser at tag tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
michael@0 14 %macro VERTx4 1
michael@0 15 mov rdx, arg(5) ;filter ptr
michael@0 16 mov rsi, arg(0) ;src_ptr
michael@0 17 mov rdi, arg(2) ;output_ptr
michael@0 18 mov rcx, 0x0400040
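    ; 0x0400040 == 0x00400040: two packed 16-bit copies of 64, broadcast into
    ; krd below and added before the psraw-by-7 so the filter rounds to
    ; nearest instead of truncating.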
michael@0 19
michael@0 20 movdqa xmm4, [rdx] ;load filters
michael@0 21 movd xmm5, rcx
michael@0 22 packsswb xmm4, xmm4
michael@0 23 pshuflw xmm0, xmm4, 0b ;k0_k1
michael@0 24 pshuflw xmm1, xmm4, 01010101b ;k2_k3
michael@0 25 pshuflw xmm2, xmm4, 10101010b ;k4_k5
michael@0 26 pshuflw xmm3, xmm4, 11111111b ;k6_k7
michael@0 27
michael@0 28 punpcklqdq xmm0, xmm0
michael@0 29 punpcklqdq xmm1, xmm1
michael@0 30 punpcklqdq xmm2, xmm2
michael@0 31 punpcklqdq xmm3, xmm3
michael@0 32
michael@0 33 movdqa k0k1, xmm0
michael@0 34 movdqa k2k3, xmm1
michael@0 35 pshufd xmm5, xmm5, 0
michael@0 36 movdqa k4k5, xmm2
michael@0 37 movdqa k6k7, xmm3
michael@0 38 movdqa krd, xmm5
michael@0 39
michael@0 40 movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
michael@0 41
michael@0 42 %if ABI_IS_32BIT=0
michael@0 43 movsxd r8, DWORD PTR arg(3) ;out_pitch
michael@0 44 %endif
michael@0 45 mov rax, rsi
michael@0 46 movsxd rcx, DWORD PTR arg(4) ;output_height
michael@0 47 add rax, rdx
michael@0 48
michael@0 49 lea rbx, [rdx + rdx*4]
michael@0 50 add rbx, rdx ;pitch * 6
michael@0 51
michael@0 52 .loop:
michael@0 53 movd xmm0, [rsi] ;A
michael@0 54 movd xmm1, [rsi + rdx] ;B
michael@0 55 movd xmm2, [rsi + rdx * 2] ;C
michael@0 56 movd xmm3, [rax + rdx * 2] ;D
michael@0 57 movd xmm4, [rsi + rdx * 4] ;E
michael@0 58 movd xmm5, [rax + rdx * 4] ;F
michael@0 59
michael@0 60 punpcklbw xmm0, xmm1 ;A B
michael@0 61 punpcklbw xmm2, xmm3 ;C D
michael@0 62 punpcklbw xmm4, xmm5 ;E F
michael@0 63
michael@0 64 movd xmm6, [rsi + rbx] ;G
michael@0 65 movd xmm7, [rax + rbx] ;H
michael@0 66
michael@0 67 pmaddubsw xmm0, k0k1
michael@0 68 pmaddubsw xmm2, k2k3
michael@0 69 punpcklbw xmm6, xmm7 ;G H
michael@0 70 pmaddubsw xmm4, k4k5
michael@0 71 pmaddubsw xmm6, k6k7
michael@0 72
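    ; Sum the four partial products with saturating adds: the outer pair
    ; (k0k1 + k6k7) goes first, then the smaller and finally the larger of
    ; the two middle products via pminsw/pmaxsw -- presumably ordered this
    ; way to keep any intermediate 16-bit saturation as small as possible.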
michael@0 73 movdqa xmm1, xmm2
michael@0 74 paddsw xmm0, xmm6
michael@0 75 pmaxsw xmm2, xmm4
michael@0 76 pminsw xmm4, xmm1
michael@0 77 paddsw xmm0, xmm4
michael@0 78 paddsw xmm0, xmm2
michael@0 79
michael@0 80 paddsw xmm0, krd
michael@0 81 psraw xmm0, 7
michael@0 82 packuswb xmm0, xmm0
michael@0 83
michael@0 84 add rsi, rdx
michael@0 85 add rax, rdx
michael@0 86 %if %1
michael@0 87 movd xmm1, [rdi]
michael@0 88 pavgb xmm0, xmm1
michael@0 89 %endif
michael@0 90 movd [rdi], xmm0
michael@0 91
michael@0 92 %if ABI_IS_32BIT
michael@0 93 add rdi, DWORD PTR arg(3) ;out_pitch
michael@0 94 %else
michael@0 95 add rdi, r8
michael@0 96 %endif
michael@0 97 dec rcx
michael@0 98 jnz .loop
michael@0 99 %endm
michael@0 100
michael@0 101 %macro VERTx8 1
michael@0 102 mov rdx, arg(5) ;filter ptr
michael@0 103 mov rsi, arg(0) ;src_ptr
michael@0 104 mov rdi, arg(2) ;output_ptr
michael@0 105 mov rcx, 0x0400040
michael@0 106
michael@0 107 movdqa xmm4, [rdx] ;load filters
michael@0 108 movq xmm5, rcx
michael@0 109 packsswb xmm4, xmm4
michael@0 110 pshuflw xmm0, xmm4, 0b ;k0_k1
michael@0 111 pshuflw xmm1, xmm4, 01010101b ;k2_k3
michael@0 112 pshuflw xmm2, xmm4, 10101010b ;k4_k5
michael@0 113 pshuflw xmm3, xmm4, 11111111b ;k6_k7
michael@0 114
michael@0 115 punpcklqdq xmm0, xmm0
michael@0 116 punpcklqdq xmm1, xmm1
michael@0 117 punpcklqdq xmm2, xmm2
michael@0 118 punpcklqdq xmm3, xmm3
michael@0 119
michael@0 120 movdqa k0k1, xmm0
michael@0 121 movdqa k2k3, xmm1
michael@0 122 pshufd xmm5, xmm5, 0
michael@0 123 movdqa k4k5, xmm2
michael@0 124 movdqa k6k7, xmm3
michael@0 125 movdqa krd, xmm5
michael@0 126
michael@0 127 movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
michael@0 128
michael@0 129 %if ABI_IS_32BIT=0
michael@0 130 movsxd r8, DWORD PTR arg(3) ;out_pitch
michael@0 131 %endif
michael@0 132 mov rax, rsi
michael@0 133 movsxd rcx, DWORD PTR arg(4) ;output_height
michael@0 134 add rax, rdx
michael@0 135
michael@0 136 lea rbx, [rdx + rdx*4]
michael@0 137 add rbx, rdx ;pitch * 6
michael@0 138
michael@0 139 .loop:
michael@0 140 movq xmm0, [rsi] ;A
michael@0 141 movq xmm1, [rsi + rdx] ;B
michael@0 142 movq xmm2, [rsi + rdx * 2] ;C
michael@0 143 movq xmm3, [rax + rdx * 2] ;D
michael@0 144 movq xmm4, [rsi + rdx * 4] ;E
michael@0 145 movq xmm5, [rax + rdx * 4] ;F
michael@0 146
michael@0 147 punpcklbw xmm0, xmm1 ;A B
michael@0 148 punpcklbw xmm2, xmm3 ;C D
michael@0 149 punpcklbw xmm4, xmm5 ;E F
michael@0 150
michael@0 151 movq xmm6, [rsi + rbx] ;G
michael@0 152 movq xmm7, [rax + rbx] ;H
michael@0 153
michael@0 154 pmaddubsw xmm0, k0k1
michael@0 155 pmaddubsw xmm2, k2k3
michael@0 156 punpcklbw xmm6, xmm7 ;G H
michael@0 157 pmaddubsw xmm4, k4k5
michael@0 158 pmaddubsw xmm6, k6k7
michael@0 159
michael@0 160 paddsw xmm0, xmm6
michael@0 161 movdqa xmm1, xmm2
michael@0 162 pmaxsw xmm2, xmm4
michael@0 163 pminsw xmm4, xmm1
michael@0 164 paddsw xmm0, xmm4
michael@0 165 paddsw xmm0, xmm2
michael@0 166
michael@0 167 paddsw xmm0, krd
michael@0 168 psraw xmm0, 7
michael@0 169 packuswb xmm0, xmm0
michael@0 170
michael@0 171 add rsi, rdx
michael@0 172 add rax, rdx
michael@0 173 %if %1
michael@0 174 movq xmm1, [rdi]
michael@0 175 pavgb xmm0, xmm1
michael@0 176 %endif
michael@0 177 movq [rdi], xmm0
michael@0 178
michael@0 179 %if ABI_IS_32BIT
michael@0 180 add rdi, DWORD PTR arg(3) ;out_pitch
michael@0 181 %else
michael@0 182 add rdi, r8
michael@0 183 %endif
michael@0 184 dec rcx
michael@0 185 jnz .loop
michael@0 186 %endm
michael@0 187
michael@0 188
michael@0 189 %macro VERTx16 1
michael@0 190 mov rdx, arg(5) ;filter ptr
michael@0 191 mov rsi, arg(0) ;src_ptr
michael@0 192 mov rdi, arg(2) ;output_ptr
michael@0 193 mov rcx, 0x0400040
michael@0 194
michael@0 195 movdqa xmm4, [rdx] ;load filters
michael@0 196 movq xmm5, rcx
michael@0 197 packsswb xmm4, xmm4
michael@0 198 pshuflw xmm0, xmm4, 0b ;k0_k1
michael@0 199 pshuflw xmm1, xmm4, 01010101b ;k2_k3
michael@0 200 pshuflw xmm2, xmm4, 10101010b ;k4_k5
michael@0 201 pshuflw xmm3, xmm4, 11111111b ;k6_k7
michael@0 202
michael@0 203 punpcklqdq xmm0, xmm0
michael@0 204 punpcklqdq xmm1, xmm1
michael@0 205 punpcklqdq xmm2, xmm2
michael@0 206 punpcklqdq xmm3, xmm3
michael@0 207
michael@0 208 movdqa k0k1, xmm0
michael@0 209 movdqa k2k3, xmm1
michael@0 210 pshufd xmm5, xmm5, 0
michael@0 211 movdqa k4k5, xmm2
michael@0 212 movdqa k6k7, xmm3
michael@0 213 movdqa krd, xmm5
michael@0 214
michael@0 215 movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
michael@0 216
michael@0 217 %if ABI_IS_32BIT=0
michael@0 218 movsxd r8, DWORD PTR arg(3) ;out_pitch
michael@0 219 %endif
michael@0 220 mov rax, rsi
michael@0 221 movsxd rcx, DWORD PTR arg(4) ;output_height
michael@0 222 add rax, rdx
michael@0 223
michael@0 224 lea rbx, [rdx + rdx*4]
michael@0 225 add rbx, rdx ;pitch * 6
michael@0 226
michael@0 227 .loop:
michael@0 228 movq xmm0, [rsi] ;A
michael@0 229 movq xmm1, [rsi + rdx] ;B
michael@0 230 movq xmm2, [rsi + rdx * 2] ;C
michael@0 231 movq xmm3, [rax + rdx * 2] ;D
michael@0 232 movq xmm4, [rsi + rdx * 4] ;E
michael@0 233 movq xmm5, [rax + rdx * 4] ;F
michael@0 234
michael@0 235 punpcklbw xmm0, xmm1 ;A B
michael@0 236 punpcklbw xmm2, xmm3 ;C D
michael@0 237 punpcklbw xmm4, xmm5 ;E F
michael@0 238
michael@0 239 movq xmm6, [rsi + rbx] ;G
michael@0 240 movq xmm7, [rax + rbx] ;H
michael@0 241
michael@0 242 pmaddubsw xmm0, k0k1
michael@0 243 pmaddubsw xmm2, k2k3
michael@0 244 punpcklbw xmm6, xmm7 ;G H
michael@0 245 pmaddubsw xmm4, k4k5
michael@0 246 pmaddubsw xmm6, k6k7
michael@0 247
michael@0 248 paddsw xmm0, xmm6
michael@0 249 movdqa xmm1, xmm2
michael@0 250 pmaxsw xmm2, xmm4
michael@0 251 pminsw xmm4, xmm1
michael@0 252 paddsw xmm0, xmm4
michael@0 253 paddsw xmm0, xmm2
michael@0 254
michael@0 255 paddsw xmm0, krd
michael@0 256 psraw xmm0, 7
michael@0 257 packuswb xmm0, xmm0
michael@0 258 %if %1
michael@0 259 movq xmm1, [rdi]
michael@0 260 pavgb xmm0, xmm1
michael@0 261 %endif
michael@0 262 movq [rdi], xmm0
michael@0 263
michael@0 264 movq xmm0, [rsi + 8] ;A
michael@0 265 movq xmm1, [rsi + rdx + 8] ;B
michael@0 266 movq xmm2, [rsi + rdx * 2 + 8] ;C
michael@0 267 movq xmm3, [rax + rdx * 2 + 8] ;D
michael@0 268 movq xmm4, [rsi + rdx * 4 + 8] ;E
michael@0 269 movq xmm5, [rax + rdx * 4 + 8] ;F
michael@0 270
michael@0 271 punpcklbw xmm0, xmm1 ;A B
michael@0 272 punpcklbw xmm2, xmm3 ;C D
michael@0 273 punpcklbw xmm4, xmm5 ;E F
michael@0 274
michael@0 275
michael@0 276 movq xmm6, [rsi + rbx + 8] ;G
michael@0 277 movq xmm7, [rax + rbx + 8] ;H
michael@0 278 punpcklbw xmm6, xmm7 ;G H
michael@0 279
michael@0 280
michael@0 281 pmaddubsw xmm0, k0k1
michael@0 282 pmaddubsw xmm2, k2k3
michael@0 283 pmaddubsw xmm4, k4k5
michael@0 284 pmaddubsw xmm6, k6k7
michael@0 285
michael@0 286 paddsw xmm0, xmm6
michael@0 287 paddsw xmm0, xmm2
michael@0 288 paddsw xmm0, xmm4
michael@0 289 paddsw xmm0, krd
michael@0 290
michael@0 291 psraw xmm0, 7
michael@0 292 packuswb xmm0, xmm0
michael@0 293
michael@0 294 add rsi, rdx
michael@0 295 add rax, rdx
michael@0 296 %if %1
michael@0 297 movq xmm1, [rdi+8]
michael@0 298 pavgb xmm0, xmm1
michael@0 299 %endif
michael@0 300
michael@0 301 movq [rdi+8], xmm0
michael@0 302
michael@0 303 %if ABI_IS_32BIT
michael@0 304 add rdi, DWORD PTR arg(3) ;out_pitch
michael@0 305 %else
michael@0 306 add rdi, r8
michael@0 307 %endif
michael@0 308 dec rcx
michael@0 309 jnz .loop
michael@0 310 %endm
michael@0 311
michael@0 312 ;void vp9_filter_block1d4_v8_ssse3
michael@0 313 ;(
michael@0 314 ; unsigned char *src_ptr,
michael@0 315 ; unsigned int src_pitch,
michael@0 316 ; unsigned char *output_ptr,
michael@0 317 ; unsigned int out_pitch,
michael@0 318 ; unsigned int output_height,
michael@0 319 ; short *filter
michael@0 320 ;)
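;
; A rough C sketch (hypothetical, not part of the build) of what the VERTx*
; macros above compute per output pixel, assuming the caller has already
; backed src_ptr up to the first of the eight tap rows; clip_uint8() is a
; stand-in for the 0..255 clamp that packuswb performs:
;
;   for (i = 0; i < output_height; ++i)
;     for (x = 0; x < w; ++x) {                   /* w = 4, 8 or 16 */
;       int k, sum = 0;
;       for (k = 0; k < 8; ++k)
;         sum += src_ptr[(i + k) * src_pitch + x] * filter[k];
;       output_ptr[i * out_pitch + x] = clip_uint8((sum + 64) >> 7);
;     }
;
; The saturating 16-bit adds used in the asm may differ from this
; full-precision sum for extreme filter/pixel combinations.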
michael@0 321 global sym(vp9_filter_block1d4_v8_ssse3) PRIVATE
michael@0 322 sym(vp9_filter_block1d4_v8_ssse3):
michael@0 323 push rbp
michael@0 324 mov rbp, rsp
michael@0 325 SHADOW_ARGS_TO_STACK 6
michael@0 326 SAVE_XMM 7
michael@0 327 push rsi
michael@0 328 push rdi
michael@0 329 push rbx
michael@0 330 ; end prolog
michael@0 331
michael@0 332 ALIGN_STACK 16, rax
michael@0 333 sub rsp, 16*5
michael@0 334 %define k0k1 [rsp + 16*0]
michael@0 335 %define k2k3 [rsp + 16*1]
michael@0 336 %define k4k5 [rsp + 16*2]
michael@0 337 %define k6k7 [rsp + 16*3]
michael@0 338 %define krd [rsp + 16*4]
michael@0 339
michael@0 340 VERTx4 0
michael@0 341
michael@0 342 add rsp, 16*5
michael@0 343 pop rsp
michael@0 344 pop rbx
michael@0 345 ; begin epilog
michael@0 346 pop rdi
michael@0 347 pop rsi
michael@0 348 RESTORE_XMM
michael@0 349 UNSHADOW_ARGS
michael@0 350 pop rbp
michael@0 351 ret
michael@0 352
michael@0 353 ;void vp9_filter_block1d8_v8_ssse3
michael@0 354 ;(
michael@0 355 ; unsigned char *src_ptr,
michael@0 356 ; unsigned int src_pitch,
michael@0 357 ; unsigned char *output_ptr,
michael@0 358 ; unsigned int out_pitch,
michael@0 359 ; unsigned int output_height,
michael@0 360 ; short *filter
michael@0 361 ;)
michael@0 362 global sym(vp9_filter_block1d8_v8_ssse3) PRIVATE
michael@0 363 sym(vp9_filter_block1d8_v8_ssse3):
michael@0 364 push rbp
michael@0 365 mov rbp, rsp
michael@0 366 SHADOW_ARGS_TO_STACK 6
michael@0 367 SAVE_XMM 7
michael@0 368 push rsi
michael@0 369 push rdi
michael@0 370 push rbx
michael@0 371 ; end prolog
michael@0 372
michael@0 373 ALIGN_STACK 16, rax
michael@0 374 sub rsp, 16*5
michael@0 375 %define k0k1 [rsp + 16*0]
michael@0 376 %define k2k3 [rsp + 16*1]
michael@0 377 %define k4k5 [rsp + 16*2]
michael@0 378 %define k6k7 [rsp + 16*3]
michael@0 379 %define krd [rsp + 16*4]
michael@0 380
michael@0 381 VERTx8 0
michael@0 382
michael@0 383 add rsp, 16*5
michael@0 384 pop rsp
michael@0 385 pop rbx
michael@0 386 ; begin epilog
michael@0 387 pop rdi
michael@0 388 pop rsi
michael@0 389 RESTORE_XMM
michael@0 390 UNSHADOW_ARGS
michael@0 391 pop rbp
michael@0 392 ret
michael@0 393
michael@0 394 ;void vp9_filter_block1d16_v8_ssse3
michael@0 395 ;(
michael@0 396 ; unsigned char *src_ptr,
michael@0 397 ; unsigned int src_pitch,
michael@0 398 ; unsigned char *output_ptr,
michael@0 399 ; unsigned int out_pitch,
michael@0 400 ; unsigned int output_height,
michael@0 401 ; short *filter
michael@0 402 ;)
michael@0 403 global sym(vp9_filter_block1d16_v8_ssse3) PRIVATE
michael@0 404 sym(vp9_filter_block1d16_v8_ssse3):
michael@0 405 push rbp
michael@0 406 mov rbp, rsp
michael@0 407 SHADOW_ARGS_TO_STACK 6
michael@0 408 SAVE_XMM 7
michael@0 409 push rsi
michael@0 410 push rdi
michael@0 411 push rbx
michael@0 412 ; end prolog
michael@0 413
michael@0 414 ALIGN_STACK 16, rax
michael@0 415 sub rsp, 16*5
michael@0 416 %define k0k1 [rsp + 16*0]
michael@0 417 %define k2k3 [rsp + 16*1]
michael@0 418 %define k4k5 [rsp + 16*2]
michael@0 419 %define k6k7 [rsp + 16*3]
michael@0 420 %define krd [rsp + 16*4]
michael@0 421
michael@0 422 VERTx16 0
michael@0 423
michael@0 424 add rsp, 16*5
michael@0 425 pop rsp
michael@0 426 pop rbx
michael@0 427 ; begin epilog
michael@0 428 pop rdi
michael@0 429 pop rsi
michael@0 430 RESTORE_XMM
michael@0 431 UNSHADOW_ARGS
michael@0 432 pop rbp
michael@0 433 ret
michael@0 434
michael@0 435 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
michael@0 436
michael@0 437
michael@0 438 global sym(vp9_filter_block1d4_v8_avg_ssse3) PRIVATE
michael@0 439 sym(vp9_filter_block1d4_v8_avg_ssse3):
michael@0 440 push rbp
michael@0 441 mov rbp, rsp
michael@0 442 SHADOW_ARGS_TO_STACK 6
michael@0 443 SAVE_XMM 7
michael@0 444 push rsi
michael@0 445 push rdi
michael@0 446 push rbx
michael@0 447 ; end prolog
michael@0 448
michael@0 449 ALIGN_STACK 16, rax
michael@0 450 sub rsp, 16*5
michael@0 451 %define k0k1 [rsp + 16*0]
michael@0 452 %define k2k3 [rsp + 16*1]
michael@0 453 %define k4k5 [rsp + 16*2]
michael@0 454 %define k6k7 [rsp + 16*3]
michael@0 455 %define krd [rsp + 16*4]
michael@0 456
michael@0 457 VERTx4 1
michael@0 458
michael@0 459 add rsp, 16*5
michael@0 460 pop rsp
michael@0 461 pop rbx
michael@0 462 ; begin epilog
michael@0 463 pop rdi
michael@0 464 pop rsi
michael@0 465 RESTORE_XMM
michael@0 466 UNSHADOW_ARGS
michael@0 467 pop rbp
michael@0 468 ret
michael@0 469
michael@0 470 global sym(vp9_filter_block1d8_v8_avg_ssse3) PRIVATE
michael@0 471 sym(vp9_filter_block1d8_v8_avg_ssse3):
michael@0 472 push rbp
michael@0 473 mov rbp, rsp
michael@0 474 SHADOW_ARGS_TO_STACK 6
michael@0 475 SAVE_XMM 7
michael@0 476 push rsi
michael@0 477 push rdi
michael@0 478 push rbx
michael@0 479 ; end prolog
michael@0 480
michael@0 481 ALIGN_STACK 16, rax
michael@0 482 sub rsp, 16*5
michael@0 483 %define k0k1 [rsp + 16*0]
michael@0 484 %define k2k3 [rsp + 16*1]
michael@0 485 %define k4k5 [rsp + 16*2]
michael@0 486 %define k6k7 [rsp + 16*3]
michael@0 487 %define krd [rsp + 16*4]
michael@0 488
michael@0 489 VERTx8 1
michael@0 490
michael@0 491 add rsp, 16*5
michael@0 492 pop rsp
michael@0 493 pop rbx
michael@0 494 ; begin epilog
michael@0 495 pop rdi
michael@0 496 pop rsi
michael@0 497 RESTORE_XMM
michael@0 498 UNSHADOW_ARGS
michael@0 499 pop rbp
michael@0 500 ret
michael@0 501
michael@0 502 global sym(vp9_filter_block1d16_v8_avg_ssse3) PRIVATE
michael@0 503 sym(vp9_filter_block1d16_v8_avg_ssse3):
michael@0 504 push rbp
michael@0 505 mov rbp, rsp
michael@0 506 SHADOW_ARGS_TO_STACK 6
michael@0 507 SAVE_XMM 7
michael@0 508 push rsi
michael@0 509 push rdi
michael@0 510 push rbx
michael@0 511 ; end prolog
michael@0 512
michael@0 513 ALIGN_STACK 16, rax
michael@0 514 sub rsp, 16*5
michael@0 515 %define k0k1 [rsp + 16*0]
michael@0 516 %define k2k3 [rsp + 16*1]
michael@0 517 %define k4k5 [rsp + 16*2]
michael@0 518 %define k6k7 [rsp + 16*3]
michael@0 519 %define krd [rsp + 16*4]
michael@0 520
michael@0 521 VERTx16 1
michael@0 522
michael@0 523 add rsp, 16*5
michael@0 524 pop rsp
michael@0 525 pop rbx
michael@0 526 ; begin epilog
michael@0 527 pop rdi
michael@0 528 pop rsi
michael@0 529 RESTORE_XMM
michael@0 530 UNSHADOW_ARGS
michael@0 531 pop rbp
michael@0 532 ret
michael@0 533
michael@0 534 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
michael@0 535 %macro HORIZx4_ROW 2
michael@0 536 movdqa %2, %1
michael@0 537 pshufb %1, [GLOBAL(shuf_t0t1)]
michael@0 538 pshufb %2, [GLOBAL(shuf_t2t3)]
michael@0 539 pmaddubsw %1, k0k1k4k5
michael@0 540 pmaddubsw %2, k2k3k6k7
michael@0 541
michael@0 542 movdqa xmm4, %1
michael@0 543 movdqa xmm5, %2
michael@0 544 psrldq %1, 8
michael@0 545 psrldq %2, 8
michael@0 546 movdqa xmm6, xmm5
michael@0 547
michael@0 548 paddsw xmm4, %2
michael@0 549 pmaxsw xmm5, %1
michael@0 550 pminsw %1, xmm6
michael@0 551 paddsw %1, xmm4
michael@0 552 paddsw %1, xmm5
michael@0 553
michael@0 554 paddsw %1, krd
michael@0 555 psraw %1, 7
michael@0 556 packuswb %1, %1
michael@0 557 %endm
michael@0 558
michael@0 559 %macro HORIZx4 1
michael@0 560 mov rdx, arg(5) ;filter ptr
michael@0 561 mov rsi, arg(0) ;src_ptr
michael@0 562 mov rdi, arg(2) ;output_ptr
michael@0 563 mov rcx, 0x0400040
michael@0 564
michael@0 565 movdqa xmm4, [rdx] ;load filters
michael@0 566 movq xmm5, rcx
michael@0 567 packsswb xmm4, xmm4
michael@0 568 pshuflw xmm6, xmm4, 0b ;k0_k1
michael@0 569 pshufhw xmm6, xmm6, 10101010b ;k0_k1_k4_k5
michael@0 570 pshuflw xmm7, xmm4, 01010101b ;k2_k3
michael@0 571 pshufhw xmm7, xmm7, 11111111b ;k2_k3_k6_k7
michael@0 572 pshufd xmm5, xmm5, 0 ;rounding
michael@0 573
michael@0 574 movdqa k0k1k4k5, xmm6
michael@0 575 movdqa k2k3k6k7, xmm7
michael@0 576 movdqa krd, xmm5
michael@0 577
michael@0 578 movsxd rax, dword ptr arg(1) ;src_pixels_per_line
michael@0 579 movsxd rdx, dword ptr arg(3) ;output_pitch
michael@0 580 movsxd rcx, dword ptr arg(4) ;output_height
michael@0 581 shr rcx, 1
michael@0 582 .loop:
michael@0 583 ;Do two rows at once
michael@0 584 movq xmm0, [rsi - 3] ;load src
michael@0 585 movq xmm1, [rsi + 5]
michael@0 586 movq xmm2, [rsi + rax - 3]
michael@0 587 movq xmm3, [rsi + rax + 5]
michael@0 588 punpcklqdq xmm0, xmm1
michael@0 589 punpcklqdq xmm2, xmm3
michael@0 590
michael@0 591 HORIZx4_ROW xmm0, xmm1
michael@0 592 HORIZx4_ROW xmm2, xmm3
michael@0 593 %if %1
michael@0 594 movd xmm1, [rdi]
michael@0 595 pavgb xmm0, xmm1
michael@0 596 movd xmm3, [rdi + rdx]
michael@0 597 pavgb xmm2, xmm3
michael@0 598 %endif
michael@0 599 movd [rdi], xmm0
michael@0 600 movd [rdi +rdx], xmm2
michael@0 601
michael@0 602 lea rsi, [rsi + rax]
michael@0 603 prefetcht0 [rsi + 4 * rax - 3]
michael@0 604 lea rsi, [rsi + rax]
michael@0 605 lea rdi, [rdi + 2 * rdx]
michael@0 606 prefetcht0 [rsi + 2 * rax - 3]
michael@0 607
michael@0 608 dec rcx
michael@0 609 jnz .loop
michael@0 610
michael@0 611 ; Do last row if output_height is odd
michael@0 612 movsxd rcx, dword ptr arg(4) ;output_height
michael@0 613 and rcx, 1
michael@0 614 je .done
michael@0 615
michael@0 616 movq xmm0, [rsi - 3] ; load src
michael@0 617 movq xmm1, [rsi + 5]
michael@0 618 punpcklqdq xmm0, xmm1
michael@0 619
michael@0 620 HORIZx4_ROW xmm0, xmm1
michael@0 621 %if %1
michael@0 622 movd xmm1, [rdi]
michael@0 623 pavgb xmm0, xmm1
michael@0 624 %endif
michael@0 625 movd [rdi], xmm0
michael@0 626 .done:
michael@0 627 %endm
michael@0 628
michael@0 629 %macro HORIZx8_ROW 4
michael@0 630 movdqa %2, %1
michael@0 631 movdqa %3, %1
michael@0 632 movdqa %4, %1
michael@0 633
michael@0 634 pshufb %1, [GLOBAL(shuf_t0t1)]
michael@0 635 pshufb %2, [GLOBAL(shuf_t2t3)]
michael@0 636 pshufb %3, [GLOBAL(shuf_t4t5)]
michael@0 637 pshufb %4, [GLOBAL(shuf_t6t7)]
michael@0 638
michael@0 639 pmaddubsw %1, k0k1
michael@0 640 pmaddubsw %2, k2k3
michael@0 641 pmaddubsw %3, k4k5
michael@0 642 pmaddubsw %4, k6k7
michael@0 643
michael@0 644 paddsw %1, %4
michael@0 645 movdqa %4, %2
michael@0 646 pmaxsw %2, %3
michael@0 647 pminsw %3, %4
michael@0 648 paddsw %1, %3
michael@0 649 paddsw %1, %2
michael@0 650
michael@0 651 paddsw %1, krd
michael@0 652 psraw %1, 7
michael@0 653 packuswb %1, %1
michael@0 654 %endm
michael@0 655
michael@0 656 %macro HORIZx8 1
michael@0 657 mov rdx, arg(5) ;filter ptr
michael@0 658 mov rsi, arg(0) ;src_ptr
michael@0 659 mov rdi, arg(2) ;output_ptr
michael@0 660 mov rcx, 0x0400040
michael@0 661
michael@0 662 movdqa xmm4, [rdx] ;load filters
michael@0 663 movd xmm5, rcx
michael@0 664 packsswb xmm4, xmm4
michael@0 665 pshuflw xmm0, xmm4, 0b ;k0_k1
michael@0 666 pshuflw xmm1, xmm4, 01010101b ;k2_k3
michael@0 667 pshuflw xmm2, xmm4, 10101010b ;k4_k5
michael@0 668 pshuflw xmm3, xmm4, 11111111b ;k6_k7
michael@0 669
michael@0 670 punpcklqdq xmm0, xmm0
michael@0 671 punpcklqdq xmm1, xmm1
michael@0 672 punpcklqdq xmm2, xmm2
michael@0 673 punpcklqdq xmm3, xmm3
michael@0 674
michael@0 675 movdqa k0k1, xmm0
michael@0 676 movdqa k2k3, xmm1
michael@0 677 pshufd xmm5, xmm5, 0
michael@0 678 movdqa k4k5, xmm2
michael@0 679 movdqa k6k7, xmm3
michael@0 680 movdqa krd, xmm5
michael@0 681
michael@0 682 movsxd rax, dword ptr arg(1) ;src_pixels_per_line
michael@0 683 movsxd rdx, dword ptr arg(3) ;output_pitch
michael@0 684 movsxd rcx, dword ptr arg(4) ;output_height
michael@0 685 shr rcx, 1
michael@0 686
michael@0 687 .loop:
michael@0 688 movq xmm0, [rsi - 3] ;load src
michael@0 689 movq xmm3, [rsi + 5]
michael@0 690 movq xmm4, [rsi + rax - 3]
michael@0 691 movq xmm7, [rsi + rax + 5]
michael@0 692 punpcklqdq xmm0, xmm3
michael@0 693 punpcklqdq xmm4, xmm7
michael@0 694
michael@0 695 HORIZx8_ROW xmm0, xmm1, xmm2, xmm3
michael@0 696 HORIZx8_ROW xmm4, xmm5, xmm6, xmm7
michael@0 697 %if %1
michael@0 698 movq xmm1, [rdi]
michael@0 699 movq xmm2, [rdi + rdx]
michael@0 700 pavgb xmm0, xmm1
michael@0 701 pavgb xmm4, xmm2
michael@0 702 %endif
michael@0 703 movq [rdi], xmm0
michael@0 704 movq [rdi + rdx], xmm4
michael@0 705
michael@0 706 lea rsi, [rsi + rax]
michael@0 707 prefetcht0 [rsi + 4 * rax - 3]
michael@0 708 lea rsi, [rsi + rax]
michael@0 709 lea rdi, [rdi + 2 * rdx]
michael@0 710 prefetcht0 [rsi + 2 * rax - 3]
michael@0 711 dec rcx
michael@0 712 jnz .loop
michael@0 713
michael@0 714 ;Do last row if output_height is odd
michael@0 715 movsxd rcx, dword ptr arg(4) ;output_height
michael@0 716 and rcx, 1
michael@0 717 je .done
michael@0 718
michael@0 719 movq xmm0, [rsi - 3]
michael@0 720 movq xmm3, [rsi + 5]
michael@0 721 punpcklqdq xmm0, xmm3
michael@0 722
michael@0 723 HORIZx8_ROW xmm0, xmm1, xmm2, xmm3
michael@0 724 %if %1
michael@0 725 movq xmm1, [rdi]
michael@0 726 pavgb xmm0, xmm1
michael@0 727 %endif
michael@0 728 movq [rdi], xmm0
michael@0 729 .done:
michael@0 730 %endm
michael@0 731
michael@0 732 %macro HORIZx16 1
michael@0 733 mov rdx, arg(5) ;filter ptr
michael@0 734 mov rsi, arg(0) ;src_ptr
michael@0 735 mov rdi, arg(2) ;output_ptr
michael@0 736 mov rcx, 0x0400040
michael@0 737
michael@0 738 movdqa xmm4, [rdx] ;load filters
michael@0 739 movq xmm5, rcx
michael@0 740 packsswb xmm4, xmm4
michael@0 741 pshuflw xmm0, xmm4, 0b ;k0_k1
michael@0 742 pshuflw xmm1, xmm4, 01010101b ;k2_k3
michael@0 743 pshuflw xmm2, xmm4, 10101010b ;k4_k5
michael@0 744 pshuflw xmm3, xmm4, 11111111b ;k6_k7
michael@0 745
michael@0 746 punpcklqdq xmm0, xmm0
michael@0 747 punpcklqdq xmm1, xmm1
michael@0 748 punpcklqdq xmm2, xmm2
michael@0 749 punpcklqdq xmm3, xmm3
michael@0 750
michael@0 751 movdqa k0k1, xmm0
michael@0 752 movdqa k2k3, xmm1
michael@0 753 pshufd xmm5, xmm5, 0
michael@0 754 movdqa k4k5, xmm2
michael@0 755 movdqa k6k7, xmm3
michael@0 756 movdqa krd, xmm5
michael@0 757
michael@0 758 movsxd rax, dword ptr arg(1) ;src_pixels_per_line
michael@0 759 movsxd rdx, dword ptr arg(3) ;output_pitch
michael@0 760 movsxd rcx, dword ptr arg(4) ;output_height
michael@0 761
michael@0 762 .loop:
michael@0 763 prefetcht0 [rsi + 2 * rax -3]
michael@0 764
michael@0 765 movq xmm0, [rsi - 3] ;load src data
michael@0 766 movq xmm4, [rsi + 5]
michael@0 767 movq xmm7, [rsi + 13]
michael@0 768 punpcklqdq xmm0, xmm4
michael@0 769 punpcklqdq xmm4, xmm7
michael@0 770
michael@0 771 movdqa xmm1, xmm0
michael@0 772 movdqa xmm2, xmm0
michael@0 773 movdqa xmm3, xmm0
michael@0 774 movdqa xmm5, xmm4
michael@0 775 movdqa xmm6, xmm4
michael@0 776 movdqa xmm7, xmm4
michael@0 777
michael@0 778 pshufb xmm0, [GLOBAL(shuf_t0t1)]
michael@0 779 pshufb xmm1, [GLOBAL(shuf_t2t3)]
michael@0 780 pshufb xmm2, [GLOBAL(shuf_t4t5)]
michael@0 781 pshufb xmm3, [GLOBAL(shuf_t6t7)]
michael@0 782 pshufb xmm4, [GLOBAL(shuf_t0t1)]
michael@0 783 pshufb xmm5, [GLOBAL(shuf_t2t3)]
michael@0 784 pshufb xmm6, [GLOBAL(shuf_t4t5)]
michael@0 785 pshufb xmm7, [GLOBAL(shuf_t6t7)]
michael@0 786
michael@0 787 pmaddubsw xmm0, k0k1
michael@0 788 pmaddubsw xmm1, k2k3
michael@0 789 pmaddubsw xmm2, k4k5
michael@0 790 pmaddubsw xmm3, k6k7
michael@0 791 pmaddubsw xmm4, k0k1
michael@0 792 pmaddubsw xmm5, k2k3
michael@0 793 pmaddubsw xmm6, k4k5
michael@0 794 pmaddubsw xmm7, k6k7
michael@0 795
michael@0 796 paddsw xmm0, xmm3
michael@0 797 movdqa xmm3, xmm1
michael@0 798 pmaxsw xmm1, xmm2
michael@0 799 pminsw xmm2, xmm3
michael@0 800 paddsw xmm0, xmm2
michael@0 801 paddsw xmm0, xmm1
michael@0 802
michael@0 803 paddsw xmm4, xmm7
michael@0 804 movdqa xmm7, xmm5
michael@0 805 pmaxsw xmm5, xmm6
michael@0 806 pminsw xmm6, xmm7
michael@0 807 paddsw xmm4, xmm6
michael@0 808 paddsw xmm4, xmm5
michael@0 809
michael@0 810 paddsw xmm0, krd
michael@0 811 paddsw xmm4, krd
michael@0 812 psraw xmm0, 7
michael@0 813 psraw xmm4, 7
michael@0 814 packuswb xmm0, xmm0
michael@0 815 packuswb xmm4, xmm4
michael@0 816 punpcklqdq xmm0, xmm4
michael@0 817 %if %1
michael@0 818 movdqa xmm1, [rdi]
michael@0 819 pavgb xmm0, xmm1
michael@0 820 %endif
michael@0 821
michael@0 822 lea rsi, [rsi + rax]
michael@0 823 movdqa [rdi], xmm0
michael@0 824
michael@0 825 lea rdi, [rdi + rdx]
michael@0 826 dec rcx
michael@0 827 jnz .loop
michael@0 828 %endm
michael@0 829
michael@0 830 ;void vp9_filter_block1d4_h8_ssse3
michael@0 831 ;(
michael@0 832 ; unsigned char *src_ptr,
michael@0 833 ; unsigned int src_pixels_per_line,
michael@0 834 ; unsigned char *output_ptr,
michael@0 835 ; unsigned int output_pitch,
michael@0 836 ; unsigned int output_height,
michael@0 837 ; short *filter
michael@0 838 ;)
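;
; A rough C sketch (hypothetical, not part of the build) of the horizontal
; pass implemented by the HORIZx* macros; the asm reads from src_ptr - 3, so
; the eight taps straddle the output pixel, and clip_uint8() again stands in
; for the packuswb clamp:
;
;   for (i = 0; i < output_height; ++i)
;     for (x = 0; x < w; ++x) {                   /* w = 4, 8 or 16 */
;       int k, sum = 0;
;       for (k = 0; k < 8; ++k)
;         sum += src_ptr[i * src_pixels_per_line + x - 3 + k] * filter[k];
;       output_ptr[i * output_pitch + x] = clip_uint8((sum + 64) >> 7);
;     }
;
; The _avg variants further below pavgb the filtered result with the bytes
; already present in output_ptr before storing.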
michael@0 839 global sym(vp9_filter_block1d4_h8_ssse3) PRIVATE
michael@0 840 sym(vp9_filter_block1d4_h8_ssse3):
michael@0 841 push rbp
michael@0 842 mov rbp, rsp
michael@0 843 SHADOW_ARGS_TO_STACK 6
michael@0 844 SAVE_XMM 7
michael@0 845 GET_GOT rbx
michael@0 846 push rsi
michael@0 847 push rdi
michael@0 848 ; end prolog
michael@0 849
michael@0 850 ALIGN_STACK 16, rax
michael@0 851 sub rsp, 16 * 3
michael@0 852 %define k0k1k4k5 [rsp + 16 * 0]
michael@0 853 %define k2k3k6k7 [rsp + 16 * 1]
michael@0 854 %define krd [rsp + 16 * 2]
michael@0 855
michael@0 856 HORIZx4 0
michael@0 857
michael@0 858 add rsp, 16 * 3
michael@0 859 pop rsp
michael@0 860 ; begin epilog
michael@0 861 pop rdi
michael@0 862 pop rsi
michael@0 863 RESTORE_GOT
michael@0 864 RESTORE_XMM
michael@0 865 UNSHADOW_ARGS
michael@0 866 pop rbp
michael@0 867 ret
michael@0 868
michael@0 869 ;void vp9_filter_block1d8_h8_ssse3
michael@0 870 ;(
michael@0 871 ; unsigned char *src_ptr,
michael@0 872 ; unsigned int src_pixels_per_line,
michael@0 873 ; unsigned char *output_ptr,
michael@0 874 ; unsigned int output_pitch,
michael@0 875 ; unsigned int output_height,
michael@0 876 ; short *filter
michael@0 877 ;)
michael@0 878 global sym(vp9_filter_block1d8_h8_ssse3) PRIVATE
michael@0 879 sym(vp9_filter_block1d8_h8_ssse3):
michael@0 880 push rbp
michael@0 881 mov rbp, rsp
michael@0 882 SHADOW_ARGS_TO_STACK 6
michael@0 883 SAVE_XMM 7
michael@0 884 GET_GOT rbx
michael@0 885 push rsi
michael@0 886 push rdi
michael@0 887 ; end prolog
michael@0 888
michael@0 889 ALIGN_STACK 16, rax
michael@0 890 sub rsp, 16*5
michael@0 891 %define k0k1 [rsp + 16*0]
michael@0 892 %define k2k3 [rsp + 16*1]
michael@0 893 %define k4k5 [rsp + 16*2]
michael@0 894 %define k6k7 [rsp + 16*3]
michael@0 895 %define krd [rsp + 16*4]
michael@0 896
michael@0 897 HORIZx8 0
michael@0 898
michael@0 899 add rsp, 16*5
michael@0 900 pop rsp
michael@0 901
michael@0 902 ; begin epilog
michael@0 903 pop rdi
michael@0 904 pop rsi
michael@0 905 RESTORE_GOT
michael@0 906 RESTORE_XMM
michael@0 907 UNSHADOW_ARGS
michael@0 908 pop rbp
michael@0 909 ret
michael@0 910
michael@0 911 ;void vp9_filter_block1d16_h8_ssse3
michael@0 912 ;(
michael@0 913 ; unsigned char *src_ptr,
michael@0 914 ; unsigned int src_pixels_per_line,
michael@0 915 ; unsigned char *output_ptr,
michael@0 916 ; unsigned int output_pitch,
michael@0 917 ; unsigned int output_height,
michael@0 918 ; short *filter
michael@0 919 ;)
michael@0 920 global sym(vp9_filter_block1d16_h8_ssse3) PRIVATE
michael@0 921 sym(vp9_filter_block1d16_h8_ssse3):
michael@0 922 push rbp
michael@0 923 mov rbp, rsp
michael@0 924 SHADOW_ARGS_TO_STACK 6
michael@0 925 SAVE_XMM 7
michael@0 926 GET_GOT rbx
michael@0 927 push rsi
michael@0 928 push rdi
michael@0 929 ; end prolog
michael@0 930
michael@0 931 ALIGN_STACK 16, rax
michael@0 932 sub rsp, 16*5
michael@0 933 %define k0k1 [rsp + 16*0]
michael@0 934 %define k2k3 [rsp + 16*1]
michael@0 935 %define k4k5 [rsp + 16*2]
michael@0 936 %define k6k7 [rsp + 16*3]
michael@0 937 %define krd [rsp + 16*4]
michael@0 938
michael@0 939 HORIZx16 0
michael@0 940
michael@0 941 add rsp, 16*5
michael@0 942 pop rsp
michael@0 943
michael@0 944 ; begin epilog
michael@0 945 pop rdi
michael@0 946 pop rsi
michael@0 947 RESTORE_GOT
michael@0 948 RESTORE_XMM
michael@0 949 UNSHADOW_ARGS
michael@0 950 pop rbp
michael@0 951 ret
michael@0 952
michael@0 953 global sym(vp9_filter_block1d4_h8_avg_ssse3) PRIVATE
michael@0 954 sym(vp9_filter_block1d4_h8_avg_ssse3):
michael@0 955 push rbp
michael@0 956 mov rbp, rsp
michael@0 957 SHADOW_ARGS_TO_STACK 6
michael@0 958 SAVE_XMM 7
michael@0 959 GET_GOT rbx
michael@0 960 push rsi
michael@0 961 push rdi
michael@0 962 ; end prolog
michael@0 963
michael@0 964 ALIGN_STACK 16, rax
michael@0 965 sub rsp, 16 * 3
michael@0 966 %define k0k1k4k5 [rsp + 16 * 0]
michael@0 967 %define k2k3k6k7 [rsp + 16 * 1]
michael@0 968 %define krd [rsp + 16 * 2]
michael@0 969
michael@0 970 HORIZx4 1
michael@0 971
michael@0 972 add rsp, 16 * 3
michael@0 973 pop rsp
michael@0 974 ; begin epilog
michael@0 975 pop rdi
michael@0 976 pop rsi
michael@0 977 RESTORE_GOT
michael@0 978 RESTORE_XMM
michael@0 979 UNSHADOW_ARGS
michael@0 980 pop rbp
michael@0 981 ret
michael@0 982
michael@0 983 global sym(vp9_filter_block1d8_h8_avg_ssse3) PRIVATE
michael@0 984 sym(vp9_filter_block1d8_h8_avg_ssse3):
michael@0 985 push rbp
michael@0 986 mov rbp, rsp
michael@0 987 SHADOW_ARGS_TO_STACK 6
michael@0 988 SAVE_XMM 7
michael@0 989 GET_GOT rbx
michael@0 990 push rsi
michael@0 991 push rdi
michael@0 992 ; end prolog
michael@0 993
michael@0 994 ALIGN_STACK 16, rax
michael@0 995 sub rsp, 16*5
michael@0 996 %define k0k1 [rsp + 16*0]
michael@0 997 %define k2k3 [rsp + 16*1]
michael@0 998 %define k4k5 [rsp + 16*2]
michael@0 999 %define k6k7 [rsp + 16*3]
michael@0 1000 %define krd [rsp + 16*4]
michael@0 1001
michael@0 1002 HORIZx8 1
michael@0 1003
michael@0 1004 add rsp, 16*5
michael@0 1005 pop rsp
michael@0 1006
michael@0 1007 ; begin epilog
michael@0 1008 pop rdi
michael@0 1009 pop rsi
michael@0 1010 RESTORE_GOT
michael@0 1011 RESTORE_XMM
michael@0 1012 UNSHADOW_ARGS
michael@0 1013 pop rbp
michael@0 1014 ret
michael@0 1015
michael@0 1016 global sym(vp9_filter_block1d16_h8_avg_ssse3) PRIVATE
michael@0 1017 sym(vp9_filter_block1d16_h8_avg_ssse3):
michael@0 1018 push rbp
michael@0 1019 mov rbp, rsp
michael@0 1020 SHADOW_ARGS_TO_STACK 6
michael@0 1021 SAVE_XMM 7
michael@0 1022 GET_GOT rbx
michael@0 1023 push rsi
michael@0 1024 push rdi
michael@0 1025 ; end prolog
michael@0 1026
michael@0 1027 ALIGN_STACK 16, rax
michael@0 1028 sub rsp, 16*5
michael@0 1029 %define k0k1 [rsp + 16*0]
michael@0 1030 %define k2k3 [rsp + 16*1]
michael@0 1031 %define k4k5 [rsp + 16*2]
michael@0 1032 %define k6k7 [rsp + 16*3]
michael@0 1033 %define krd [rsp + 16*4]
michael@0 1034
michael@0 1035 HORIZx16 1
michael@0 1036
michael@0 1037 add rsp, 16*5
michael@0 1038 pop rsp
michael@0 1039
michael@0 1040 ; begin epilog
michael@0 1041 pop rdi
michael@0 1042 pop rsi
michael@0 1043 RESTORE_GOT
michael@0 1044 RESTORE_XMM
michael@0 1045 UNSHADOW_ARGS
michael@0 1046 pop rbp
michael@0 1047 ret
michael@0 1048 SECTION_RODATA
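; Shuffle masks for the horizontal filters: shuf_t0t1 interleaves each source
; byte with its right-hand neighbour (0,1, 1,2, 2,3, ...), shuf_t2t3 does the
; same starting two bytes further along, and so on. After pshufb, every
; 16-bit lane holds a (src[x+t], src[x+t+1]) pair, which pmaddubsw multiplies
; by the matching (k_t, k_t+1) filter pair and sums, computing two taps of
; the 8-tap convolution per instruction.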
michael@0 1049 align 16
michael@0 1050 shuf_t0t1:
michael@0 1051 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
michael@0 1052 align 16
michael@0 1053 shuf_t2t3:
michael@0 1054 db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
michael@0 1055 align 16
michael@0 1056 shuf_t4t5:
michael@0 1057 db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
michael@0 1058 align 16
michael@0 1059 shuf_t6t7:
michael@0 1060 db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
