Thu, 22 Jan 2015 13:21:57 +0100
Incorporate changes requested in the Mozilla review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"
%include "vp8_asm_enc_offsets.asm"


; void vp8_fast_quantize_b_ssse3 | arg
;  (BLOCK  *b,                   |  0
;   BLOCKD *d)                   |  1
;

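; Fast quantizer: for each of the 16 coefficients this computes
;     y       = ((abs(z) + round) * quant_fast) >> 16
;     qcoeff  = y with the sign of z restored
;     dqcoeff = qcoeff * dequant
; and then derives the end-of-block (eob) index from the last nonzero
; quantized coefficient in zig-zag scan order.
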
global sym(vp8_fast_quantize_b_ssse3) PRIVATE
sym(vp8_fast_quantize_b_ssse3):
    push        rbp
    mov         rbp, rsp
    GET_GOT     rbx

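    ; rdi and rsi are callee-saved on 32-bit x86 and on the Win64 ABI,
    ; so preserve them before they are clobbered below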
%if ABI_IS_32BIT
    push        rdi
    push        rsi
%else
  %if LIBVPX_YASM_WIN64
    push        rdi
    push        rsi
  %endif
%endif
    ; end prolog

%if ABI_IS_32BIT
    mov         rdi, arg(0)                 ; BLOCK *b
    mov         rsi, arg(1)                 ; BLOCKD *d
%else
  %if LIBVPX_YASM_WIN64
    mov         rdi, rcx                    ; BLOCK *b
    mov         rsi, rdx                    ; BLOCKD *d
  %else
    ;mov        rdi, rdi                    ; BLOCK *b
    ;mov        rsi, rsi                    ; BLOCKD *d
  %endif
%endif

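    ; load the coefficient, rounding and fast-quantizer table pointers
    ; from the BLOCK structure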
    mov         rax, [rdi + vp8_block_coeff]
    mov         rcx, [rdi + vp8_block_round]
    mov         rdx, [rdi + vp8_block_quant_fast]

    ; coeff
    movdqa      xmm0, [rax]
    movdqa      xmm4, [rax + 16]

    ; round
    movdqa      xmm2, [rcx]
    movdqa      xmm3, [rcx + 16]

    movdqa      xmm1, xmm0
    movdqa      xmm5, xmm4

    ; sz = z >> 15
    psraw       xmm0, 15
    psraw       xmm4, 15
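    ; sz is now 0x0000 for non-negative and 0xffff for negative
    ; coefficients; it is used below to restore the sign of the result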

    pabsw       xmm1, xmm1
    pabsw       xmm5, xmm5

    paddw       xmm1, xmm2
    paddw       xmm5, xmm3

    ; quant_fast
    pmulhw      xmm1, [rdx]
    pmulhw      xmm5, [rdx + 16]
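    ; pmulhw keeps the high 16 bits of each signed product, so this
    ; computes y = ((abs(z) + round) * quant_fast) >> 16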

    mov         rax, [rsi + vp8_blockd_qcoeff]
    mov         rdi, [rsi + vp8_blockd_dequant]
    mov         rcx, [rsi + vp8_blockd_dqcoeff]

    movdqa      xmm2, xmm1                  ; store y for getting eob
    movdqa      xmm3, xmm5

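    ; restore the sign of z: (y ^ sz) - sz negates y in the lanes where
    ; the original coefficient was negative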
    pxor        xmm1, xmm0
    pxor        xmm5, xmm4
    psubw       xmm1, xmm0
    psubw       xmm5, xmm4

    movdqa      [rax], xmm1
    movdqa      [rax + 16], xmm5

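    ; dequantize: multiply the quantized coefficients by the dequant
    ; values to produce dqcoeff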
    movdqa      xmm0, [rdi]
    movdqa      xmm4, [rdi + 16]

    pmullw      xmm0, xmm1
    pmullw      xmm4, xmm5
    pxor        xmm1, xmm1

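    ; eob: flag every nonzero quantized coefficient, pack the flags to
    ; bytes, shuffle them into zig-zag scan order and extract a bitmask;
    ; the highest set bit then marks the last nonzero coefficient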
    pcmpgtw     xmm2, xmm1                  ; calculate eob
    pcmpgtw     xmm3, xmm1
    packsswb    xmm2, xmm3
    pshufb      xmm2, [GLOBAL(zz_shuf)]

    pmovmskb    edx, xmm2

    movdqa      [rcx], xmm0                 ; store dqcoeff
    movdqa      [rcx + 16], xmm4            ; store dqcoeff
    mov         rcx, [rsi + vp8_blockd_eob]

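    ; bsr returns the index of the highest set bit, i.e. the position of
    ; the last nonzero coefficient; +1 gives the eob count.  bsr leaves
    ; its destination undefined when the source is zero, so cmove forces
    ; eob = 0 for an all-zero block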
    bsr         eax, edx                    ; count 0
    add         eax, 1

    cmp         edx, 0                      ; if all 0, eob = 0
    cmove       eax, edx

    mov         BYTE PTR [rcx], al          ; store eob

    ; begin epilog
%if ABI_IS_32BIT
    pop         rsi
    pop         rdi
%else
  %if LIBVPX_YASM_WIN64
    pop         rsi
    pop         rdi
  %endif
%endif

    RESTORE_GOT
    pop         rbp
    ret

SECTION_RODATA
align 16
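; byte shuffle that reorders the 16 packed nonzero flags into VP8's
; zig-zag scan order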
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15