|
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"
%include "vp8_asm_enc_offsets.asm"


; void vp8_fast_quantize_b_ssse3 | arg
;     (BLOCK  *b,                | 0
;      BLOCKD *d)                | 1
;
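;
; A rough C-style sketch of what the routine below computes (the variable
; names here are illustrative, not the actual struct fields):
;
;   for (i = 0; i < 16; i++) {
;     sz = z[i] >> 15;                                // 0 or -1 (sign mask)
;     y  = ((abs(z[i]) + round[i]) * quant_fast[i]) >> 16;
;     qcoeff[i]  = (y ^ sz) - sz;                     // reapply the sign
;     dqcoeff[i] = qcoeff[i] * dequant[i];
;   }
;   eob = 1 + zig-zag index of the last nonzero qcoeff, or 0 if none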
|
global sym(vp8_fast_quantize_b_ssse3) PRIVATE
sym(vp8_fast_quantize_b_ssse3):
    push        rbp
    mov         rbp, rsp
    GET_GOT     rbx

%if ABI_IS_32BIT
    push        rdi
    push        rsi
%else
  %if LIBVPX_YASM_WIN64
    push        rdi
    push        rsi
  %endif
%endif
    ; end prolog
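    ; Load the two pointer arguments: 32-bit builds take them off the stack
    ; via arg(), Win64 passes them in rcx/rdx, and 64-bit SysV already has
    ; them in rdi/rsi, so no move is needed there.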
|
%if ABI_IS_32BIT
    mov         rdi, arg(0)                 ; BLOCK *b
    mov         rsi, arg(1)                 ; BLOCKD *d
%else
  %if LIBVPX_YASM_WIN64
    mov         rdi, rcx                    ; BLOCK *b
    mov         rsi, rdx                    ; BLOCKD *d
  %else
    ;mov        rdi, rdi                    ; BLOCK *b
    ;mov        rsi, rsi                    ; BLOCKD *d
  %endif
%endif

    mov         rax, [rdi + vp8_block_coeff]
    mov         rcx, [rdi + vp8_block_round]
    mov         rdx, [rdi + vp8_block_quant_fast]

    ; coeff
    movdqa      xmm0, [rax]
    movdqa      xmm4, [rax + 16]

    ; round
    movdqa      xmm2, [rcx]
    movdqa      xmm3, [rcx + 16]

    movdqa      xmm1, xmm0
    movdqa      xmm5, xmm4

    ; sz = z >> 15
    psraw       xmm0, 15
    psraw       xmm4, 15

    pabsw       xmm1, xmm1
    pabsw       xmm5, xmm5

    paddw       xmm1, xmm2
    paddw       xmm5, xmm3

    ; quant_fast
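    ; pmulhw keeps the high 16 bits of each signed 16x16 product, so this
    ; computes y = ((|z| + round) * quant_fast) >> 16 per coefficient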
|
    pmulhw      xmm1, [rdx]
    pmulhw      xmm5, [rdx + 16]

    mov         rax, [rsi + vp8_blockd_qcoeff]
    mov         rdi, [rsi + vp8_blockd_dequant]
    mov         rcx, [rsi + vp8_blockd_dqcoeff]

    movdqa      xmm2, xmm1                  ; store y for getting eob
    movdqa      xmm3, xmm5
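    ; reapply the sign: (y ^ sz) - sz negates y in the lanes where sz == -1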
|
    pxor        xmm1, xmm0
    pxor        xmm5, xmm4
    psubw       xmm1, xmm0
    psubw       xmm5, xmm4

    movdqa      [rax], xmm1
    movdqa      [rax + 16], xmm5
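    ; dequantize: dqcoeff = qcoeff * dequant (pmullw keeps the low 16 bits)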
|
    movdqa      xmm0, [rdi]
    movdqa      xmm4, [rdi + 16]

    pmullw      xmm0, xmm1
    pmullw      xmm4, xmm5
    pxor        xmm1, xmm1
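    ; eob: flag the nonzero quantized values, pack the 16 word flags down to
    ; bytes, shuffle them into zig-zag scan order, then locate the last
    ; nonzero coefficient with pmovmskb/bsr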
|
    pcmpgtw     xmm2, xmm1                  ; calculate eob
    pcmpgtw     xmm3, xmm1
    packsswb    xmm2, xmm3
    pshufb      xmm2, [GLOBAL(zz_shuf)]

    pmovmskb    edx, xmm2

    movdqa      [rcx], xmm0                 ; store dqcoeff
    movdqa      [rcx + 16], xmm4            ; store dqcoeff
    mov         rcx, [rsi + vp8_blockd_eob]

    bsr         eax, edx                    ; count 0
    add         eax, 1

    cmp         edx, 0                      ; if all 0, eob = 0
    cmove       eax, edx

    mov         BYTE PTR [rcx], al          ; store eob

    ; begin epilog
%if ABI_IS_32BIT
    pop         rsi
    pop         rdi
%else
  %if LIBVPX_YASM_WIN64
    pop         rsi
    pop         rdi
  %endif
%endif

    RESTORE_GOT
    pop         rbp
    ret

SECTION_RODATA
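; byte-shuffle mask holding the 4x4 zig-zag scan order; pshufb above uses it
; to put the per-coefficient flag bytes into scan order before the last
; nonzero coefficient is located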
|
align 16
zz_shuf:
    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15