|
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
|
2 // Use of this source code is governed by a BSD-style license that can be |
|
3 // found in the LICENSE file. |
|
4 |
|
5 // contributor Siarhei Siamashka <siarhei.siamashka@gmail.com> |
|
6 |
|
7 #include "yuv_convert.h" |
|
8 #include "ycbcr_to_rgb565.h" |
|
9 |
|
10 |
|
11 |
|
12 #ifdef HAVE_YCBCR_TO_RGB565 |
|
13 |
|
14 namespace mozilla { |
|
15 |
|
16 namespace gfx { |
|
17 |
|
18 # if defined(MOZILLA_MAY_SUPPORT_NEON) |
|
19 void __attribute((noinline,optimize("-fomit-frame-pointer"))) |
|
20 yuv42x_to_rgb565_row_neon(uint16 *dst, |
|
21 const uint8 *y, |
|
22 const uint8 *u, |
|
23 const uint8 *v, |
|
24 int n, |
|
25 int oddflag) |
|
26 { |
|
27 static __attribute__((aligned(16))) uint16 acc_r[8] = { |
|
28 22840, 22840, 22840, 22840, 22840, 22840, 22840, 22840, |
|
29 }; |
|
30 static __attribute__((aligned(16))) uint16 acc_g[8] = { |
|
31 17312, 17312, 17312, 17312, 17312, 17312, 17312, 17312, |
|
32 }; |
|
33 static __attribute__((aligned(16))) uint16 acc_b[8] = { |
|
34 28832, 28832, 28832, 28832, 28832, 28832, 28832, 28832, |
|
35 }; |
|
36 /* |
|
37 * Registers: |
|
38 * q0, q1 : d0, d1, d2, d3 - are used for initial loading of YUV data |
|
39 * q2 : d4, d5 - are used for storing converted RGB data |
|
40 * q3 : d6, d7 - are used for temporary storage |
|
41 * |
|
42 * q4-q7 - reserved |
|
43 * |
|
44 * q8, q9 : d16, d17, d18, d19 - are used for expanded Y data |
|
45 * q10 : d20, d21 |
|
46 * q11 : d22, d23 |
|
47 * q12 : d24, d25 |
|
48 * q13 : d26, d27 |
|
49 * q13, q14, q15 - various constants (#16, #149, #204, #50, #104, #154) |
|
50 */ |
|
51 asm volatile ( |
|
52 ".fpu neon\n" |
|
53 /* Allow to build on targets not supporting neon, and force the object file |
|
54 * target to avoid bumping the final binary target */ |
|
55 ".arch armv7-a\n" |
|
56 ".object_arch armv4t\n" |
|
57 ".macro convert_macroblock size\n" |
|
58 /* load up to 16 source pixels */ |
|
59 ".if \\size == 16\n" |
|
60 "pld [%[y], #64]\n" |
|
61 "pld [%[u], #64]\n" |
|
62 "pld [%[v], #64]\n" |
|
63 "vld1.8 {d1}, [%[y]]!\n" |
|
64 "vld1.8 {d3}, [%[y]]!\n" |
|
65 "vld1.8 {d0}, [%[u]]!\n" |
|
66 "vld1.8 {d2}, [%[v]]!\n" |
|
67 ".elseif \\size == 8\n" |
|
68 "vld1.8 {d1}, [%[y]]!\n" |
|
69 "vld1.8 {d0[0]}, [%[u]]!\n" |
|
70 "vld1.8 {d0[1]}, [%[u]]!\n" |
|
71 "vld1.8 {d0[2]}, [%[u]]!\n" |
|
72 "vld1.8 {d0[3]}, [%[u]]!\n" |
|
73 "vld1.8 {d2[0]}, [%[v]]!\n" |
|
74 "vld1.8 {d2[1]}, [%[v]]!\n" |
|
75 "vld1.8 {d2[2]}, [%[v]]!\n" |
|
76 "vld1.8 {d2[3]}, [%[v]]!\n" |
|
77 ".elseif \\size == 4\n" |
|
78 "vld1.8 {d1[0]}, [%[y]]!\n" |
|
79 "vld1.8 {d1[1]}, [%[y]]!\n" |
|
80 "vld1.8 {d1[2]}, [%[y]]!\n" |
|
81 "vld1.8 {d1[3]}, [%[y]]!\n" |
|
82 "vld1.8 {d0[0]}, [%[u]]!\n" |
|
83 "vld1.8 {d0[1]}, [%[u]]!\n" |
|
84 "vld1.8 {d2[0]}, [%[v]]!\n" |
|
85 "vld1.8 {d2[1]}, [%[v]]!\n" |
|
86 ".elseif \\size == 2\n" |
|
87 "vld1.8 {d1[0]}, [%[y]]!\n" |
|
88 "vld1.8 {d1[1]}, [%[y]]!\n" |
|
89 "vld1.8 {d0[0]}, [%[u]]!\n" |
|
90 "vld1.8 {d2[0]}, [%[v]]!\n" |
|
91 ".elseif \\size == 1\n" |
|
92 "vld1.8 {d1[0]}, [%[y]]!\n" |
|
93 "vld1.8 {d0[0]}, [%[u]]!\n" |
|
94 "vld1.8 {d2[0]}, [%[v]]!\n" |
|
95 ".else\n" |
|
96 ".error \"unsupported macroblock size\"\n" |
|
97 ".endif\n" |
|
98 |
|
99 /* d1 - Y data (first 8 bytes) */ |
|
100 /* d3 - Y data (next 8 bytes) */ |
|
101 /* d0 - U data, d2 - V data */ |
|
102 |
|
103 /* split even and odd Y color components */ |
|
104 "vuzp.8 d1, d3\n" /* d1 - evenY, d3 - oddY */ |
|
105 /* clip upper and lower boundaries */ |
|
106 "vqadd.u8 q0, q0, q4\n" |
|
107 "vqadd.u8 q1, q1, q4\n" |
|
108 "vqsub.u8 q0, q0, q5\n" |
|
109 "vqsub.u8 q1, q1, q5\n" |
|
110 |
|
111 "vshr.u8 d4, d2, #1\n" /* d4 = V >> 1 */ |
|
112 |
|
113 "vmull.u8 q8, d1, d27\n" /* q8 = evenY * 149 */ |
|
114 "vmull.u8 q9, d3, d27\n" /* q9 = oddY * 149 */ |
|
115 |
|
116 "vld1.16 {d20, d21}, [%[acc_r], :128]\n" /* q10 - initialize accumulator for red */ |
|
117 "vsubw.u8 q10, q10, d4\n" /* red acc -= (V >> 1) */ |
|
118 "vmlsl.u8 q10, d2, d28\n" /* red acc -= V * 204 */ |
|
119 "vld1.16 {d22, d23}, [%[acc_g], :128]\n" /* q11 - initialize accumulator for green */ |
|
120 "vmlsl.u8 q11, d2, d30\n" /* green acc -= V * 104 */ |
|
121 "vmlsl.u8 q11, d0, d29\n" /* green acc -= U * 50 */ |
|
122 "vld1.16 {d24, d25}, [%[acc_b], :128]\n" /* q12 - initialize accumulator for blue */ |
|
123 "vmlsl.u8 q12, d0, d30\n" /* blue acc -= U * 104 */ |
|
124 "vmlsl.u8 q12, d0, d31\n" /* blue acc -= U * 154 */ |
|
125 |
|
126 "vhsub.s16 q3, q8, q10\n" /* calculate even red components */ |
|
127 "vhsub.s16 q10, q9, q10\n" /* calculate odd red components */ |
|
128 "vqshrun.s16 d0, q3, #6\n" /* right shift, narrow and saturate even red components */ |
|
129 "vqshrun.s16 d3, q10, #6\n" /* right shift, narrow and saturate odd red components */ |
|
130 |
|
131 "vhadd.s16 q3, q8, q11\n" /* calculate even green components */ |
|
132 "vhadd.s16 q11, q9, q11\n" /* calculate odd green components */ |
|
133 "vqshrun.s16 d1, q3, #6\n" /* right shift, narrow and saturate even green components */ |
|
134 "vqshrun.s16 d4, q11, #6\n" /* right shift, narrow and saturate odd green components */ |
|
135 |
|
136 "vhsub.s16 q3, q8, q12\n" /* calculate even blue components */ |
|
137 "vhsub.s16 q12, q9, q12\n" /* calculate odd blue components */ |
|
138 "vqshrun.s16 d2, q3, #6\n" /* right shift, narrow and saturate even blue components */ |
|
139 "vqshrun.s16 d5, q12, #6\n" /* right shift, narrow and saturate odd blue components */ |
|
140 |
|
141 "vzip.8 d0, d3\n" /* join even and odd red components */ |
|
142 "vzip.8 d1, d4\n" /* join even and odd green components */ |
|
143 "vzip.8 d2, d5\n" /* join even and odd blue components */ |
|
144 |
|
145 "vshll.u8 q3, d0, #8\n\t" |
|
146 "vshll.u8 q8, d1, #8\n\t" |
|
147 "vshll.u8 q9, d2, #8\n\t" |
|
148 "vsri.u16 q3, q8, #5\t\n" |
|
149 "vsri.u16 q3, q9, #11\t\n" |
|
150 /* store pixel data to memory */ |
|
151 ".if \\size == 16\n" |
|
152 " vst1.16 {d6, d7}, [%[dst]]!\n" |
|
153 " vshll.u8 q3, d3, #8\n\t" |
|
154 " vshll.u8 q8, d4, #8\n\t" |
|
155 " vshll.u8 q9, d5, #8\n\t" |
|
156 " vsri.u16 q3, q8, #5\t\n" |
|
157 " vsri.u16 q3, q9, #11\t\n" |
|
158 " vst1.16 {d6, d7}, [%[dst]]!\n" |
|
159 ".elseif \\size == 8\n" |
|
160 " vst1.16 {d6, d7}, [%[dst]]!\n" |
|
161 ".elseif \\size == 4\n" |
|
162 " vst1.16 {d6}, [%[dst]]!\n" |
|
163 ".elseif \\size == 2\n" |
|
164 " vst1.16 {d6[0]}, [%[dst]]!\n" |
|
165 " vst1.16 {d6[1]}, [%[dst]]!\n" |
|
166 ".elseif \\size == 1\n" |
|
167 " vst1.16 {d6[0]}, [%[dst]]!\n" |
|
168 ".endif\n" |
|
169 ".endm\n" |
|
170 |
|
171 "vmov.u8 d8, #15\n" /* add this to U/V to saturate upper boundary */ |
|
172 "vmov.u8 d9, #20\n" /* add this to Y to saturate upper boundary */ |
|
173 "vmov.u8 d10, #31\n" /* sub this from U/V to saturate lower boundary */ |
|
174 "vmov.u8 d11, #36\n" /* sub this from Y to saturate lower boundary */ |
|
175 |
|
176 "vmov.u8 d26, #16\n" |
|
177 "vmov.u8 d27, #149\n" |
|
178 "vmov.u8 d28, #204\n" |
|
179 "vmov.u8 d29, #50\n" |
|
180 "vmov.u8 d30, #104\n" |
|
181 "vmov.u8 d31, #154\n" |
|
182 |
|
183 "cmp %[oddflag], #0\n" |
|
184 "beq 1f\n" |
|
185 "convert_macroblock 1\n" |
|
186 "sub %[n], %[n], #1\n" |
|
187 "1:\n" |
|
188 "subs %[n], %[n], #16\n" |
|
189 "blt 2f\n" |
|
190 "1:\n" |
|
191 "convert_macroblock 16\n" |
|
192 "subs %[n], %[n], #16\n" |
|
193 "bge 1b\n" |
|
194 "2:\n" |
|
195 "tst %[n], #8\n" |
|
196 "beq 3f\n" |
|
197 "convert_macroblock 8\n" |
|
198 "3:\n" |
|
199 "tst %[n], #4\n" |
|
200 "beq 4f\n" |
|
201 "convert_macroblock 4\n" |
|
202 "4:\n" |
|
203 "tst %[n], #2\n" |
|
204 "beq 5f\n" |
|
205 "convert_macroblock 2\n" |
|
206 "5:\n" |
|
207 "tst %[n], #1\n" |
|
208 "beq 6f\n" |
|
209 "convert_macroblock 1\n" |
|
210 "6:\n" |
|
211 ".purgem convert_macroblock\n" |
|
212 : [y] "+&r" (y), [u] "+&r" (u), [v] "+&r" (v), [dst] "+&r" (dst), [n] "+&r" (n) |
|
213 : [acc_r] "r" (&acc_r[0]), [acc_g] "r" (&acc_g[0]), [acc_b] "r" (&acc_b[0]), |
|
214 [oddflag] "r" (oddflag) |
|
215 : "cc", "memory", |
|
216 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", |
|
217 "d8", "d9", "d10", "d11", /* "d12", "d13", "d14", "d15", */ |
|
218 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", |
|
219 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" |
|
220 ); |
|
221 } |
|
222 # endif // MOZILLA_MAY_SUPPORT_NEON |
|
223 |
|
224 } // namespace gfx |
|
225 |
|
226 } // namespace mozilla |
|
227 |
|
228 #endif // HAVE_YCBCR_TO_RGB565 |