|
1 /* |
|
2 * Copyright (c) 2012 |
|
3 * MIPS Technologies, Inc., California. |
|
4 * |
|
5 * Redistribution and use in source and binary forms, with or without |
|
6 * modification, are permitted provided that the following conditions |
|
7 * are met: |
|
8 * 1. Redistributions of source code must retain the above copyright |
|
9 * notice, this list of conditions and the following disclaimer. |
|
10 * 2. Redistributions in binary form must reproduce the above copyright |
|
11 * notice, this list of conditions and the following disclaimer in the |
|
12 * documentation and/or other materials provided with the distribution. |
|
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
|
14 * contributors may be used to endorse or promote products derived from |
|
15 * this software without specific prior written permission. |
|
16 * |
|
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND |
|
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE |
|
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
27 * SUCH DAMAGE. |
|
28 * |
|
29 * Author: Nemanja Lukic (nlukic@mips.com) |
|
30 */ |
|
31 |
|
32 #include "pixman-private.h" |
|
33 #include "pixman-mips-dspr2-asm.h" |
|
34 |
|
LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
/*
 * Fill a buffer with a 16-bit value.
 *
 * a0 - *dest
 * a1 - count (bytes)
 * a2 - value to fill buffer with
 *
 * t1 is used as scratch; a0, a1 and a2 are modified.
 */

    beqz         a1, 3f             /* empty buffer: nothing to do */
     andi        t1, a0, 0x0002     /* (delay slot) bit 1 of dest address */
    beqz         t1, 0f             /* check if address is 4-byte aligned */
     nop
    sh           a2, 0(a0)          /* one halfword to reach 4-byte alignment */
    addiu        a0, a0, 2
    addiu        a1, a1, -2
0:
    srl          t1, a1, 5          /* t1 how many multiples of 32 bytes */
    replv.ph     a2, a2             /* replicate fill value (16bit) in a2 */
    beqz         t1, 2f
     nop
1:
    /* One 32-byte block per pass; the final block is stored at 11:
     * without issuing a prefetch for the bytes that follow it. */
    addiu        t1, t1, -1
    beqz         t1, 11f
     addiu       a1, a1, -32
    pref         30, 32(a0)
    sw           a2, 0(a0)
    sw           a2, 4(a0)
    sw           a2, 8(a0)
    sw           a2, 12(a0)
    sw           a2, 16(a0)
    sw           a2, 20(a0)
    sw           a2, 24(a0)
    sw           a2, 28(a0)
    b            1b
     addiu       a0, a0, 32
11:
    sw           a2, 0(a0)
    sw           a2, 4(a0)
    sw           a2, 8(a0)
    sw           a2, 12(a0)
    sw           a2, 16(a0)
    sw           a2, 20(a0)
    sw           a2, 24(a0)
    sw           a2, 28(a0)
    addiu        a0, a0, 32
2:
    /* Remaining bytes, one halfword at a time. */
    blez         a1, 3f
     addiu       a1, a1, -2
    sh           a2, 0(a0)
    b            2b
     addiu       a0, a0, 2
3:
    jr           ra
     nop

END(pixman_fill_buff16_mips)
|
90 |
|
LEAF_MIPS32R2(pixman_fill_buff32_mips)
/*
 * Fill a buffer with a 32-bit value.
 *
 * a0 - *dest
 * a1 - count (bytes)
 * a2 - value to fill buffer with
 *
 * t1 is used as scratch; a0 and a1 are modified.
 */

    beqz         a1, 3f             /* empty buffer: nothing to do */
     nop
    srl          t1, a1, 5          /* t1 how many multiples of 32 bytes */
    beqz         t1, 2f
     nop
1:
    /* One 32-byte block per pass; the final block is stored at 11:
     * without issuing a prefetch for the bytes that follow it. */
    addiu        t1, t1, -1
    beqz         t1, 11f
     addiu       a1, a1, -32
    pref         30, 32(a0)
    sw           a2, 0(a0)
    sw           a2, 4(a0)
    sw           a2, 8(a0)
    sw           a2, 12(a0)
    sw           a2, 16(a0)
    sw           a2, 20(a0)
    sw           a2, 24(a0)
    sw           a2, 28(a0)
    b            1b
     addiu       a0, a0, 32
11:
    sw           a2, 0(a0)
    sw           a2, 4(a0)
    sw           a2, 8(a0)
    sw           a2, 12(a0)
    sw           a2, 16(a0)
    sw           a2, 20(a0)
    sw           a2, 24(a0)
    sw           a2, 28(a0)
    addiu        a0, a0, 32
2:
    /* Remaining bytes, one word at a time. */
    blez         a1, 3f
     addiu       a1, a1, -4
    sw           a2, 0(a0)
    b            2b
     addiu       a0, a0, 4
3:
    jr           ra
     nop

END(pixman_fill_buff32_mips)
|
139 |
|
LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
/*
 * SRC: a8r8g8b8 source pixels written as r5g6b5, two pixels per loop
 * pass plus one trailing pixel when w is odd.
 *
 * a0 - dst  (r5g6b5)
 * a1 - src  (a8r8g8b8)
 * a2 - w
 */

    beqz         a2, 3f
     nop
    addiu        t1, a2, -1
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop
    li           t4, 0xf800f800
    li           t5, 0x07e007e0
    li           t6, 0x001f001f
1:
    lw           t0, 0(a1)
    lw           t1, 4(a1)
    addiu        a1, a1, 8
    addiu        a2, a2, -2

    /* t2/t3 are the halfwords stored below; t7/t8 are passed as the
     * macro's last two arguments (presumably scratch). */
    CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8

    sh           t2, 0(a0)
    sh           t3, 2(a0)

    addiu        t2, a2, -1
    bgtz         t2, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 4
2:
    beqz         a2, 3f
     nop
    lw           t0, 0(a1)

    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3

    sh           t1, 0(a0)
3:
    jr           ra
     nop

END(pixman_composite_src_8888_0565_asm_mips)
|
182 |
|
LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
/*
 * SRC: r5g6b5 source pixels written as a8r8g8b8, two pixels per loop
 * pass plus one trailing pixel when w is odd.
 *
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (r5g6b5)
 * a2 - w
 */

    beqz         a2, 3f
     nop
    addiu        t1, a2, -1
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop
    li           t4, 0x07e007e0
    li           t5, 0x001F001F
1:
    lhu          t0, 0(a1)
    lhu          t1, 2(a1)
    addiu        a1, a1, 4
    addiu        a2, a2, -2

    /* t2/t3 are the words stored below. */
    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9

    sw           t2, 0(a0)
    sw           t3, 4(a0)

    addiu        t2, a2, -1
    bgtz         t2, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 8
2:
    beqz         a2, 3f
     nop
    lhu          t0, 0(a1)

    CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3

    sw           t1, 0(a0)
3:
    jr           ra
     nop

END(pixman_composite_src_0565_8888_asm_mips)
|
224 |
|
LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
/*
 * SRC: copy x8r8g8b8 pixels to dst with the top byte forced to 0xff.
 * Eight pixels per pass in the unrolled loop, then one pixel at a time.
 *
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (x8r8g8b8)
 * a2 - w
 */

    beqz         a2, 4f
     nop
    li           t9, 0xff000000
    srl          t8, a2, 3          /* t8 = how many multiples of 8 src pixels */
    beqz         t8, 3f             /* branch if less than 8 src pixels */
     nop
1:
    /* The last group of 8 is handled at 2: without the prefetches. */
    addiu        t8, t8, -1
    beqz         t8, 2f
     addiu       a2, a2, -8
    pref         0, 32(a1)
    lw           t0, 0(a1)
    lw           t1, 4(a1)
    lw           t2, 8(a1)
    lw           t3, 12(a1)
    lw           t4, 16(a1)
    lw           t5, 20(a1)
    lw           t6, 24(a1)
    lw           t7, 28(a1)
    addiu        a1, a1, 32
    or           t0, t0, t9
    or           t1, t1, t9
    or           t2, t2, t9
    or           t3, t3, t9
    or           t4, t4, t9
    or           t5, t5, t9
    or           t6, t6, t9
    or           t7, t7, t9
    pref         30, 32(a0)
    sw           t0, 0(a0)
    sw           t1, 4(a0)
    sw           t2, 8(a0)
    sw           t3, 12(a0)
    sw           t4, 16(a0)
    sw           t5, 20(a0)
    sw           t6, 24(a0)
    sw           t7, 28(a0)
    b            1b
     addiu       a0, a0, 32
2:
    lw           t0, 0(a1)
    lw           t1, 4(a1)
    lw           t2, 8(a1)
    lw           t3, 12(a1)
    lw           t4, 16(a1)
    lw           t5, 20(a1)
    lw           t6, 24(a1)
    lw           t7, 28(a1)
    addiu        a1, a1, 32
    or           t0, t0, t9
    or           t1, t1, t9
    or           t2, t2, t9
    or           t3, t3, t9
    or           t4, t4, t9
    or           t5, t5, t9
    or           t6, t6, t9
    or           t7, t7, t9
    sw           t0, 0(a0)
    sw           t1, 4(a0)
    sw           t2, 8(a0)
    sw           t3, 12(a0)
    sw           t4, 16(a0)
    sw           t5, 20(a0)
    sw           t6, 24(a0)
    sw           t7, 28(a0)
    beqz         a2, 4f             /* w was a multiple of 8 */
     addiu       a0, a0, 32
3:
    /* Remaining (< 8) pixels, one at a time. */
    lw           t0, 0(a1)
    addiu        a1, a1, 4
    addiu        a2, a2, -1
    or           t1, t0, t9
    sw           t1, 0(a0)
    bnez         a2, 3b
     addiu       a0, a0, 4
4:
    jr           ra
     nop

END(pixman_composite_src_x888_8888_asm_mips)
|
312 |
|
LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
/*
 * SRC: constant source combined with an a8 mask, result stored as
 * a8r8g8b8. Two pixels per loop pass plus one trailing pixel.
 *
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (32bit constant)
 * a2 - mask (a8)
 * a3 - w
 */


    SAVE_REGS_ON_STACK 0, v0
    li           v0, 0x00ff00ff

    beqz         a3, 3f
     nop
    addiu        t1, a3, -1
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop

1:
                                    /* a1 = source (32bit constant) */
    lbu          t0, 0(a2)          /* t0 = mask (a8) */
    lbu          t1, 1(a2)          /* t1 = mask (a8) */
    addiu        a2, a2, 2

    /* t2/t3 are the words stored below. */
    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9

    sw           t2, 0(a0)
    sw           t3, 4(a0)
    addiu        a3, a3, -2
    addiu        t2, a3, -1
    bgtz         t2, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 8

    beqz         a3, 3f
     nop

2:
    lbu          t0, 0(a2)
    addiu        a2, a2, 1

    MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5

    sw           t1, 0(a0)
    addiu        a3, a3, -1
    addiu        a0, a0, 4

3:
    RESTORE_REGS_FROM_STACK 0, v0
    jr           ra
     nop

END(pixman_composite_src_n_8_8888_asm_mips)
|
365 |
|
LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
/*
 * SRC: a8 mask scaled by the top byte of the constant source, stored
 * as a8. Four pixels per pass in the main loop, then one at a time.
 *
 * a0 - dst  (a8)
 * a1 - src  (32bit constant)
 * a2 - mask (a8)
 * a3 - w
 */

    li           t9, 0x00ff00ff
    beqz         a3, 3f
     nop
    srl          t7, a3, 2          /* t7 = how many multiples of 4 dst pixels */
    beqz         t7, 1f             /* branch if less than 4 src pixels */
     nop

    srl          t8, a1, 24         /* t8 = top byte of src ... */
    replv.ph     t8, t8             /* ... replicated into both halfwords */

0:
    beqz         t7, 1f
     addiu       t7, t7, -1
    lbu          t0, 0(a2)
    lbu          t1, 1(a2)
    lbu          t2, 2(a2)
    lbu          t3, 3(a2)

    addiu        a2, a2, 4

    /* Pack the four mask bytes into one word (t0). */
    precr_sra.ph.w t1, t0, 0
    precr_sra.ph.w t3, t2, 0
    precr.qb.ph  t0, t3, t1

    /* Per byte: x = mask * t8, then ((x + rnd(x >> 8)) >> 8) with rounding
     * shifts, producing four result bytes that are packed into t2. */
    muleu_s.ph.qbl t2, t0, t8
    muleu_s.ph.qbr t3, t0, t8
    shra_r.ph    t4, t2, 8
    shra_r.ph    t5, t3, 8
    and          t4, t4, t9
    and          t5, t5, t9
    addq.ph      t2, t2, t4
    addq.ph      t3, t3, t5
    shra_r.ph    t2, t2, 8
    shra_r.ph    t3, t3, 8
    precr.qb.ph  t2, t2, t3

    /* Store byte by byte, lowest byte first. */
    sb           t2, 0(a0)
    srl          t2, t2, 8
    sb           t2, 1(a0)
    srl          t2, t2, 8
    sb           t2, 2(a0)
    srl          t2, t2, 8
    sb           t2, 3(a0)
    addiu        a3, a3, -4
    b            0b
     addiu       a0, a0, 4

1:
    beqz         a3, 3f
     nop
    srl          t8, a1, 24         /* reload: t8 was replicated above */
2:
    lbu          t0, 0(a2)
    addiu        a2, a2, 1

    mul          t2, t0, t8
    shra_r.ph    t3, t2, 8
    andi         t3, t3, 0x00ff
    addq.ph      t2, t2, t3
    shra_r.ph    t2, t2, 8

    sb           t2, 0(a0)
    addiu        a3, a3, -1
    bnez         a3, 2b
     addiu       a0, a0, 1

3:
    jr           ra
     nop

END(pixman_composite_src_n_8_8_asm_mips)
|
445 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
/*
 * OVER with a constant source and a component-alpha a8r8g8b8 mask.
 * Two pixels per loop pass (separate loops for srca != 0xff and
 * srca == 0xff) plus one trailing pixel.
 *
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (32bit constant)
 * a2 - mask (a8r8g8b8)
 * a3 - w
 *
 * Per pixel the general path computes
 *     dst = src * mask + dst * ~(mask * srca)
 * When mask == 0xffffffff the two multiplications by the mask are
 * skipped. In that case (mask * srca) equals srca in every channel, so
 * the shortcut loads srca replicated into all four bytes before the
 * negation. Negating the raw mask instead would yield 0 and store plain
 * src even when srca != 0xff.
 */

    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
    beqz         a3, 4f
     nop
    li           t6, 0xff
    addiu        t7, zero, -1       /* t7 = 0xffffffff */
    srl          t8, a1, 24         /* t8 = srca */
    li           t9, 0x00ff00ff
    addiu        t1, a3, -1
    beqz         t1, 3f             /* last pixel */
     nop
    beq          t8, t6, 2f         /* if (srca == 0xff) */
     nop
1:
                                    /* a1 = src */
    lw           t0, 0(a2)          /* t0 = mask */
    lw           t1, 4(a2)          /* t1 = mask */
    or           t2, t0, t1
    beqz         t2, 12f            /* if (t0 == 0) && (t1 == 0) */
     addiu       a2, a2, 8
    and          t3, t0, t1
    move         t4, a1             /* t4 = src */
    move         t5, a1             /* t5 = src */
    lw           t2, 0(a0)          /* t2 = dst */
    beq          t3, t7, 10f        /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
     lw          t3, 4(a0)          /* t3 = dst */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
    b            11f
     nop
10:
    /* Full mask: mask * srca is srca in every channel. */
    replv.qb     t0, t8
    replv.qb     t1, t8
11:
    not          t0, t0
    not          t1, t1
    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    addu_s.qb    t2, t4, t2
    addu_s.qb    t3, t5, t3
    sw           t2, 0(a0)
    sw           t3, 4(a0)
12:
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 1b
     addiu       a0, a0, 8
    b            3f
     nop
2:
                                    /* a1 = src, srca == 0xff */
    lw           t0, 0(a2)          /* t0 = mask */
    lw           t1, 4(a2)          /* t1 = mask */
    or           t2, t0, t1
    beqz         t2, 22f            /* if (t0 == 0) && (t1 == 0) */
     addiu       a2, a2, 8
    and          t2, t0, t1
    move         t4, a1
    beq          t2, t7, 21f        /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
     move        t5, a1
    lw           t2, 0(a0)          /* t2 = dst */
    lw           t3, 4(a0)          /* t3 = dst */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
    not          t0, t0
    not          t1, t1
    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    addu_s.qb    t4, t4, t2
    addu_s.qb    t5, t5, t3
21:
    sw           t4, 0(a0)
    sw           t5, 4(a0)
22:
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 2b
     addiu       a0, a0, 8
3:
    /* Trailing pixel; handles both srca == 0xff and srca != 0xff. */
    blez         a3, 4f
     nop
                                    /* a1 = src */
    lw           t1, 0(a2)          /* t1 = mask */
    beqz         t1, 4f
     nop
    move         t2, a1             /* t2 = src */
    beq          t1, t7, 30f        /* if (t1 == 0xffffffff) */
     lw          t0, 0(a0)          /* t0 = dst */

    MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6
    MIPS_UN8x4_MUL_UN8   t1, t8, t1, t9, t3, t4, t5
    b            31f
     nop
30:
    /* Full mask: mask * srca is srca in every channel. */
    replv.qb     t1, t8
31:
    not          t1, t1
    MIPS_UN8x4_MUL_UN8x4 t0, t1, t0, t9, t3, t4, t5, t6
    addu_s.qb    t0, t2, t0
    sw           t0, 0(a0)
4:
    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
    jr           ra
     nop

END(pixman_composite_over_n_8888_8888_ca_asm_mips)
|
547 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
/*
 * OVER with a constant source and a component-alpha a8r8g8b8 mask onto
 * an r5g6b5 destination. Two pixels per loop pass (separate loops for
 * srca != 0xff and srca == 0xff) plus one trailing pixel.
 *
 * a0 - dst  (r5g6b5)
 * a1 - src  (32bit constant)
 * a2 - mask (a8r8g8b8)
 * a3 - w
 *
 * Per pixel the general path computes, on the destination expanded to
 * 8888,
 *     dst = src * mask + dst * ~(mask * srca)
 * When mask == 0xffffffff the two multiplications by the mask are
 * skipped. In that case (mask * srca) equals srca in every channel, so
 * the shortcut loads srca replicated into all four bytes before the
 * negation. Negating the raw mask instead would yield 0 and store plain
 * src even when srca != 0xff.
 *
 * s0/s1 are initialised before the "last pixel" branch because the
 * trailing-pixel code compares the mask against s1 (0xffffffff) and is
 * also reached directly when w == 1.
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
    beqz         a3, 4f
     nop
    li           t5, 0xf800f800
    li           t6, 0x07e007e0
    li           t7, 0x001F001F
    li           t9, 0x00ff00ff

    srl          t8, a1, 24         /* t8 = srca */
    li           s0, 0xff           /* s0 = 0xff */
    addiu        s1, zero, -1       /* s1 = 0xffffffff */
    addiu        t1, a3, -1
    beqz         t1, 3f             /* last pixel */
     nop

    beq          t8, s0, 2f         /* if (srca == 0xff) */
     nop
1:
                                    /* a1 = src */
    lw           t0, 0(a2)          /* t0 = mask */
    lw           t1, 4(a2)          /* t1 = mask */
    or           t2, t0, t1
    beqz         t2, 12f            /* if (t0 == 0) && (t1 == 0) */
     addiu       a2, a2, 8
    and          t3, t0, t1
    move         s2, a1             /* s2 = src */
    move         s3, a1             /* s3 = src */
    lhu          t2, 0(a0)          /* t2 = dst */
    beq          t3, s1, 10f        /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
     lhu         t3, 2(a0)          /* t3 = dst */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8
    b            11f
     nop
10:
    /* Full mask: mask * srca is srca in every channel. */
    replv.qb     t0, t8
    replv.qb     t1, t8
11:
    not          t0, t0
    not          t1, t1
    CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
    MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1
    addu_s.qb    s2, s2, s4
    addu_s.qb    s3, s3, s5
    CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5
    sh           t2, 0(a0)
    sh           t3, 2(a0)
12:
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 1b
     addiu       a0, a0, 4
    b            3f
     nop
2:
                                    /* a1 = src, srca == 0xff */
    lw           t0, 0(a2)          /* t0 = mask */
    lw           t1, 4(a2)          /* t1 = mask */
    or           t2, t0, t1
    beqz         t2, 22f            /* if (t0 == 0) && (t1 == 0) */
     addiu       a2, a2, 8
    and          t3, t0, t1
    move         t2, a1
    beq          t3, s1, 21f        /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
     move        t3, a1
    lhu          t2, 0(a0)          /* t2 = dst */
    lhu          t3, 2(a0)          /* t3 = dst */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
    not          t0, t0
    not          t1, t1
    CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
    MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3
    addu_s.qb    t2, s2, s4
    addu_s.qb    t3, s3, s5
21:
    CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3
    sh           t0, 0(a0)
    sh           t1, 2(a0)
22:
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 2b
     addiu       a0, a0, 4
3:
    /* Trailing pixel; handles both srca == 0xff and srca != 0xff.
     * t5/t6/t7 are used as scratch from here on. */
    blez         a3, 4f
     nop
                                    /* a1 = src */
    lw           t1, 0(a2)          /* t1 = mask */
    beqz         t1, 4f
     nop
    move         t2, a1             /* t2 = src */
    beq          t1, s1, 30f        /* if (t1 == 0xffffffff) */
     lhu         t0, 0(a0)          /* t0 = dst */

    MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6
    MIPS_UN8x4_MUL_UN8   t1, t8, t1, t9, t3, t4, t5
    b            31f
     nop
30:
    /* Full mask: mask * srca is srca in every channel. */
    replv.qb     t1, t8
31:
    not          t1, t1
    CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3
    MIPS_UN8x4_MUL_UN8x4 s1, t1, t3, t9, t4, t5, t6, t7
    addu_s.qb    t0, t2, t3
    CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3
    sh           s1, 0(a0)
4:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
    jr           ra
     nop

END(pixman_composite_over_n_8888_0565_ca_asm_mips)
|
660 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
/*
 * OVER with a constant source and an a8 mask onto a8r8g8b8.
 * Two pixels per loop pass (separate loops for srca != 0xff and
 * srca == 0xff) plus one trailing pixel.
 *
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (32bit constant)
 * a2 - mask (a8)
 * a3 - w
 */

    SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4
    beqz         a3, 4f
     nop
    li           t4, 0x00ff00ff
    li           t5, 0xff
    addiu        t0, a3, -1
    beqz         t0, 3f             /* last pixel */
     srl         t6, a1, 24         /* t6 = srca */
    not          s4, a1
    beq          t5, t6, 2f         /* if (srca == 0xff) */
     srl         s4, s4, 24         /* s4 = top byte of ~src */
1:
                                    /* a1 = src */
    lbu          t0, 0(a2)          /* t0 = mask */
    lbu          t1, 1(a2)          /* t1 = mask */
    or           t2, t0, t1
    beqz         t2, 111f           /* if (t0 == 0) && (t1 == 0) */
     addiu       a2, a2, 2
    and          t3, t0, t1

    lw           t2, 0(a0)          /* t2 = dst */
    beq          t3, t5, 11f        /* if (t0 == 0xff) && (t1 == 0xff) */
     lw          t3, 4(a0)          /* t3 = dst */

    /* s0/s1 = src scaled by the masks; s2/s3 = top byte of their inverse. */
    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3
    not          s2, s0
    not          s3, s1
    srl          s2, s2, 24
    srl          s3, s3, 24
    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9
    addu_s.qb    s2, t2, s0
    addu_s.qb    s3, t3, s1
    sw           s2, 0(a0)
    b            111f
     sw          s3, 4(a0)
11:
    /* Both masks are 0xff: scale dst by s4 and add the unmodified src. */
    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9
    addu_s.qb    s2, t2, a1
    addu_s.qb    s3, t3, a1
    sw           s2, 0(a0)
    sw           s3, 4(a0)

111:
    addiu        a3, a3, -2
    addiu        t0, a3, -1
    bgtz         t0, 1b
     addiu       a0, a0, 8
    b            3f
     nop
2:
                                    /* a1 = src, srca == 0xff */
    lbu          t0, 0(a2)          /* t0 = mask */
    lbu          t1, 1(a2)          /* t1 = mask */
    or           t2, t0, t1
    beqz         t2, 222f           /* if (t0 == 0) && (t1 == 0) */
     addiu       a2, a2, 2
    and          t3, t0, t1
    beq          t3, t5, 22f        /* if (t0 == 0xff) && (t1 == 0xff) */
     nop
    lw           t2, 0(a0)          /* t2 = dst */
    lw           t3, 4(a0)          /* t3 = dst */

    OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \
                           t6, t7, t4, t8, t9, s0, s1, s2, s3
    sw           t6, 0(a0)
    b            222f
     sw          t7, 4(a0)
22:
    /* Both masks are 0xff: store src directly. */
    sw           a1, 0(a0)
    sw           a1, 4(a0)
222:
    addiu        a3, a3, -2
    addiu        t0, a3, -1
    bgtz         t0, 2b
     addiu       a0, a0, 8
3:
    /* Trailing pixel. */
    blez         a3, 4f
     nop
                                    /* a1 = src */
    lbu          t0, 0(a2)          /* t0 = mask */
    beqz         t0, 4f             /* if (t0 == 0) */
     addiu       a2, a2, 1
    move         t3, a1
    beq          t0, t5, 31f        /* if (t0 == 0xff) */
     lw          t1, 0(a0)          /* t1 = dst */

    MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8
31:
    not          t2, t3
    srl          t2, t2, 24
    MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8
    addu_s.qb    t2, t1, t3
    sw           t2, 0(a0)
4:
    RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4
    jr           ra
     nop

END(pixman_composite_over_n_8_8888_asm_mips)
|
768 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
/*
 * OVER with a constant source and an a8 mask onto r5g6b5.
 * Two pixels per loop pass (separate loops for srca != 0xff and
 * srca == 0xff) plus one trailing pixel.
 *
 * a0 - dst  (r5g6b5)
 * a1 - src  (32bit constant)
 * a2 - mask (a8)
 * a3 - w
 */
    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
    beqz         a3, 4f
     nop
    li           t4, 0x00ff00ff
    li           t5, 0xff
    li           t6, 0xf800f800
    li           t7, 0x07e007e0
    li           t8, 0x001F001F
    addiu        t1, a3, -1
    beqz         t1, 3f             /* last pixel */
     srl         t0, a1, 24         /* t0 = srca */
    not          v0, a1
    beq          t0, t5, 2f         /* if (srca == 0xff) */
     srl         v0, v0, 24         /* v0 = top byte of ~src */
1:
                                    /* a1 = src */
    lbu          t0, 0(a2)          /* t0 = mask */
    lbu          t1, 1(a2)          /* t1 = mask */
    or           t2, t0, t1
    beqz         t2, 111f           /* if (t0 == 0) && (t1 == 0) */
     addiu       a2, a2, 2
    lhu          t2, 0(a0)          /* t2 = dst */
    lhu          t3, 2(a0)          /* t3 = dst */
    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4
    and          t9, t0, t1
    beq          t9, t5, 11f        /* if (t0 == 0xff) && (t1 == 0xff) */
     nop

    /* s2/s3 = src scaled by the masks; s4/s5 = top byte of their inverse. */
    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8
    not          s4, s2
    not          s5, s3
    srl          s4, s4, 24
    srl          s5, s5, 24
    MIPS_2xUN8x4_MUL_2xUN8 s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8
    addu_s.qb    s4, s2, s0
    addu_s.qb    s5, s3, s1
    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
    sh           t2, 0(a0)
    b            111f
     sh          t3, 2(a0)
11:
    /* Both masks are 0xff: scale dst by v0 and add the unmodified src. */
    MIPS_2xUN8x4_MUL_2xUN8 s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8
    addu_s.qb    s4, a1, s0
    addu_s.qb    s5, a1, s1
    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
    sh           t2, 0(a0)
    sh           t3, 2(a0)
111:
    addiu        a3, a3, -2
    addiu        t0, a3, -1
    bgtz         t0, 1b
     addiu       a0, a0, 4
    b            3f
     nop
2:
    /* srca == 0xff: convert src to r5g6b5 once (s0) for the full-mask case. */
    CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2
21:
                                    /* a1 = src */
    lbu          t0, 0(a2)          /* t0 = mask */
    lbu          t1, 1(a2)          /* t1 = mask */
    or           t2, t0, t1
    beqz         t2, 222f           /* if (t0 == 0) && (t1 == 0) */
     addiu       a2, a2, 2
    and          t9, t0, t1
    move         s2, s0
    beq          t9, t5, 22f        /* if (t0 == 0xff) && (t1 == 0xff) */
     move        s3, s0
    lhu          t2, 0(a0)          /* t2 = dst */
    lhu          t3, 2(a0)          /* t3 = dst */

    CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7
    OVER_2x8888_2x8_2x8888   a1, a1, t0, t1, s2, s3, \
                             t2, t3, t4, t9, s4, s5, s6, s7, s8
    CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5
22:
    sh           s2, 0(a0)
    sh           s3, 2(a0)
222:
    addiu        a3, a3, -2
    addiu        t0, a3, -1
    bgtz         t0, 21b
     addiu       a0, a0, 4
3:
    /* Trailing pixel; t6/t7 are reused as scratch from here on. */
    blez         a3, 4f
     nop
                                    /* a1 = src */
    lbu          t0, 0(a2)          /* t0 = mask */
    beqz         t0, 4f             /* if (t0 == 0) */
     nop
    lhu          t1, 0(a0)          /* t1 = dst */
    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7
    beq          t0, t5, 31f        /* if (t0 == 0xff) */
     move        t3, a1

    MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t7, t8, t9
31:
    not          t6, t3
    srl          t6, t6, 24
    MIPS_UN8x4_MUL_UN8 t2, t6, t2, t4, t7, t8, t9
    addu_s.qb    t1, t2, t3
    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7
    sh           t2, 0(a0)
4:
    RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
    jr           ra
     nop

END(pixman_composite_over_n_8_0565_asm_mips)
|
884 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
/*
 * OVER of a8r8g8b8 source onto a8r8g8b8 destination with a constant
 * mask; only the top byte of the mask is used. Two pixels per loop
 * pass plus one trailing pixel.
 *
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (a8r8g8b8)
 * a2 - mask (32bit constant)
 * a3 - w
 */

    SAVE_REGS_ON_STACK 0, s0
    li           t4, 0x00ff00ff
    beqz         a3, 3f
     nop
    addiu        t1, a3, -1
    srl          a2, a2, 24         /* a2 = top byte of the mask */
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop

1:
    lw           t0, 0(a1)          /* t0 = source      (a8r8g8b8) */
    lw           t1, 4(a1)          /* t1 = source      (a8r8g8b8) */
                                    /* a2 = mask        (top byte of constant) */
    lw           t2, 0(a0)          /* t2 = destination (a8r8g8b8) */
    lw           t3, 4(a0)          /* t3 = destination (a8r8g8b8) */
    addiu        a1, a1, 8

    /* t5/t6 are the words stored below. */
    OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \
                           t5, t6, t4, t7, t8, t9, t0, t1, s0

    sw           t5, 0(a0)
    sw           t6, 4(a0)
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 8
2:
    beqz         a3, 3f
     nop
    lw           t0, 0(a1)          /* t0 = source      (a8r8g8b8) */
                                    /* a2 = mask        (top byte of constant) */
    lw           t1, 0(a0)          /* t1 = destination (a8r8g8b8) */

    OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8

    sw           t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0
    jr           ra
     nop

END(pixman_composite_over_8888_n_8888_asm_mips)
|
935 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
/*
 * OVER of a8r8g8b8 source onto r5g6b5 destination with a constant
 * mask; only the top byte of the mask is used. Two pixels per loop
 * pass plus one trailing pixel.
 *
 * a0 - dst  (r5g6b5)
 * a1 - src  (a8r8g8b8)
 * a2 - mask (32bit constant)
 * a3 - w
 */

    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
    li           t6, 0x00ff00ff
    li           t7, 0xf800f800
    li           t8, 0x07e007e0
    li           t9, 0x001F001F
    beqz         a3, 3f
     nop
    srl          a2, a2, 24         /* a2 = top byte of the mask */
    addiu        t1, a3, -1
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop
1:
    lw           t0, 0(a1)          /* t0 = source      (a8r8g8b8) */
    lw           t1, 4(a1)          /* t1 = source      (a8r8g8b8) */
                                    /* a2 = mask        (top byte of constant) */
    lhu          t2, 0(a0)          /* t2 = destination (r5g6b5) */
    lhu          t3, 2(a0)          /* t3 = destination (r5g6b5) */
    addiu        a1, a1, 8

    /* dst -> 8888 (t4/t5), OVER (t2/t3), back to 0565 (t4/t5). */
    CONVERT_2x0565_TO_2x8888 t2, t3, t4, t5, t8, t9, s0, s1, t2, t3
    OVER_2x8888_2x8_2x8888   t0, t1, a2, a2, t4, t5, \
                             t2, t3, t6, t0, t1, s0, s1, s2, s3
    CONVERT_2x8888_TO_2x0565 t2, t3, t4, t5, t7, t8, t9, s0, s1

    sh           t4, 0(a0)
    sh           t5, 2(a0)
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 4
2:
    beqz         a3, 3f
     nop
    lw           t0, 0(a1)          /* t0 = source      (a8r8g8b8) */
                                    /* a2 = mask        (top byte of constant) */
    lhu          t1, 0(a0)          /* t1 = destination (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t1, t2, t4, t5
    OVER_8888_8_8888         t0, a2, t2, t1, t6, t3, t4, t5, t7
    CONVERT_1x8888_TO_1x0565 t1, t3, t4, t5

    sh           t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
    jr           ra
     nop

END(pixman_composite_over_8888_n_0565_asm_mips)
|
992 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
/*
 * OVER of r5g6b5 source onto r5g6b5 destination with a constant mask;
 * only the top byte of the mask is used. Two pixels per loop pass plus
 * one trailing pixel.
 *
 * a0 - dst  (r5g6b5)
 * a1 - src  (r5g6b5)
 * a2 - mask (32bit constant)
 * a3 - w
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
    li           t6, 0x00ff00ff
    li           t7, 0xf800f800
    li           t8, 0x07e007e0
    li           t9, 0x001F001F
    beqz         a3, 3f
     nop
    srl          a2, a2, 24         /* a2 = top byte of the mask */
    addiu        t1, a3, -1
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop
1:
    lhu          t0, 0(a1)          /* t0 = source      (r5g6b5) */
    lhu          t1, 2(a1)          /* t1 = source      (r5g6b5) */
                                    /* a2 = mask        (top byte of constant) */
    lhu          t2, 0(a0)          /* t2 = destination (r5g6b5) */
    lhu          t3, 2(a0)          /* t3 = destination (r5g6b5) */
    addiu        a1, a1, 4

    /* src -> 8888 (t4/t5), dst -> 8888 (s0/s1), OVER (t0/t1),
     * back to 0565 (s0/s1). */
    CONVERT_2x0565_TO_2x8888 t0, t1, t4, t5, t8, t9, s0, s1, s2, s3
    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t8, t9, s2, s3, s4, s5
    OVER_2x8888_2x8_2x8888   t4, t5, a2, a2, s0, s1, \
                             t0, t1, t6, s2, s3, s4, s5, t4, t5
    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t7, t8, t9, s2, s3

    sh           s0, 0(a0)
    sh           s1, 2(a0)
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 4
2:
    beqz         a3, 3f
     nop
    lhu          t0, 0(a1)          /* t0 = source      (r5g6b5) */
                                    /* a2 = mask        (top byte of constant) */
    lhu          t1, 0(a0)          /* t1 = destination (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t0, t2, t4, t5
    CONVERT_1x0565_TO_1x8888 t1, t3, t4, t5
    OVER_8888_8_8888         t2, a2, t3, t0, t6, t1, t4, t5, t7
    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5

    sh           t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
    jr           ra
     nop

END(pixman_composite_over_0565_n_0565_asm_mips)
|
1051 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
/*
 * OVER of a8r8g8b8 source onto a8r8g8b8 destination through an a8
 * mask. Two pixels per loop pass plus one trailing pixel.
 *
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (a8r8g8b8)
 * a2 - mask (a8)
 * a3 - w
 */

    SAVE_REGS_ON_STACK 0, s0, s1
    li           t4, 0x00ff00ff
    beqz         a3, 3f
     nop
    addiu        t1, a3, -1
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop
1:
    lw           t0, 0(a1)          /* t0 = source      (a8r8g8b8) */
    lw           t1, 4(a1)          /* t1 = source      (a8r8g8b8) */
    lbu          t2, 0(a2)          /* t2 = mask        (a8) */
    lbu          t3, 1(a2)          /* t3 = mask        (a8) */
    lw           t5, 0(a0)          /* t5 = destination (a8r8g8b8) */
    lw           t6, 4(a0)          /* t6 = destination (a8r8g8b8) */
    addiu        a1, a1, 8
    addiu        a2, a2, 2

    /* t7/t8 are the words stored below. */
    OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \
                           t7, t8, t4, t9, s0, s1, t0, t1, t2

    sw           t7, 0(a0)
    sw           t8, 4(a0)
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 8
2:
    beqz         a3, 3f
     nop
    lw           t0, 0(a1)          /* t0 = source      (a8r8g8b8) */
    lbu          t1, 0(a2)          /* t1 = mask        (a8) */
    lw           t2, 0(a0)          /* t2 = destination (a8r8g8b8) */

    OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8

    sw           t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 0, s0, s1
    jr           ra
     nop

END(pixman_composite_over_8888_8_8888_asm_mips)
|
1102 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
/*
 * OVER of a8r8g8b8 source onto r5g6b5 destination through an a8 mask.
 * Two pixels per loop pass plus one trailing pixel.
 *
 * a0 - dst  (r5g6b5)
 * a1 - src  (a8r8g8b8)
 * a2 - mask (a8)
 * a3 - w
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
    li           t6, 0x00ff00ff
    li           t7, 0xf800f800
    li           t8, 0x07e007e0
    li           t9, 0x001F001F
    beqz         a3, 3f
     nop
    addiu        t1, a3, -1
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop
1:
    lw           t0, 0(a1)          /* t0 = source      (a8r8g8b8) */
    lw           t1, 4(a1)          /* t1 = source      (a8r8g8b8) */
    lbu          t2, 0(a2)          /* t2 = mask        (a8) */
    lbu          t3, 1(a2)          /* t3 = mask        (a8) */
    lhu          t4, 0(a0)          /* t4 = destination (r5g6b5) */
    lhu          t5, 2(a0)          /* t5 = destination (r5g6b5) */
    addiu        a1, a1, 8
    addiu        a2, a2, 2

    /* dst -> 8888 (s0/s1), OVER (t4/t5), back to 0565 (s0/s1). */
    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
    OVER_2x8888_2x8_2x8888   t0, t1, t2, t3, s0, s1, \
                             t4, t5, t6, s2, s3, s4, s5, t0, t1
    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3

    sh           s0, 0(a0)
    sh           s1, 2(a0)
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 4
2:
    beqz         a3, 3f
     nop
    lw           t0, 0(a1)          /* t0 = source      (a8r8g8b8) */
    lbu          t1, 0(a2)          /* t1 = mask        (a8) */
    lhu          t2, 0(a0)          /* t2 = destination (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5

    sh           t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
    jr           ra
     nop

END(pixman_composite_over_8888_8_0565_asm_mips)
|
1160 |
|
LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
/*
 * OVER of r5g6b5 source onto r5g6b5 destination through an a8 mask.
 * Two pixels per loop pass plus one trailing pixel.
 *
 * a0 - dst  (r5g6b5)
 * a1 - src  (r5g6b5)
 * a2 - mask (a8)
 * a3 - w
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
    li           t4, 0xf800f800
    li           t5, 0x07e007e0
    li           t6, 0x001F001F
    li           t7, 0x00ff00ff
    beqz         a3, 3f
     nop
    addiu        t1, a3, -1
    beqz         t1, 2f             /* w == 1: only the single-pixel path */
     nop
1:
    lhu          t0, 0(a1)          /* t0 = source      (r5g6b5) */
    lhu          t1, 2(a1)          /* t1 = source      (r5g6b5) */
    lbu          t2, 0(a2)          /* t2 = mask        (a8) */
    lbu          t3, 1(a2)          /* t3 = mask        (a8) */
    lhu          t8, 0(a0)          /* t8 = destination (r5g6b5) */
    lhu          t9, 2(a0)          /* t9 = destination (r5g6b5) */
    addiu        a1, a1, 4
    addiu        a2, a2, 2

    /* src -> 8888 (s0/s1), dst -> 8888 (s2/s3), OVER (t0/t1),
     * back to 0565 (s0/s1). */
    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
    OVER_2x8888_2x8_2x8888   s0, s1, t2, t3, s2, s3, \
                             t0, t1, t7, s4, s5, t8, t9, s0, s1
    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3

    sh           s0, 0(a0)
    sh           s1, 2(a0)
    addiu        a3, a3, -2
    addiu        t1, a3, -1
    bgtz         t1, 1b             /* loop while at least 2 pixels remain */
     addiu       a0, a0, 4
2:
    /* Trailing pixel; t4/t5/t6 are reused as scratch from here on. */
    beqz         a3, 3f
     nop
    lhu          t0, 0(a1)          /* t0 = source      (r5g6b5) */
    lbu          t1, 0(a2)          /* t1 = mask        (a8) */
    lhu          t2, 0(a0)          /* t2 = destination (r5g6b5) */

    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5

    sh           t3, 0(a0)
3:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
    jr           ra
     nop

END(pixman_composite_over_0565_8_0565_asm_mips)
|
1220 |
|
1221 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips) |

/*
 * Per its name: OVER of an a8r8g8b8 source, through an a8r8g8b8 mask, onto
 * an a8r8g8b8 destination, for one run of w pixels.
 *
 * Only the top byte of each mask word is used: the mask is shifted right
 * by 24 and then passed to the same OVER_* macros that take an 8-bit mask.
 *
 * Two pixels per iteration in the main loop (1:), one optional pixel in
 * the tail (2:), w == 0 jumps to the epilogue (3:). The instruction after
 * each branch appears to be a branch delay slot (assumes .set noreorder,
 * not visible in this chunk). Macros are defined outside this chunk.
 */

1222 /* |

1223 * a0 - dst (a8r8g8b8) |

1224 * a1 - src (a8r8g8b8) |

1225 * a2 - mask (a8r8g8b8) |

1226 * a3 - w |

1227 */ |

1228 |

1229 SAVE_REGS_ON_STACK 0, s0, s1, s2 |

1230 li t4, 0x00ff00ff |

1231 beqz a3, 3f |

1232 nop |

/* w == 1: only the tail runs. */

1233 addiu t1, a3, -1 |

1234 beqz t1, 2f |

1235 nop |

1236 1: |

1237 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1238 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ |

1239 lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */ |

1240 lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */ |

1241 lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ |

1242 lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ |

1243 addiu a1, a1, 8 |

1244 addiu a2, a2, 8 |

/* Keep only bits 31..24 (the alpha byte) of each mask word. */

1245 srl t2, t2, 24 |

1246 srl t3, t3, 24 |

1247 |

/* Results are taken from t7/t8 (stored below). */

1248 OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1 |

1249 |

1250 sw t7, 0(a0) |

1251 sw t8, 4(a0) |

1252 addiu a3, a3, -2 |

/* Continue while at least two pixels remain. */

1253 addiu t1, a3, -1 |

1254 bgtz t1, 1b |

1255 addiu a0, a0, 8 |

1256 2: |

/* Tail: zero or one pixel left. */

1257 beqz a3, 3f |

1258 nop |

1259 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1260 lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */ |

1261 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ |

1262 srl t1, t1, 24 |

1263 |

1264 OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8 |

1265 |

1266 sw t3, 0(a0) |

1267 3: |

1268 RESTORE_REGS_FROM_STACK 0, s0, s1, s2 |

1269 j ra |

1270 nop |

1271 |

1272 END(pixman_composite_over_8888_8888_8888_asm_mips) |
|
1273 |
|
1274 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips) |

/*
 * Per its name: unmasked OVER of an a8r8g8b8 source onto an a8r8g8b8
 * destination, for one run of w pixels.
 *
 * For each pixel the code computes ia = (~src) >> 24, multiplies the
 * destination by ia with a MIPS_*UN8x4_MUL_*UN8 macro and adds the source
 * with a per-byte unsigned saturating add (addu_s.qb).
 *
 * Shortcuts visible below:
 *   - ia == 0 (source alpha byte is 0xff): the source word is stored as is.
 *   - source word == 0: the destination is left untouched (no store).
 * In the two-pixel loop each shortcut is taken only when it holds for BOTH
 * pixels of the pair.
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). Macros are defined
 * outside this chunk.
 */

1275 /* |

1276 * a0 - dst (a8r8g8b8) |

1277 * a1 - src (a8r8g8b8) |

1278 * a2 - w |

1279 */ |

1280 |

1281 SAVE_REGS_ON_STACK 0, s0, s1, s2 |

1282 li t4, 0x00ff00ff |

1283 beqz a2, 3f |

1284 nop |

/* w == 1: only the tail runs. */

1285 addiu t1, a2, -1 |

1286 beqz t1, 2f |

1287 nop |

1288 1: |

1289 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1290 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ |

1291 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ |

1292 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ |

1293 addiu a1, a1, 8 |

1294 |

/* t5 / t6 = inverted alpha byte (255 - alpha) of source 1 / source 2. */

1295 not t5, t0 |

1296 srl t5, t5, 24 |

1297 not t6, t1 |

1298 srl t6, t6, 24 |

1299 |

/* Both inverse alphas zero: store the sources unchanged (11:). The
 * "or t8" after the branch is executed either way. */

1300 or t7, t5, t6 |

1301 beqz t7, 11f |

1302 or t8, t0, t1 |

/* Both source words zero: skip the store entirely (12:).
 * NOTE(review): no explicit delay-slot instruction follows this branch;
 * under .set noreorder the first instruction expanded from the macro below
 * would occupy the slot - confirm in the macro definition that this is
 * harmless on the taken path. */

1303 beqz t8, 12f |

1304 |

1305 MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t2, t3 |

1306 |

/* result = src + dst * ia, saturating per byte. */

1307 addu_s.qb t0, t7, t0 |

1308 addu_s.qb t1, t8, t1 |

1309 11: |

1310 sw t0, 0(a0) |

1311 sw t1, 4(a0) |

1312 12: |

1313 addiu a2, a2, -2 |

/* Continue while at least two pixels remain. */

1314 addiu t1, a2, -1 |

1315 bgtz t1, 1b |

1316 addiu a0, a0, 8 |

1317 2: |

/* Tail: zero or one pixel left. */

1318 beqz a2, 3f |

1319 nop |

1320 |

1321 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1322 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ |

1323 addiu a1, a1, 4 |

1324 |

/* t2 = inverted alpha byte of the source. */

1325 not t2, t0 |

1326 srl t2, t2, 24 |

1327 |

1328 beqz t2, 21f |

1329 nop |

/* Source word zero: nothing to do.
 * NOTE(review): as above, this branch has no explicit delay-slot
 * instruction before the macro - confirm the macro's first instruction is
 * harmless when the branch is taken. */

1330 beqz t0, 3f |

1331 |

1332 MIPS_UN8x4_MUL_UN8 t1, t2, t3, t4, t5, t6, t7 |

1333 |

1334 addu_s.qb t0, t3, t0 |

1335 21: |

1336 sw t0, 0(a0) |

1337 |

1338 3: |

1339 RESTORE_REGS_FROM_STACK 0, s0, s1, s2 |

1340 j ra |

1341 nop |

1342 |

1343 END(pixman_composite_over_8888_8888_asm_mips) |
|
1344 |
|
1345 LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips) |

/*
 * Per its name: OVER of a constant 32-bit source colour onto an r5g6b5
 * destination, for one run of w pixels.
 *
 * Two paths:
 *   - inverse alpha (~src >> 24) is zero: the colour is converted to 565
 *     once and stored into every pixel (loop 0:); this path returns
 *     without ever saving registers on the stack.
 *   - otherwise (1:): each destination pixel is widened, multiplied by the
 *     inverse alpha, added to the source with a saturating byte add and
 *     narrowed back. Two pixels per iteration (2:), one optional tail
 *     pixel (3:).
 *
 * w == 0 returns immediately via 5:, which lies after the register restore
 * so that the restore only runs on the path that saved.
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). Macros are defined
 * outside this chunk.
 */

1346 /* |

1347 * a0 - dst (r5g6b5) |

1348 * a1 - src (32bit constant) |

1349 * a2 - w |

1350 */ |

1351 |

1352 beqz a2, 5f |

1353 nop |

1354 |

/* t0 = inverted alpha byte of the constant source. */

1355 not t0, a1 |

1356 srl t0, t0, 24 |

1357 bgtz t0, 1f |

1358 nop |

/* Inverse alpha is zero: plain fill with the converted colour (t1). */

1359 CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3 |

1360 0: |

1361 sh t1, 0(a0) |

1362 addiu a2, a2, -1 |

1363 bgtz a2, 0b |

1364 addiu a0, a0, 2 |

1365 j ra |

1366 nop |

1367 |

1368 1: |

1369 SAVE_REGS_ON_STACK 0, s0, s1, s2 |

/* Low-byte mask and 565 bit-field masks, replicated in both halfwords. */

1370 li t4, 0x00ff00ff |

1371 li t5, 0xf800f800 |

1372 li t6, 0x07e007e0 |

1373 li t7, 0x001F001F |

/* w == 1: only the tail runs. */

1374 addiu t1, a2, -1 |

1375 beqz t1, 3f |

1376 nop |

1377 2: |

1378 lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ |

1379 lhu t2, 2(a0) /* t2 = destination (r5g6b5) */ |

1380 |

/* t0 is passed as the multiplier for both pixels; the products land in
 * t1/t2 and the source is then added with per-byte saturation. */

1381 CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2 |

1382 MIPS_2xUN8x4_MUL_2xUN8 t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8 |

1383 addu_s.qb t1, t1, a1 |

1384 addu_s.qb t2, t2, a1 |

1385 CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1 |

1386 |

1387 sh t3, 0(a0) |

1388 sh t8, 2(a0) |

1389 |

1390 addiu a2, a2, -2 |

/* Continue while at least two pixels remain. */

1391 addiu t1, a2, -1 |

1392 bgtz t1, 2b |

1393 addiu a0, a0, 4 |

1394 3: |

/* Tail: zero or one pixel left. */

1395 beqz a2, 4f |

1396 nop |

1397 |

1398 lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ |

1399 |

1400 CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1 |

1401 MIPS_UN8x4_MUL_UN8 t2, t0, t1, t4, s0, s1, s2 |

1402 addu_s.qb t1, t1, a1 |

1403 CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1 |

1404 |

1405 sh t2, 0(a0) |

1406 |

1407 4: |

1408 RESTORE_REGS_FROM_STACK 0, s0, s1, s2 |

1409 5: |

1410 j ra |

1411 nop |

1412 |

1413 END(pixman_composite_over_n_0565_asm_mips) |
|
1414 |
|
1415 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips) |

/*
 * Per its name: OVER of a constant 32-bit source colour onto an a8r8g8b8
 * destination, for one run of w pixels.
 *
 * Two paths:
 *   - inverse alpha (~src >> 24) is zero: the source word is stored into
 *     every pixel (loop 0:); returns without saving registers.
 *   - otherwise (1:): dst = src + dst * inverse_alpha, using a
 *     MIPS_*UN8x4_MUL_*UN8 macro and a per-byte saturating add. Two pixels
 *     per iteration (2:), one optional tail pixel (3:).
 *
 * w == 0 returns via 5:, which is placed after the register restore.
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). Macros are defined
 * outside this chunk.
 */

1416 /* |

1417 * a0 - dst (a8r8g8b8) |

1418 * a1 - src (32bit constant) |

1419 * a2 - w |

1420 */ |

1421 |

1422 beqz a2, 5f |

1423 nop |

1424 |

/* t0 = inverted alpha byte of the constant source. */

1425 not t0, a1 |

1426 srl t0, t0, 24 |

1427 bgtz t0, 1f |

1428 nop |

/* Inverse alpha is zero: plain fill with the source word. */

1429 0: |

1430 sw a1, 0(a0) |

1431 addiu a2, a2, -1 |

1432 bgtz a2, 0b |

1433 addiu a0, a0, 4 |

1434 j ra |

1435 nop |

1436 |

1437 1: |

1438 SAVE_REGS_ON_STACK 0, s0, s1, s2 |

1439 li t4, 0x00ff00ff |

/* w == 1: only the tail runs. */

1440 addiu t1, a2, -1 |

1441 beqz t1, 3f |

1442 nop |

1443 2: |

1444 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ |

1445 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ |

1446 |

/* Products land in t7/t8; t0 is the multiplier for both pixels. */

1447 MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3 |

1448 |

1449 addu_s.qb t7, t7, a1 |

1450 addu_s.qb t8, t8, a1 |

1451 |

1452 sw t7, 0(a0) |

1453 sw t8, 4(a0) |

1454 |

1455 addiu a2, a2, -2 |

/* Continue while at least two pixels remain. */

1456 addiu t1, a2, -1 |

1457 bgtz t1, 2b |

1458 addiu a0, a0, 8 |

1459 3: |

/* Tail: zero or one pixel left. */

1460 beqz a2, 4f |

1461 nop |

1462 |

1463 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ |

1464 |

1465 MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7 |

1466 |

1467 addu_s.qb t3, t3, a1 |

1468 |

1469 sw t3, 0(a0) |

1470 |

1471 4: |

1472 RESTORE_REGS_FROM_STACK 0, s0, s1, s2 |

1473 5: |

1474 j ra |

1475 nop |

1476 |

1477 END(pixman_composite_over_n_8888_asm_mips) |
|
1478 |
|
1479 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips) |

/*
 * Per its name: ADD of an a8 source, through an a8 mask, onto an a8
 * destination, for one run of w pixels:
 *     dst = saturate(dst + src * mask / 255)
 *
 * Structure: a four-pixel loop (0:) runs w / 4 times (counter in v0),
 * then a one-pixel loop (2:) handles the remaining pixels (counter a3).
 *
 * The division by 255 is done with the sequence
 *     x += (x >> 8, rounded);  x = (x >> 8, rounded)
 * applied per halfword.
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). SAVE/RESTORE macros
 * are defined outside this chunk.
 */

1480 /* |

1481 * a0 - dst (a8) |

1482 * a1 - src (a8) |

1483 * a2 - mask (a8) |

1484 * a3 - w |

1485 */ |

1486 |

1487 SAVE_REGS_ON_STACK 0, v0, v1 |

1488 li t9, 0x00ff00ff |

1489 beqz a3, 3f |

1490 nop |

1491 |

1492 srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ |

1493 beqz v0, 1f /* branch if less than 4 src pixels */ |

1494 nop |

1495 |

1496 0: |

/* Leave when no group of four is left; v0 is decremented in the
 * instruction after the branch on both paths. */

1497 beqz v0, 1f |

1498 addiu v0, v0, -1 |

1499 lbu t0, 0(a2) |

1500 lbu t1, 1(a2) |

1501 lbu t2, 2(a2) |

1502 lbu t3, 3(a2) |

1503 lbu t4, 0(a0) |

1504 lbu t5, 1(a0) |

1505 lbu t6, 2(a0) |

1506 lbu t7, 3(a0) |

1507 |

1508 addiu a2, a2, 4 |

1509 |

/* Pair neighbouring bytes into halfwords (odd pixel in the upper half,
 * even pixel in the lower half). */

1510 precr_sra.ph.w t1, t0, 0 |

1511 precr_sra.ph.w t3, t2, 0 |

1512 precr_sra.ph.w t5, t4, 0 |

1513 precr_sra.ph.w t7, t6, 0 |

1514 |

/* t0 = four mask bytes packed in one word, t1 = four dst bytes. */

1515 precr.qb.ph t0, t3, t1 |

1516 precr.qb.ph t1, t7, t5 |

1517 |

1518 lbu t4, 0(a1) |

1519 lbu v1, 1(a1) |

1520 lbu t7, 2(a1) |

1521 lbu t8, 3(a1) |

1522 |

1523 addiu a1, a1, 4 |

1524 |

/* v1 = sources 1,0 as halfwords; t8 = sources 3,2 as halfwords. */

1525 precr_sra.ph.w v1, t4, 0 |

1526 precr_sra.ph.w t8, t7, 0 |

1527 |

/* mask byte * source halfword for pixels 3,2 (t2) and 1,0 (t3), followed
 * by the rounded divide-by-255 sequence per halfword. */

1528 muleu_s.ph.qbl t2, t0, t8 |

1529 muleu_s.ph.qbr t3, t0, v1 |

1530 shra_r.ph t4, t2, 8 |

1531 shra_r.ph t5, t3, 8 |

1532 and t4, t4, t9 |

1533 and t5, t5, t9 |

1534 addq.ph t2, t2, t4 |

1535 addq.ph t3, t3, t5 |

1536 shra_r.ph t2, t2, 8 |

1537 shra_r.ph t3, t3, 8 |

1538 precr.qb.ph t0, t2, t3 |

1539 |

/* Saturating per-byte add of the four products and the four dst bytes. */

1540 addu_s.qb t2, t0, t1 |

1541 |

/* Store the four result bytes one at a time, lowest byte first. */

1542 sb t2, 0(a0) |

1543 srl t2, t2, 8 |

1544 sb t2, 1(a0) |

1545 srl t2, t2, 8 |

1546 sb t2, 2(a0) |

1547 srl t2, t2, 8 |

1548 sb t2, 3(a0) |

1549 addiu a3, a3, -4 |

1550 b 0b |

1551 addiu a0, a0, 4 |

1552 |

1553 1: |

/* Remaining pixels, one at a time. */

1554 beqz a3, 3f |

1555 nop |

1556 2: |

1557 lbu t8, 0(a1) |

1558 lbu t0, 0(a2) |

1559 lbu t1, 0(a0) |

1560 addiu a1, a1, 1 |

1561 addiu a2, a2, 1 |

1562 |

/* Scalar src * mask, same divide-by-255 sequence, result masked to a
 * byte before the saturating add with dst. */

1563 mul t2, t0, t8 |

1564 shra_r.ph t3, t2, 8 |

1565 andi t3, t3, 0xff |

1566 addq.ph t2, t2, t3 |

1567 shra_r.ph t2, t2, 8 |

1568 andi t2, t2, 0xff |

1569 |

1570 addu_s.qb t2, t2, t1 |

1571 sb t2, 0(a0) |

1572 addiu a3, a3, -1 |

1573 bnez a3, 2b |

1574 addiu a0, a0, 1 |

1575 |

1576 3: |

1577 RESTORE_REGS_FROM_STACK 0, v0, v1 |

1578 j ra |

1579 nop |

1580 |

1581 END(pixman_composite_add_8_8_8_asm_mips) |
|
1582 |
|
1583 LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips) |

/*
 * Per its name: ADD of a constant source, through an a8 mask, onto an a8
 * destination, for one run of w pixels. Only the top byte (bits 31..24)
 * of the constant is used:
 *     dst = saturate(dst + src_alpha * mask / 255)
 *
 * Structure: a four-pixel loop (0:) runs w / 4 times (counter in v0),
 * then a one-pixel loop (2:) handles the remaining pixels (counter a3).
 * The division by 255 uses the rounded "x += x >> 8; x >>= 8" sequence.
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). SAVE/RESTORE macros
 * are defined outside this chunk.
 */

1584 /* |

1585 * a0 - dst (a8) |

1586 * a1 - src (32bit constant) |

1587 * a2 - mask (a8) |

1588 * a3 - w |

1589 */ |

1590 |

1591 SAVE_REGS_ON_STACK 0, v0 |

1592 li t9, 0x00ff00ff |

1593 beqz a3, 3f |

1594 nop |

1595 |

1596 srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ |

1597 beqz v0, 1f /* branch if less than 4 src pixels */ |

1598 nop |

1599 |

/* t8 = source alpha byte replicated into both halfwords. */

1600 srl t8, a1, 24 |

1601 replv.ph t8, t8 |

1602 |

1603 0: |

/* Leave when no group of four is left; v0 is decremented in the
 * instruction after the branch on both paths. */

1604 beqz v0, 1f |

1605 addiu v0, v0, -1 |

1606 lbu t0, 0(a2) |

1607 lbu t1, 1(a2) |

1608 lbu t2, 2(a2) |

1609 lbu t3, 3(a2) |

1610 lbu t4, 0(a0) |

1611 lbu t5, 1(a0) |

1612 lbu t6, 2(a0) |

1613 lbu t7, 3(a0) |

1614 |

1615 addiu a2, a2, 4 |

1616 |

/* Pack the four mask bytes into t0 and the four dst bytes into t1. */

1617 precr_sra.ph.w t1, t0, 0 |

1618 precr_sra.ph.w t3, t2, 0 |

1619 precr_sra.ph.w t5, t4, 0 |

1620 precr_sra.ph.w t7, t6, 0 |

1621 |

1622 precr.qb.ph t0, t3, t1 |

1623 precr.qb.ph t1, t7, t5 |

1624 |

/* mask * alpha for the upper two (t2) and lower two (t3) pixels, then
 * the rounded divide-by-255 sequence per halfword. */

1625 muleu_s.ph.qbl t2, t0, t8 |

1626 muleu_s.ph.qbr t3, t0, t8 |

1627 shra_r.ph t4, t2, 8 |

1628 shra_r.ph t5, t3, 8 |

1629 and t4, t4, t9 |

1630 and t5, t5, t9 |

1631 addq.ph t2, t2, t4 |

1632 addq.ph t3, t3, t5 |

1633 shra_r.ph t2, t2, 8 |

1634 shra_r.ph t3, t3, 8 |

1635 precr.qb.ph t0, t2, t3 |

1636 |

/* Saturating per-byte add with the four dst bytes. */

1637 addu_s.qb t2, t0, t1 |

1638 |

/* Store the four result bytes one at a time, lowest byte first. */

1639 sb t2, 0(a0) |

1640 srl t2, t2, 8 |

1641 sb t2, 1(a0) |

1642 srl t2, t2, 8 |

1643 sb t2, 2(a0) |

1644 srl t2, t2, 8 |

1645 sb t2, 3(a0) |

1646 addiu a3, a3, -4 |

1647 b 0b |

1648 addiu a0, a0, 4 |

1649 |

1650 1: |

/* Remaining pixels, one at a time. t8 is reloaded as the plain
 * (non-replicated) alpha byte because this label is also reached when the
 * four-pixel setup above was skipped. */

1651 beqz a3, 3f |

1652 nop |

1653 srl t8, a1, 24 |

1654 2: |

1655 lbu t0, 0(a2) |

1656 lbu t1, 0(a0) |

1657 addiu a2, a2, 1 |

1658 |

1659 mul t2, t0, t8 |

1660 shra_r.ph t3, t2, 8 |

1661 andi t3, t3, 0xff |

1662 addq.ph t2, t2, t3 |

1663 shra_r.ph t2, t2, 8 |

1664 andi t2, t2, 0xff |

1665 |

1666 addu_s.qb t2, t2, t1 |

1667 sb t2, 0(a0) |

1668 addiu a3, a3, -1 |

1669 bnez a3, 2b |

1670 addiu a0, a0, 1 |

1671 |

1672 3: |

1673 RESTORE_REGS_FROM_STACK 0, v0 |

1674 j ra |

1675 nop |

1676 |

1677 END(pixman_composite_add_n_8_8_asm_mips) |
|
1678 |
|
1679 LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips) |

/*
 * Per its name: ADD of a constant 32-bit source, through an a8 mask, onto
 * an a8r8g8b8 destination, for one run of w pixels.
 *
 * The arithmetic is delegated to the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4
 * macros (defined outside this chunk; by name: source * mask + dst -
 * confirm rounding/saturation there). The constant a1 is passed as the
 * source for both pixels of a pair.
 *
 * Two pixels per iteration in the main loop (1:), one optional pixel in
 * the tail (2:), w == 0 jumps to the epilogue (3:). The instruction after
 * each branch appears to be a branch delay slot (assumes .set noreorder,
 * not visible in this chunk).
 */

1680 /* |

1681 * a0 - dst (a8r8g8b8) |

1682 * a1 - src (32bit constant) |

1683 * a2 - mask (a8) |

1684 * a3 - w |

1685 */ |

1686 |

1687 SAVE_REGS_ON_STACK 0, s0, s1, s2 |

1688 li t4, 0x00ff00ff |

1689 beqz a3, 3f |

1690 nop |

/* w == 1: only the tail runs. */

1691 addiu t1, a3, -1 |

1692 beqz t1, 2f |

1693 nop |

1694 1: |

1695 /* a1 = source (32bit constant) */ |

1696 lbu t0, 0(a2) /* t0 = mask (a8) */ |

1697 lbu t1, 1(a2) /* t1 = mask (a8) */ |

1698 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ |

1699 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ |

1700 addiu a2, a2, 2 |

1701 |

/* Results are taken from t5/t6 (stored below). */

1702 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \ |

1703 t0, t1, \ |

1704 t2, t3, \ |

1705 t5, t6, \ |

1706 t4, t7, t8, t9, s0, s1, s2 |

1707 |

1708 sw t5, 0(a0) |

1709 sw t6, 4(a0) |

1710 addiu a3, a3, -2 |

/* Continue while at least two pixels remain. */

1711 addiu t1, a3, -1 |

1712 bgtz t1, 1b |

1713 addiu a0, a0, 8 |

1714 2: |

/* Tail: zero or one pixel left. */

1715 beqz a3, 3f |

1716 nop |

1717 /* a1 = source (32bit constant) */ |

1718 lbu t0, 0(a2) /* t0 = mask (a8) */ |

1719 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ |

1720 |

1721 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6 |

1722 |

1723 sw t2, 0(a0) |

1724 3: |

1725 RESTORE_REGS_FROM_STACK 0, s0, s1, s2 |

1726 j ra |

1727 nop |

1728 |

1729 END(pixman_composite_add_n_8_8888_asm_mips) |
|
1730 |
|
1731 LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips) |

/*
 * Per its name: ADD of an r5g6b5 source, through an a8 mask, onto an
 * r5g6b5 destination, for one run of w pixels.
 *
 * Source and destination are widened with CONVERT_*0565_TO_*8888, combined
 * with the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4 macro (by name: source * mask +
 * dst) and narrowed back to 565 before the store. All macros are defined
 * outside this chunk; confirm their register contracts there.
 *
 * Two pixels per iteration in the main loop (1:), one optional pixel in
 * the tail (2:), w == 0 jumps to the epilogue (3:). The instruction after
 * each branch appears to be a branch delay slot (assumes .set noreorder,
 * not visible in this chunk).
 */

1732 /* |

1733 * a0 - dst (r5g6b5) |

1734 * a1 - src (r5g6b5) |

1735 * a2 - mask (a8) |

1736 * a3 - w |

1737 */ |

1738 |

1739 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 |

/* 565 bit-field masks (r5, g6, b5) and a low-byte mask, each replicated in
 * both halfwords; passed to the macros below. */

1740 li t4, 0xf800f800 |

1741 li t5, 0x07e007e0 |

1742 li t6, 0x001F001F |

1743 li t7, 0x00ff00ff |

1744 beqz a3, 3f |

1745 nop |

/* w == 1: only the tail runs. */

1746 addiu t1, a3, -1 |

1747 beqz t1, 2f |

1748 nop |

1749 1: |

1750 lhu t0, 0(a1) /* t0 = source (r5g6b5) */ |

1751 lhu t1, 2(a1) /* t1 = source (r5g6b5) */ |

1752 lbu t2, 0(a2) /* t2 = mask (a8) */ |

1753 lbu t3, 1(a2) /* t3 = mask (a8) */ |

1754 lhu t8, 0(a0) /* t8 = destination (r5g6b5) */ |

1755 lhu t9, 2(a0) /* t9 = destination (r5g6b5) */ |

1756 addiu a1, a1, 4 |

1757 addiu a2, a2, 2 |

1758 |

/* Sources widen into s0/s1, destinations into s2/s3; the combined result
 * is produced in t0/t1 and narrowed into s0/s1 for the store. */

1759 CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 |

1760 CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, s6, s7 |

1761 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s0, s1, \ |

1762 t2, t3, \ |

1763 s2, s3, \ |

1764 t0, t1, \ |

1765 t7, s4, s5, s6, s7, t8, t9 |

1766 CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 |

1767 |

1768 sh s0, 0(a0) |

1769 sh s1, 2(a0) |

1770 addiu a3, a3, -2 |

/* Continue while at least two pixels remain. */

1771 addiu t1, a3, -1 |

1772 bgtz t1, 1b |

1773 addiu a0, a0, 4 |

1774 2: |

/* Tail: zero or one pixel left. */

1775 beqz a3, 3f |

1776 nop |

1777 lhu t0, 0(a1) /* t0 = source (r5g6b5) */ |

1778 lbu t1, 0(a2) /* t1 = mask (a8) */ |

1779 lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ |

1780 |

/* The constant registers t4..t6 are reused as scratch/outputs here; this
 * is the last use of the function's constants. */

1781 CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 |

1782 CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 |

1783 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t3, t1, t4, t0, t7, t2, t5, t6 |

1784 CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 |

1785 |

1786 sh t3, 0(a0) |

1787 3: |

1788 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 |

1789 j ra |

1790 nop |

1791 |

1792 END(pixman_composite_add_0565_8_0565_asm_mips) |
|
1793 |
|
1794 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips) |

/*
 * Per its name: ADD of an a8r8g8b8 source, through an a8 mask, onto an
 * a8r8g8b8 destination, for one run of w pixels.
 *
 * The arithmetic is delegated to the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4
 * macros (defined outside this chunk; by name: source * mask + dst -
 * confirm rounding/saturation there).
 *
 * Two pixels per iteration in the main loop (1:), one optional pixel in
 * the tail (2:), w == 0 jumps to the epilogue (3:). The instruction after
 * each branch appears to be a branch delay slot (assumes .set noreorder,
 * not visible in this chunk).
 */

1795 /* |

1796 * a0 - dst (a8r8g8b8) |

1797 * a1 - src (a8r8g8b8) |

1798 * a2 - mask (a8) |

1799 * a3 - w |

1800 */ |

1801 |

1802 SAVE_REGS_ON_STACK 0, s0, s1, s2 |

1803 li t4, 0x00ff00ff |

1804 beqz a3, 3f |

1805 nop |

/* w == 1: only the tail runs. */

1806 addiu t1, a3, -1 |

1807 beqz t1, 2f |

1808 nop |

1809 1: |

1810 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1811 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ |

1812 lbu t2, 0(a2) /* t2 = mask (a8) */ |

1813 lbu t3, 1(a2) /* t3 = mask (a8) */ |

1814 lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ |

1815 lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ |

1816 addiu a1, a1, 8 |

1817 addiu a2, a2, 2 |

1818 |

/* Results are taken from t7/t8 (stored below); the source registers t0/t1
 * are also listed among the trailing scratch arguments. */

1819 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ |

1820 t2, t3, \ |

1821 t5, t6, \ |

1822 t7, t8, \ |

1823 t4, t9, s0, s1, s2, t0, t1 |

1824 |

1825 sw t7, 0(a0) |

1826 sw t8, 4(a0) |

1827 addiu a3, a3, -2 |

/* Continue while at least two pixels remain. */

1828 addiu t1, a3, -1 |

1829 bgtz t1, 1b |

1830 addiu a0, a0, 8 |

1831 2: |

/* Tail: zero or one pixel left. */

1832 beqz a3, 3f |

1833 nop |

1834 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1835 lbu t1, 0(a2) /* t1 = mask (a8) */ |

1836 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ |

1837 |

1838 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7 |

1839 |

1840 sw t3, 0(a0) |

1841 3: |

1842 RESTORE_REGS_FROM_STACK 0, s0, s1, s2 |

1843 j ra |

1844 nop |

1845 |

1846 END(pixman_composite_add_8888_8_8888_asm_mips) |
|
1847 |
|
1848 LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips) |

/*
 * Per its name: ADD of an a8r8g8b8 source, through a constant mask, onto
 * an a8r8g8b8 destination, for one run of w pixels.
 *
 * Only the top byte of the constant mask is used: a2 is shifted right by
 * 24 once, up front, and then passed as the 8-bit mask for every pixel.
 * The arithmetic is delegated to the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4
 * macros (defined outside this chunk; by name: source * mask + dst).
 *
 * Two pixels per iteration in the main loop (1:), one optional pixel in
 * the tail (2:), w == 0 jumps to the epilogue (3:). The instruction after
 * each branch appears to be a branch delay slot (assumes .set noreorder,
 * not visible in this chunk).
 */

1849 /* |

1850 * a0 - dst (a8r8g8b8) |

1851 * a1 - src (a8r8g8b8) |

1852 * a2 - mask (32bit constant) |

1853 * a3 - w |

1854 */ |

1855 |

1856 SAVE_REGS_ON_STACK 0, s0, s1, s2 |

1857 li t4, 0x00ff00ff |

1858 beqz a3, 3f |

1859 nop |

/* a2 = alpha byte of the constant mask from here on. */

1860 srl a2, a2, 24 |

/* w == 1: only the tail runs. */

1861 addiu t1, a3, -1 |

1862 beqz t1, 2f |

1863 nop |

1864 1: |

1865 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1866 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ |

1867 /* a2 = mask (32bit constant) */ |

1868 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ |

1869 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ |

1870 addiu a1, a1, 8 |

1871 |

/* a2 is passed as both mask operands and is reused on every iteration.
 * NOTE(review): this relies on whatever the macro does to its mask
 * operands being repeatable - confirm in the macro definition. */

1872 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ |

1873 a2, a2, \ |

1874 t2, t3, \ |

1875 t5, t6, \ |

1876 t4, t7, t8, t9, s0, s1, s2 |

1877 |

1878 sw t5, 0(a0) |

1879 sw t6, 4(a0) |

1880 addiu a3, a3, -2 |

/* Continue while at least two pixels remain. */

1881 addiu t1, a3, -1 |

1882 bgtz t1, 1b |

1883 addiu a0, a0, 8 |

1884 2: |

/* Tail: zero or one pixel left. */

1885 beqz a3, 3f |

1886 nop |

1887 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1888 /* a2 = mask (32bit constant) */ |

1889 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ |

1890 |

1891 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7 |

1892 |

1893 sw t3, 0(a0) |

1894 3: |

1895 RESTORE_REGS_FROM_STACK 0, s0, s1, s2 |

1896 j ra |

1897 nop |

1898 |

1899 END(pixman_composite_add_8888_n_8888_asm_mips) |
|
1900 |
|
1901 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips) |

/*
 * Per its name: ADD of an a8r8g8b8 source, through an a8r8g8b8 mask, onto
 * an a8r8g8b8 destination, for one run of w pixels.
 *
 * Only the top byte of each mask word is used (srl by 24) and it is then
 * passed as an 8-bit mask to the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4 macros
 * (defined outside this chunk; by name: source * mask + dst).
 *
 * Two pixels per iteration in the main loop (1:), one optional pixel in
 * the tail (2:), w == 0 jumps to the epilogue (3:). The instruction after
 * each branch appears to be a branch delay slot (assumes .set noreorder,
 * not visible in this chunk).
 */

1902 /* |

1903 * a0 - dst (a8r8g8b8) |

1904 * a1 - src (a8r8g8b8) |

1905 * a2 - mask (a8r8g8b8) |

1906 * a3 - w |

1907 */ |

1908 |

1909 SAVE_REGS_ON_STACK 0, s0, s1, s2 |

1910 li t4, 0x00ff00ff |

1911 beqz a3, 3f |

1912 nop |

/* w == 1: only the tail runs. */

1913 addiu t1, a3, -1 |

1914 beqz t1, 2f |

1915 nop |

1916 1: |

1917 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1918 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ |

1919 lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */ |

1920 lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */ |

1921 lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ |

1922 lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ |

1923 addiu a1, a1, 8 |

1924 addiu a2, a2, 8 |

/* Keep only bits 31..24 (the alpha byte) of each mask word. */

1925 srl t2, t2, 24 |

1926 srl t3, t3, 24 |

1927 |

/* Results are taken from t7/t8 (stored below). */

1928 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \ |

1929 t2, t3, \ |

1930 t5, t6, \ |

1931 t7, t8, \ |

1932 t4, t9, s0, s1, s2, t0, t1 |

1933 |

1934 sw t7, 0(a0) |

1935 sw t8, 4(a0) |

1936 addiu a3, a3, -2 |

/* Continue while at least two pixels remain. */

1937 addiu t1, a3, -1 |

1938 bgtz t1, 1b |

1939 addiu a0, a0, 8 |

1940 2: |

/* Tail: zero or one pixel left. */

1941 beqz a3, 3f |

1942 nop |

1943 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ |

1944 lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */ |

1945 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ |

1946 srl t1, t1, 24 |

1947 |

1948 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7 |

1949 |

1950 sw t3, 0(a0) |

1951 3: |

1952 RESTORE_REGS_FROM_STACK 0, s0, s1, s2 |

1953 j ra |

1954 nop |

1955 |

1956 END(pixman_composite_add_8888_8888_8888_asm_mips) |
|
1957 |
|
1958 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips) |

/*
 * Per its name: unmasked ADD of an a8 source onto an a8 destination, for
 * one run of w pixels: dst = saturate(dst + src), per byte.
 *
 * Structure: a four-pixel loop (0:) runs w / 4 times (counter in t9),
 * packing four bytes into one word so that a single addu_s.qb adds all
 * four with saturation; a one-pixel loop (2:) handles the remainder
 * (counter a2).
 *
 * Uses only t-registers, so nothing is saved on the stack. The instruction
 * after each branch appears to be a branch delay slot (assumes .set
 * noreorder, not visible in this chunk).
 */

1959 /* |

1960 * a0 - dst (a8) |

1961 * a1 - src (a8) |

1962 * a2 - w |

1963 */ |

1964 |

1965 beqz a2, 3f |

1966 nop |

1967 srl t9, a2, 2 /* t9 = how many multiples of 4 dst pixels */ |

1968 beqz t9, 1f /* branch if less than 4 src pixels */ |

1969 nop |

1970 |

1971 0: |

/* Leave when no group of four is left; t9 is decremented in the
 * instruction after the branch on both paths. */

1972 beqz t9, 1f |

1973 addiu t9, t9, -1 |

1974 lbu t0, 0(a1) |

1975 lbu t1, 1(a1) |

1976 lbu t2, 2(a1) |

1977 lbu t3, 3(a1) |

1978 lbu t4, 0(a0) |

1979 lbu t5, 1(a0) |

1980 lbu t6, 2(a0) |

1981 lbu t7, 3(a0) |

1982 |

1983 addiu a1, a1, 4 |

1984 |

/* Pair neighbouring bytes into halfwords, then pack: t0 = four source
 * bytes, t1 = four dst bytes (pixel 0 in the lowest byte). */

1985 precr_sra.ph.w t1, t0, 0 |

1986 precr_sra.ph.w t3, t2, 0 |

1987 precr_sra.ph.w t5, t4, 0 |

1988 precr_sra.ph.w t7, t6, 0 |

1989 |

1990 precr.qb.ph t0, t3, t1 |

1991 precr.qb.ph t1, t7, t5 |

1992 |

1993 addu_s.qb t2, t0, t1 |

1994 |

/* Store the four result bytes one at a time, lowest byte first. */

1995 sb t2, 0(a0) |

1996 srl t2, t2, 8 |

1997 sb t2, 1(a0) |

1998 srl t2, t2, 8 |

1999 sb t2, 2(a0) |

2000 srl t2, t2, 8 |

2001 sb t2, 3(a0) |

2002 addiu a2, a2, -4 |

2003 b 0b |

2004 addiu a0, a0, 4 |

2005 |

2006 1: |

/* Remaining pixels, one at a time. */

2007 beqz a2, 3f |

2008 nop |

2009 2: |

2010 lbu t0, 0(a1) |

2011 lbu t1, 0(a0) |

2012 addiu a1, a1, 1 |

2013 |

2014 addu_s.qb t2, t0, t1 |

2015 sb t2, 0(a0) |

2016 addiu a2, a2, -1 |

2017 bnez a2, 2b |

2018 addiu a0, a0, 1 |

2019 |

2020 3: |

2021 j ra |

2022 nop |

2023 |

2024 END(pixman_composite_add_8_8_asm_mips) |
|
2025 |
|
2026 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips) |

/*
 * Per its name: unmasked ADD of an a8r8g8b8 source onto an a8r8g8b8
 * destination, for one run of w pixels: each destination word becomes the
 * per-byte unsigned saturating sum (addu_s.qb) of itself and the source.
 *
 * Structure:
 *   1: four pixels per iteration, executed for all but the last group of
 *      four (t9 is decremented first and the loop exits to 2: when it
 *      reaches zero; a2 is reduced by 4 in the instruction after that
 *      branch, so also for the group processed at 2:).
 *   2: the last group of four, then exit if nothing remains.
 *   3: remaining pixels, one per iteration.
 *
 * Uses only t-registers, so nothing is saved on the stack. The instruction
 * after each branch appears to be a branch delay slot (assumes .set
 * noreorder, not visible in this chunk).
 */

2027 /* |

2028 * a0 - dst (a8r8g8b8) |

2029 * a1 - src (a8r8g8b8) |

2030 * a2 - w |

2031 */ |

2032 |

2033 beqz a2, 4f |

2034 nop |

2035 |

2036 srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ |

2037 beqz t9, 3f /* branch if less than 4 src pixels */ |

2038 nop |

2039 1: |

2040 addiu t9, t9, -1 |

2041 beqz t9, 2f |

2042 addiu a2, a2, -4 |

2043 |

2044 lw t0, 0(a1) |

2045 lw t1, 4(a1) |

2046 lw t2, 8(a1) |

2047 lw t3, 12(a1) |

2048 lw t4, 0(a0) |

2049 lw t5, 4(a0) |

2050 lw t6, 8(a0) |

2051 lw t7, 12(a0) |

2052 addiu a1, a1, 16 |

2053 |

2054 addu_s.qb t4, t4, t0 |

2055 addu_s.qb t5, t5, t1 |

2056 addu_s.qb t6, t6, t2 |

2057 addu_s.qb t7, t7, t3 |

2058 |

2059 sw t4, 0(a0) |

2060 sw t5, 4(a0) |

2061 sw t6, 8(a0) |

2062 sw t7, 12(a0) |

2063 b 1b |

2064 addiu a0, a0, 16 |

2065 2: |

/* Last full group of four pixels (same body as the loop above). */

2066 lw t0, 0(a1) |

2067 lw t1, 4(a1) |

2068 lw t2, 8(a1) |

2069 lw t3, 12(a1) |

2070 lw t4, 0(a0) |

2071 lw t5, 4(a0) |

2072 lw t6, 8(a0) |

2073 lw t7, 12(a0) |

2074 addiu a1, a1, 16 |

2075 |

2076 addu_s.qb t4, t4, t0 |

2077 addu_s.qb t5, t5, t1 |

2078 addu_s.qb t6, t6, t2 |

2079 addu_s.qb t7, t7, t3 |

2080 |

2081 sw t4, 0(a0) |

2082 sw t5, 4(a0) |

2083 sw t6, 8(a0) |

2084 sw t7, 12(a0) |

2085 |

/* Done if w was a multiple of four; otherwise fall into the
 * one-pixel loop. */

2086 beqz a2, 4f |

2087 addiu a0, a0, 16 |

2088 3: |

2089 lw t0, 0(a1) |

2090 lw t1, 0(a0) |

2091 addiu a1, a1, 4 |

2092 addiu a2, a2, -1 |

2093 addu_s.qb t1, t1, t0 |

2094 sw t1, 0(a0) |

2095 bnez a2, 3b |

2096 addiu a0, a0, 4 |

2097 4: |

2098 jr ra |

2099 nop |

2100 |

2101 END(pixman_composite_add_8888_8888_asm_mips) |
|
2102 |
|
2103 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips) |

/*
 * Per its name: OUT_REVERSE of an a8 source onto an r5g6b5 destination,
 * for one run of w pixels. As coded: each destination pixel is widened to
 * 8888, multiplied by (255 - src) via a MIPS_*UN8x4_MUL_*UN8 macro and
 * narrowed back to 565.
 *
 * w == 0 returns via 4: before any register is saved. Two pixels per
 * iteration in the main loop (1:), one optional pixel in the tail (2:).
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). Macros are defined
 * outside this chunk; confirm their register contracts there.
 */

2104 /* |

2105 * a0 - dst (r5g6b5) |

2106 * a1 - src (a8) |

2107 * a2 - w |

2108 */ |

2109 |

2110 beqz a2, 4f |

2111 nop |

2112 |

2113 SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 |

/* 565 bit-field masks (r5, g6, b5) and a low-byte mask, each replicated in
 * both halfwords; passed to the macros below. */

2114 li t2, 0xf800f800 |

2115 li t3, 0x07e007e0 |

2116 li t4, 0x001F001F |

2117 li t5, 0x00ff00ff |

2118 |

/* w == 1: only the tail runs. */

2119 addiu t1, a2, -1 |

2120 beqz t1, 2f |

2121 nop |

2122 1: |

2123 lbu t0, 0(a1) /* t0 = source (a8) */ |

2124 lbu t1, 1(a1) /* t1 = source (a8) */ |

2125 lhu t6, 0(a0) /* t6 = destination (r5g6b5) */ |

2126 lhu t7, 2(a0) /* t7 = destination (r5g6b5) */ |

2127 addiu a1, a1, 2 |

2128 |

/* Bitwise NOT then mask to a byte: t0/t1 = 255 - source. */

2129 not t0, t0 |

2130 not t1, t1 |

2131 andi t0, 0xff /* t0 = neg source1 */ |

2132 andi t1, 0xff /* t1 = neg source2 */ |

2133 CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3 |

2134 MIPS_2xUN8x4_MUL_2xUN8 t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9 |

2135 CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1 |

2136 |

2137 sh t8, 0(a0) |

2138 sh t9, 2(a0) |

2139 addiu a2, a2, -2 |

/* Continue while at least two pixels remain. */

2140 addiu t1, a2, -1 |

2141 bgtz t1, 1b |

2142 addiu a0, a0, 4 |

2143 2: |

/* Tail: zero or one pixel left. */

2144 beqz a2, 3f |

2145 nop |

2146 lbu t0, 0(a1) /* t0 = source (a8) */ |

2147 lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ |

2148 |

2149 not t0, t0 |

2150 andi t0, 0xff /* t0 = neg source */ |

/* t2..t4 (565 constants) are reused as outputs/scratch from here on; t5
 * is still passed to the multiply macro as the low-byte mask. */

2151 CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4 |

2152 MIPS_UN8x4_MUL_UN8 t2, t0, t1, t5, t3, t4, t6 |

2153 CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4 |

2154 |

2155 sh t2, 0(a0) |

2156 3: |

2157 RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 |

2158 4: |

2159 j ra |

2160 nop |

2161 |

2162 END(pixman_composite_out_reverse_8_0565_asm_mips) |
|
2163 |
|
2164 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips) |

/*
 * Per its name: OUT_REVERSE of an a8 source onto an a8r8g8b8 destination,
 * for one run of w pixels. As coded: each destination word is multiplied
 * by (255 - src) via a MIPS_*UN8x4_MUL_*UN8 macro and stored back.
 *
 * Only t-registers are named, so nothing is saved on the stack. Two pixels
 * per iteration in the main loop (1:), one optional pixel in the tail
 * (2:), w == 0 returns via 3:.
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). Macros are defined
 * outside this chunk.
 */

2165 /* |

2166 * a0 - dst (a8r8g8b8) |

2167 * a1 - src (a8) |

2168 * a2 - w |

2169 */ |

2170 |

2171 beqz a2, 3f |

2172 nop |

2173 li t4, 0x00ff00ff |

/* w == 1: only the tail runs. */

2174 addiu t1, a2, -1 |

2175 beqz t1, 2f |

2176 nop |

2177 1: |

2178 lbu t0, 0(a1) /* t0 = source (a8) */ |

2179 lbu t1, 1(a1) /* t1 = source (a8) */ |

2180 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ |

2181 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ |

2182 addiu a1, a1, 2 |

/* Bitwise NOT then mask to a byte: t0/t1 = 255 - source. */

2183 not t0, t0 |

2184 not t1, t1 |

2185 andi t0, 0xff /* t0 = neg source */ |

2186 andi t1, 0xff /* t1 = neg source */ |

2187 |

/* Products land in t5/t6. The input registers t2, t3 and t0 are also
 * listed as the last three scratch arguments.
 * NOTE(review): this is only safe if the macro has finished reading those
 * inputs before it writes those scratch slots - confirm in the macro
 * definition. */

2188 MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0 |

2189 |

2190 sw t5, 0(a0) |

2191 sw t6, 4(a0) |

2192 addiu a2, a2, -2 |

/* Continue while at least two pixels remain. */

2193 addiu t1, a2, -1 |

2194 bgtz t1, 1b |

2195 addiu a0, a0, 8 |

2196 2: |

/* Tail: zero or one pixel left. */

2197 beqz a2, 3f |

2198 nop |

2199 lbu t0, 0(a1) /* t0 = source (a8) */ |

2200 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ |

2201 not t0, t0 |

2202 andi t0, 0xff /* t0 = neg source */ |

2203 |

2204 MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6 |

2205 |

2206 sw t2, 0(a0) |

2207 3: |

2208 j ra |

2209 nop |

2210 |

2211 END(pixman_composite_out_reverse_8_8888_asm_mips) |
|
2212 |
|
2213 LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips) |

/*
 * Per its name: OVER_REVERSE of a constant 32-bit source with an a8r8g8b8
 * destination, for one run of w pixels. As coded, for every pixel:
 *     ia  = (~dst) >> 24                  (255 - destination alpha)
 *     dst = saturate(dst + src * ia / 255)   per byte
 *
 * The multiply is open-coded with DSP instructions: muleu_s.ph.qbl/.qbr
 * multiply the upper/lower two source bytes by the replicated ia, the
 * rounded "x += x >> 8; x >>= 8" sequence divides by 255, precr.qb.ph
 * repacks the four bytes and addu_s.qb adds the destination.
 *
 * Structure: a four-pixel loop (1:) runs w / 4 times (counter in t9), then
 * a one-pixel loop (3:) handles the remainder (counter a2). w == 0 returns
 * via 5: before any register is saved.
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). SAVE/RESTORE macros
 * are defined outside this chunk.
 */

2214 /* |

2215 * a0 - dst (a8r8g8b8) |

2216 * a1 - src (32bit constant) |

2217 * a2 - w |

2218 */ |

2219 |

2220 beqz a2, 5f |

2221 nop |

2222 |

2223 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 |

2224 li t0, 0x00ff00ff |

2225 srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ |

2226 beqz t9, 2f /* branch if less than 4 src pixels */ |

2227 nop |

2228 1: |

/* Leave when no group of four is left; t9 is decremented in the
 * instruction after the branch on both paths. */

2229 beqz t9, 2f |

2230 addiu t9, t9, -1 |

2231 |

2232 lw t1, 0(a0) |

2233 lw t2, 4(a0) |

2234 lw t3, 8(a0) |

2235 lw t4, 12(a0) |

2236 |

2237 addiu a2, a2, -4 |

2238 |

/* t5..t8 = inverted destination alpha of pixels 0..3, replicated into
 * both halfwords. */

2239 not t5, t1 |

2240 not t6, t2 |

2241 not t7, t3 |

2242 not t8, t4 |

2243 srl t5, t5, 24 |

2244 srl t6, t6, 24 |

2245 srl t7, t7, 24 |

2246 srl t8, t8, 24 |

2247 replv.ph t5, t5 |

2248 replv.ph t6, t6 |

2249 replv.ph t7, t7 |

2250 replv.ph t8, t8 |

/* Per pixel: upper two source bytes (even s-register) and lower two
 * source bytes (odd s-register) times the inverted alpha. */

2251 muleu_s.ph.qbl s0, a1, t5 |

2252 muleu_s.ph.qbr s1, a1, t5 |

2253 muleu_s.ph.qbl s2, a1, t6 |

2254 muleu_s.ph.qbr s3, a1, t6 |

2255 muleu_s.ph.qbl s4, a1, t7 |

2256 muleu_s.ph.qbr s5, a1, t7 |

2257 muleu_s.ph.qbl s6, a1, t8 |

2258 muleu_s.ph.qbr s7, a1, t8 |

2259 |

/* Divide-by-255 sequence for pixels 0 and 1 (s0..s3). */

2260 shra_r.ph t5, s0, 8 |

2261 shra_r.ph t6, s1, 8 |

2262 shra_r.ph t7, s2, 8 |

2263 shra_r.ph t8, s3, 8 |

2264 and t5, t5, t0 |

2265 and t6, t6, t0 |

2266 and t7, t7, t0 |

2267 and t8, t8, t0 |

2268 addq.ph s0, s0, t5 |

2269 addq.ph s1, s1, t6 |

2270 addq.ph s2, s2, t7 |

2271 addq.ph s3, s3, t8 |

2272 shra_r.ph s0, s0, 8 |

2273 shra_r.ph s1, s1, 8 |

2274 shra_r.ph s2, s2, 8 |

2275 shra_r.ph s3, s3, 8 |

/* Same sequence for pixels 2 and 3 (s4..s7). */

2276 shra_r.ph t5, s4, 8 |

2277 shra_r.ph t6, s5, 8 |

2278 shra_r.ph t7, s6, 8 |

2279 shra_r.ph t8, s7, 8 |

2280 and t5, t5, t0 |

2281 and t6, t6, t0 |

2282 and t7, t7, t0 |

2283 and t8, t8, t0 |

2284 addq.ph s4, s4, t5 |

2285 addq.ph s5, s5, t6 |

2286 addq.ph s6, s6, t7 |

2287 addq.ph s7, s7, t8 |

2288 shra_r.ph s4, s4, 8 |

2289 shra_r.ph s5, s5, 8 |

2290 shra_r.ph s6, s6, 8 |

2291 shra_r.ph s7, s7, 8 |

2292 |

/* Repack each pixel to four bytes and add the destination with per-byte
 * saturation. */

2293 precr.qb.ph t5, s0, s1 |

2294 precr.qb.ph t6, s2, s3 |

2295 precr.qb.ph t7, s4, s5 |

2296 precr.qb.ph t8, s6, s7 |

2297 addu_s.qb t5, t1, t5 |

2298 addu_s.qb t6, t2, t6 |

2299 addu_s.qb t7, t3, t7 |

2300 addu_s.qb t8, t4, t8 |

2301 |

2302 sw t5, 0(a0) |

2303 sw t6, 4(a0) |

2304 sw t7, 8(a0) |

2305 sw t8, 12(a0) |

2306 b 1b |

2307 addiu a0, a0, 16 |

2308 |

2309 2: |

/* Remaining pixels, one at a time. */

2310 beqz a2, 4f |

2311 nop |

2312 3: |

2313 lw t1, 0(a0) |

2314 |

/* t2 = inverted destination alpha, replicated into both halfwords. */

2315 not t2, t1 |

2316 srl t2, t2, 24 |

2317 replv.ph t2, t2 |

2318 |

2319 muleu_s.ph.qbl t4, a1, t2 |

2320 muleu_s.ph.qbr t5, a1, t2 |

2321 shra_r.ph t6, t4, 8 |

2322 shra_r.ph t7, t5, 8 |

2323 |

2324 and t6,t6,t0 |

2325 and t7,t7,t0 |

2326 |

2327 addq.ph t8, t4, t6 |

2328 addq.ph t9, t5, t7 |

2329 |

2330 shra_r.ph t8, t8, 8 |

2331 shra_r.ph t9, t9, 8 |

2332 |

2333 precr.qb.ph t9, t8, t9 |

2334 |

2335 addu_s.qb t9, t1, t9 |

2336 sw t9, 0(a0) |

2337 |

2338 addiu a2, a2, -1 |

2339 bnez a2, 3b |

2340 addiu a0, a0, 4 |

2341 4: |

2342 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 |

2343 5: |

2344 j ra |

2345 nop |

2346 |

2347 END(pixman_composite_over_reverse_n_8888_asm_mips) |
|
2348 |
|
2349 LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips) |

/*
 * Per its name: IN of a constant source with an a8 destination, for one
 * run of w pixels. As coded, only the top byte (bits 31..24) of the
 * source is used and every destination byte becomes
 *     dst = dst * src_alpha / 255
 * with the division done by the rounded "x += x >> 8; x >>= 8" sequence.
 *
 * Structure: a four-pixel loop (1:) runs w / 4 times (counter in t9), then
 * a one-pixel loop (3:) handles the remainder (counter a2). w == 0 returns
 * via 5: before any register is saved.
 *
 * Each destination byte is loaded with lbu, so only the lowest byte of the
 * register is non-zero: the muleu_s.ph.qbl products (which read the upper
 * two bytes) are therefore zero and the pixel's result ends up in the
 * lowest byte after precr.qb.ph, which is the byte that sb stores.
 *
 * The instruction after each branch appears to be a branch delay slot
 * (assumes .set noreorder, not visible in this chunk). SAVE/RESTORE macros
 * are defined outside this chunk.
 */

2350 /* |

2351 * a0 - dst (a8) |

2352 * a1 - src (a8r8g8b8) |

2353 * a2 - w |

2354 */ |

2355 |

2356 beqz a2, 5f |

2357 nop |

2358 |

2359 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 |

/* t5 = source alpha byte replicated into both halfwords. t7 only holds a
 * copy of a1 here and is overwritten as scratch inside the loops. */

2360 move t7, a1 |

2361 srl t5, t7, 24 |

2362 replv.ph t5, t5 |

2363 srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ |

2364 beqz t9, 2f /* branch if less than 4 src pixels */ |

2365 nop |

2366 |

2367 1: |

2368 addiu t9, t9, -1 |

2369 addiu a2, a2, -4 |

2370 lbu t0, 0(a0) |

2371 lbu t1, 1(a0) |

2372 lbu t2, 2(a0) |

2373 lbu t3, 3(a0) |

2374 |

/* Per pixel: qbl product in the even s-register, qbr product in the odd
 * one. */

2375 muleu_s.ph.qbl s0, t0, t5 |

2376 muleu_s.ph.qbr s1, t0, t5 |

2377 muleu_s.ph.qbl s2, t1, t5 |

2378 muleu_s.ph.qbr s3, t1, t5 |

2379 muleu_s.ph.qbl s4, t2, t5 |

2380 muleu_s.ph.qbr s5, t2, t5 |

2381 muleu_s.ph.qbl s6, t3, t5 |

2382 muleu_s.ph.qbr s7, t3, t5 |

2383 |

/* Divide-by-255 sequence for pixels 0 and 1 (results in t0..t3). */

2384 shrl.ph t4, s0, 8 |

2385 shrl.ph t6, s1, 8 |

2386 shrl.ph t7, s2, 8 |

2387 shrl.ph t8, s3, 8 |

2388 addq.ph t0, s0, t4 |

2389 addq.ph t1, s1, t6 |

2390 addq.ph t2, s2, t7 |

2391 addq.ph t3, s3, t8 |

2392 shra_r.ph t0, t0, 8 |

2393 shra_r.ph t1, t1, 8 |

2394 shra_r.ph t2, t2, 8 |

2395 shra_r.ph t3, t3, 8 |

/* Same sequence for pixels 2 and 3 (results in t4, t6, t7, t8). */

2396 shrl.ph t4, s4, 8 |

2397 shrl.ph t6, s5, 8 |

2398 shrl.ph t7, s6, 8 |

2399 shrl.ph t8, s7, 8 |

2400 addq.ph s0, s4, t4 |

2401 addq.ph s1, s5, t6 |

2402 addq.ph s2, s6, t7 |

2403 addq.ph s3, s7, t8 |

2404 shra_r.ph t4, s0, 8 |

2405 shra_r.ph t6, s1, 8 |

2406 shra_r.ph t7, s2, 8 |

2407 shra_r.ph t8, s3, 8 |

2408 |

/* Pack each pixel; the wanted result byte is the lowest byte of s0..s3. */

2409 precr.qb.ph s0, t0, t1 |

2410 precr.qb.ph s1, t2, t3 |

2411 precr.qb.ph s2, t4, t6 |

2412 precr.qb.ph s3, t7, t8 |

2413 |

2414 sb s0, 0(a0) |

2415 sb s1, 1(a0) |

2416 sb s2, 2(a0) |

2417 sb s3, 3(a0) |

/* Loop while groups of four remain. */

2418 bgtz t9, 1b |

2419 addiu a0, a0, 4 |

2420 2: |

/* Remaining pixels, one at a time. */

2421 beqz a2, 4f |

2422 nop |

2423 3: |

2424 lbu t1, 0(a0) |

2425 |

2426 muleu_s.ph.qbl t4, t1, t5 |

2427 muleu_s.ph.qbr t7, t1, t5 |

2428 shrl.ph t6, t4, 8 |

2429 shrl.ph t0, t7, 8 |

2430 addq.ph t8, t4, t6 |

2431 addq.ph t9, t7, t0 |

2432 shra_r.ph t8, t8, 8 |

2433 shra_r.ph t9, t9, 8 |

2434 precr.qb.ph t2, t8, t9 |

2435 sb t2, 0(a0) |

2436 addiu a2, a2, -1 |

2437 bnez a2, 3b |

2438 addiu a0, a0, 1 |

2439 4: |

2440 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 |

2441 5: |

2442 j ra |

2443 nop |

2444 |

2445 END(pixman_composite_in_n_8_asm_mips) |
|
2446 |
|
LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
/*
 * Nearest-neighbour scaled scanline, a8r8g8b8 source with an a8 mask,
 * OVER onto an r5g6b5 destination.
 *
 * a0     - dst  (r5g6b5)
 * a1     - src  (a8r8g8b8)
 * a2     - mask (a8)
 * a3     - w
 * 16(sp) - vx
 * 20(sp) - unit_x
 */
    beqz              a3, 3f          /* w == 0: return immediately */
     nop

    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
    /* stack arguments are now 20 bytes further from sp */
    lw                v1, 40(sp)      /* v1 = unit_x */
    lw                v0, 36(sp)      /* v0 = vx */
    li                t9, 0x001F001F
    li                t8, 0x07e007e0
    li                t7, 0xf800f800
    li                t6, 0x00ff00ff

    addiu             t1, a3, -1
    beqz              t1, 1f          /* w == 1: single-pixel tail only */
     nop
0:  /* two pixels per iteration, runs while a3 > 1 */
    lbu               t2, 0(a2)       /* t2 = mask 0 (a8) */
    lbu               t3, 1(a2)       /* t3 = mask 1 (a8) */
    lhu               t4, 0(a0)       /* t4 = destination 0 (r5g6b5) */
    lhu               t5, 2(a0)       /* t5 = destination 1 (r5g6b5) */
    addiu             a2, a2, 2

    sra               t0, v0, 16      /* t0 = vx >> 16 (source index) */
    sll               t0, t0, 2       /* t0 = index * 4 bytes */
    addu              t0, a1, t0
    lw                t0, 0(t0)       /* t0 = source 0 (a8r8g8b8) */
    addu              v0, v0, v1      /* vx += unit_x */
    sra               t1, v0, 16
    sll               t1, t1, 2
    addu              t1, a1, t1
    lw                t1, 0(t1)       /* t1 = source 1 (a8r8g8b8) */
    addu              v0, v0, v1      /* vx += unit_x */

    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
    OVER_2x8888_2x8_2x8888   t0, t1, t2, t3, s0, s1, t4, t5, \
                             t6, s2, s3, s4, s5, t2, t3
    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3

    sh                s0, 0(a0)
    sh                s1, 2(a0)
    addiu             a3, a3, -2
    addiu             t1, a3, -1
    bgtz              t1, 0b
     addiu            a0, a0, 4
1:
    beqz              a3, 2f          /* even width: no pixel left */
     nop
    /* last (odd) pixel */
    sra               t0, v0, 16      /* t0 = vx >> 16 */
    sll               t0, t0, 2       /* t0 = index * 4 bytes */
    addu              t0, a1, t0
    lw                t0, 0(t0)       /* t0 = source (a8r8g8b8) */
    lbu               t1, 0(a2)       /* t1 = mask (a8) */
    lhu               t2, 0(a0)       /* t2 = destination (r5g6b5) */

    /* t7/t8 no longer need to hold their constants and serve as scratch */
    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5

    sh                t3, 0(a0)
2:
    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
3:
    j                 ra
     nop

END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
|
2523 |
|
LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
/*
 * Nearest-neighbour scaled scanline, r5g6b5 source with an a8 mask,
 * OVER onto an r5g6b5 destination.
 *
 * a0     - dst  (r5g6b5)
 * a1     - src  (r5g6b5)
 * a2     - mask (a8)
 * a3     - w
 * 16(sp) - vx
 * 20(sp) - unit_x
 */

    beqz              a3, 3f          /* w == 0: return immediately */
     nop
    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
    /* stack arguments are now 20 bytes further from sp */
    lw                v1, 40(sp)      /* v1 = unit_x */
    lw                v0, 36(sp)      /* v0 = vx */
    li                t7, 0x00ff00ff
    li                t6, 0x001F001F
    li                t5, 0x07e007e0
    li                t4, 0xf800f800

    addiu             t1, a3, -1
    beqz              t1, 1f          /* w == 1: single-pixel tail only */
     nop
0:  /* two pixels per iteration, runs while a3 > 1 */
    lbu               t2, 0(a2)       /* t2 = mask 0 (a8) */
    lbu               t3, 1(a2)       /* t3 = mask 1 (a8) */
    lhu               t8, 0(a0)       /* t8 = destination 0 (r5g6b5) */
    lhu               t9, 2(a0)       /* t9 = destination 1 (r5g6b5) */
    addiu             a2, a2, 2

    sra               t0, v0, 16      /* t0 = vx >> 16 (source index) */
    sll               t0, t0, 1       /* t0 = index * 2 bytes */
    addu              t0, a1, t0
    lhu               t0, 0(t0)       /* t0 = source 0 (r5g6b5) */
    addu              v0, v0, v1      /* vx += unit_x */
    sra               t1, v0, 16
    sll               t1, t1, 1
    addu              t1, a1, t1
    lhu               t1, 0(t1)       /* t1 = source 1 (r5g6b5) */
    addu              v0, v0, v1      /* vx += unit_x */

    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
    OVER_2x8888_2x8_2x8888   s0, s1, t2, t3, s2, s3, t0, t1, \
                             t7, t8, t9, s4, s5, s0, s1
    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3

    sh                s0, 0(a0)
    sh                s1, 2(a0)
    addiu             a3, a3, -2
    addiu             t1, a3, -1
    bgtz              t1, 0b
     addiu            a0, a0, 4
1:
    beqz              a3, 2f          /* even width: no pixel left */
     nop
    /* last (odd) pixel */
    sra               t0, v0, 16      /* t0 = vx >> 16 */
    sll               t0, t0, 1       /* t0 = index * 2 bytes */
    addu              t0, a1, t0

    lhu               t0, 0(t0)       /* t0 = source (r5g6b5) */
    lbu               t1, 0(a2)       /* t1 = mask (a8) */
    lhu               t2, 0(a0)       /* t2 = destination (r5g6b5) */

    /* t4-t6 lose their constant values here; only t7 is still needed */
    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5

    sh                t3, 0(a0)
2:
    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
3:
    j                 ra
     nop

END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
|
2603 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
/*
 * Bilinear scaled scanline, 32-bit source rows written (SRC) to a
 * 32-bit destination, one pixel per iteration.
 *
 * a0     - *dst
 * a1     - *src_top
 * a2     - *src_bottom
 * a3     - w
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 */

    beqz              a3, 2f          /* w == 0: return immediately */
     nop

    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7

    /* stack arguments are now 20 bytes further from sp */
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 48(sp)      /* s3 = unit_x */
    lw                s2, 44(sp)      /* s2 = vx */
    lw                s1, 40(sp)      /* s1 = wb */
    lw                s0, 36(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 2       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 4       /* t8 = byte offset of right pixel */
    lwx               t0, t9(a1)      /* t0 = tl */
    lwx               t1, t8(a1)      /* t1 = tr */
    lwx               t2, t9(a2)      /* t2 = bl */
    lwx               t3, t8(a2)      /* t3 = br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             a3, a3, -1

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7

    addu              s2, s2, s3      /* vx += unit_x */
    sw                t0, 0(a0)       /* t0 = interpolated pixel */
    bnez              a3, 1b
     addiu            a0, a0, 4

    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
|
2661 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
/*
 * Bilinear scaled scanline, 32-bit source rows written (SRC) to a
 * 16-bit destination, one pixel per iteration.
 *
 * a0     - *dst
 * a1     - *src_top
 * a2     - *src_bottom
 * a3     - w
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 */

    beqz              a3, 2f          /* w == 0: return immediately */
     nop

    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7

    /* stack arguments are now 20 bytes further from sp */
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 48(sp)      /* s3 = unit_x */
    lw                s2, 44(sp)      /* s2 = vx */
    lw                s1, 40(sp)      /* s1 = wb */
    lw                s0, 36(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 2       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 4       /* t8 = byte offset of right pixel */
    lwx               t0, t9(a1)      /* t0 = tl */
    lwx               t1, t8(a1)      /* t1 = tr */
    lwx               t2, t9(a2)      /* t2 = bl */
    lwx               t3, t8(a2)      /* t3 = br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             a3, a3, -1

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3

    addu              s2, s2, s3      /* vx += unit_x */
    sh                t1, 0(a0)       /* t1 = converted 16-bit pixel */
    bnez              a3, 1b
     addiu            a0, a0, 2

    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
|
2720 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
/*
 * Bilinear scaled scanline, 16-bit source rows written (SRC) to a
 * 32-bit destination, one pixel per iteration.
 *
 * a0     - *dst
 * a1     - *src_top
 * a2     - *src_bottom
 * a3     - w
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 */

    beqz              a3, 2f          /* w == 0: return immediately */
     nop

    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* stack arguments are now 28 bytes further from sp */
    li                s8, 0x001f001f
    li                v1, 0x07e007e0
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 56(sp)      /* s3 = unit_x */
    lw                s2, 52(sp)      /* s2 = vx */
    lw                s1, 48(sp)      /* s1 = wb */
    lw                s0, 44(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood (16-bit pixels) */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 1       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 2       /* t8 = byte offset of right pixel */
    lhx               t0, t9(a1)      /* t0 = tl */
    lhx               t1, t8(a1)      /* t1 = tr */
    lhx               t2, t9(a2)      /* t2 = bl */
    lhx               t3, t8(a2)      /* t3 = br */
    andi              t1, t1, 0xffff  /* keep only the low 16 bits of tr */
    andi              t3, t3, 0xffff  /* keep only the low 16 bits of br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             a3, a3, -1

    /* widen both rows in place; t4-t7 are handed over as scratch */
    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7

    addu              s2, s2, s3      /* vx += unit_x */
    sw                t0, 0(a0)       /* t0 = interpolated pixel */
    bnez              a3, 1b
     addiu            a0, a0, 4

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
|
2784 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
/*
 * Bilinear scaled scanline, 16-bit source rows written (SRC) to a
 * 16-bit destination, one pixel per iteration.
 *
 * a0     - *dst
 * a1     - *src_top
 * a2     - *src_bottom
 * a3     - w
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 */

    beqz              a3, 2f          /* w == 0: return immediately */
     nop

    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* stack arguments are now 28 bytes further from sp */
    li                s8, 0x001f001f
    li                v1, 0x07e007e0
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 56(sp)      /* s3 = unit_x */
    lw                s2, 52(sp)      /* s2 = vx */
    lw                s1, 48(sp)      /* s1 = wb */
    lw                s0, 44(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood (16-bit pixels) */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 1       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 2       /* t8 = byte offset of right pixel */
    lhx               t0, t9(a1)      /* t0 = tl */
    lhx               t1, t8(a1)      /* t1 = tr */
    lhx               t2, t9(a2)      /* t2 = bl */
    lhx               t3, t8(a2)      /* t3 = br */
    andi              t1, t1, 0xffff  /* keep only the low 16 bits of tr */
    andi              t3, t3, 0xffff  /* keep only the low 16 bits of br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             a3, a3, -1

    /* widen both rows in place; t4-t7 are handed over as scratch */
    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3

    addu              s2, s2, s3      /* vx += unit_x */
    sh                t1, 0(a0)       /* t1 = converted 16-bit pixel */
    bnez              a3, 1b
     addiu            a0, a0, 2

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
|
2849 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
/*
 * Bilinear scaled scanline, 32-bit source rows combined with OVER
 * onto a 32-bit destination, one pixel per iteration.
 *
 * a0     - *dst
 * a1     - *src_top
 * a2     - *src_bottom
 * a3     - w
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 */

    beqz              a3, 2f          /* w == 0: return immediately */
     nop

    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* stack arguments are now 24 bytes further from sp */
    li                s8, 0x00ff00ff
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 52(sp)      /* s3 = unit_x */
    lw                s2, 48(sp)      /* s2 = vx */
    lw                s1, 44(sp)      /* s1 = wb */
    lw                s0, 40(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 2       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 4       /* t8 = byte offset of right pixel */
    lwx               t0, t9(a1)      /* t0 = tl */
    lwx               t1, t8(a1)      /* t1 = tr */
    lwx               t2, t9(a2)      /* t2 = bl */
    lwx               t3, t8(a2)      /* t3 = br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             a3, a3, -1

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lw                t1, 0(a0)       /* t1 = current destination pixel */
    OVER_8888_8888    t0, t1, t2, s8, t3, t4, t5, t6

    addu              s2, s2, s3      /* vx += unit_x */
    sw                t2, 0(a0)       /* t2 = composited pixel */
    bnez              a3, 1b
     addiu            a0, a0, 4

    RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
|
2910 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
/*
 * Bilinear scaled scanline, 32-bit source rows added with per-byte
 * unsigned saturation (ADD) to a 32-bit destination, one pixel per
 * iteration.
 *
 * a0     - *dst
 * a1     - *src_top
 * a2     - *src_bottom
 * a3     - w
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 */

    beqz              a3, 2f          /* w == 0: return immediately */
     nop

    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7

    /* stack arguments are now 20 bytes further from sp */
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 48(sp)      /* s3 = unit_x */
    lw                s2, 44(sp)      /* s2 = vx */
    lw                s1, 40(sp)      /* s1 = wb */
    lw                s0, 36(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 2       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 4       /* t8 = byte offset of right pixel */
    lwx               t0, t9(a1)      /* t0 = tl */
    lwx               t1, t8(a1)      /* t1 = tr */
    lwx               t2, t9(a2)      /* t2 = bl */
    lwx               t3, t8(a2)      /* t3 = br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             a3, a3, -1

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lw                t1, 0(a0)       /* t1 = current destination pixel */
    addu_s.qb         t2, t0, t1      /* t2 = saturating byte-wise sum */

    addu              s2, s2, s3      /* vx += unit_x */
    sw                t2, 0(a0)
    bnez              a3, 1b
     addiu            a0, a0, 4

    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
|
2970 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
/*
 * Bilinear scaled scanline, 32-bit source rows multiplied by an
 * 8-bit mask and written (SRC) to a 32-bit destination, one pixel
 * per iteration.
 *
 * a0     - *dst
 * a1     - *mask
 * a2     - *src_top
 * a3     - *src_bottom
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 * 32(sp) - w
 */

    lw                v1, 32(sp)      /* v1 = w (pixel counter) */
    beqz              v1, 2f          /* w == 0: return immediately */
     nop

    /* v1 already holds w, so its save slot records w, not the caller's v1 */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* stack arguments are now 28 bytes further from sp */
    li                s8, 0x00ff00ff
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 56(sp)      /* s3 = unit_x */
    lw                s2, 52(sp)      /* s2 = vx */
    lw                s1, 48(sp)      /* s1 = wb */
    lw                s0, 44(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 2       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 4       /* t8 = byte offset of right pixel */
    lwx               t0, t9(a2)      /* t0 = tl */
    lwx               t1, t8(a2)      /* t1 = tr */
    lwx               t2, t9(a3)      /* t2 = bl */
    lwx               t3, t8(a3)      /* t3 = br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             v1, v1, -1

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu               t1, 0(a1)       /* t1 = mask */
    addiu             a1, a1, 1
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4

    addu              s2, s2, s3      /* vx += unit_x */
    sw                t0, 0(a0)       /* t0 = masked pixel */
    bnez              v1, 1b
     addiu            a0, a0, 4

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
|
3034 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
/*
 * Bilinear scaled scanline, 32-bit source rows multiplied by an
 * 8-bit mask and written (SRC) to a 16-bit destination, one pixel
 * per iteration.
 *
 * a0     - *dst
 * a1     - *mask
 * a2     - *src_top
 * a3     - *src_bottom
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 * 32(sp) - w
 */

    lw                v1, 32(sp)      /* v1 = w (pixel counter) */
    beqz              v1, 2f          /* w == 0: return immediately */
     nop

    /* v1 already holds w, so its save slot records w, not the caller's v1 */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* stack arguments are now 28 bytes further from sp */
    li                s8, 0x00ff00ff
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 56(sp)      /* s3 = unit_x */
    lw                s2, 52(sp)      /* s2 = vx */
    lw                s1, 48(sp)      /* s1 = wb */
    lw                s0, 44(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 2       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 4       /* t8 = byte offset of right pixel */
    lwx               t0, t9(a2)      /* t0 = tl */
    lwx               t1, t8(a2)      /* t1 = tr */
    lwx               t2, t9(a3)      /* t2 = bl */
    lwx               t3, t8(a3)      /* t3 = br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             v1, v1, -1

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu               t1, 0(a1)       /* t1 = mask */
    addiu             a1, a1, 1
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3

    addu              s2, s2, s3      /* vx += unit_x */
    sh                t1, 0(a0)       /* t1 = converted 16-bit pixel */
    bnez              v1, 1b
     addiu            a0, a0, 2

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
|
3099 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
/*
 * Bilinear scaled scanline, 16-bit source rows multiplied by an
 * 8-bit mask and written (SRC) to a 32-bit destination, one pixel
 * per iteration.
 *
 * a0     - *dst
 * a1     - *mask
 * a2     - *src_top
 * a3     - *src_bottom
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 * 32(sp) - w
 *
 * ra doubles as the pixel counter inside the loop; it is saved on
 * the stack first and restored before returning.
 */

    lw                t0, 32(sp)      /* t0 = w, only used for the early exit */
    beqz              t0, 2f
     nop

    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra

    /* stack arguments are now 32 bytes further from sp */
    li                s8, 0x001f001f
    li                v1, 0x07e007e0
    li                v0, 0x00ff00ff
    lw                ra, 64(sp)      /* ra = w */
    lw                s3, 60(sp)      /* s3 = unit_x */
    lw                s2, 56(sp)      /* s2 = vx */
    lw                s1, 52(sp)      /* s1 = wb */
    lw                s0, 48(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood (16-bit pixels) */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 1       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 2       /* t8 = byte offset of right pixel */
    lhx               t0, t9(a2)      /* t0 = tl */
    lhx               t1, t8(a2)      /* t1 = tr */
    lhx               t2, t9(a3)      /* t2 = bl */
    lhx               t3, t8(a3)      /* t3 = br */
    andi              t1, t1, 0xffff  /* keep only the low 16 bits of tr */
    andi              t3, t3, 0xffff  /* keep only the low 16 bits of br */

    /* horizontal weights; RANGE is reloaded because v0 holds a mask */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    li                t5, BILINEAR_INTERPOLATION_RANGE
    subu              t5, t5, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             ra, ra, -1

    /* widen both rows in place; t4-t7 are handed over as scratch */
    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu               t1, 0(a1)       /* t1 = mask */
    addiu             a1, a1, 1
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4

    addu              s2, s2, s3      /* vx += unit_x */
    sw                t0, 0(a0)       /* t0 = masked pixel */
    bnez              ra, 1b
     addiu            a0, a0, 4

    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
|
3170 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
/*
 * Bilinear scaled scanline, 16-bit source rows multiplied by an
 * 8-bit mask and written (SRC) to a 16-bit destination, one pixel
 * per iteration.
 *
 * a0     - *dst
 * a1     - *mask
 * a2     - *src_top
 * a3     - *src_bottom
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 * 32(sp) - w
 *
 * ra doubles as the pixel counter inside the loop; it is saved on
 * the stack first and restored before returning.
 */

    lw                t0, 32(sp)      /* t0 = w, only used for the early exit */
    beqz              t0, 2f
     nop

    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra

    /* stack arguments are now 32 bytes further from sp */
    li                s8, 0x001f001f
    li                v1, 0x07e007e0
    li                v0, 0x00ff00ff
    lw                ra, 64(sp)      /* ra = w */
    lw                s3, 60(sp)      /* s3 = unit_x */
    lw                s2, 56(sp)      /* s2 = vx */
    lw                s1, 52(sp)      /* s1 = wb */
    lw                s0, 48(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood (16-bit pixels) */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 1       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 2       /* t8 = byte offset of right pixel */
    lhx               t0, t9(a2)      /* t0 = tl */
    lhx               t1, t8(a2)      /* t1 = tr */
    lhx               t2, t9(a3)      /* t2 = bl */
    lhx               t3, t8(a3)      /* t3 = br */
    andi              t1, t1, 0xffff  /* keep only the low 16 bits of tr */
    andi              t3, t3, 0xffff  /* keep only the low 16 bits of br */

    /* horizontal weights; RANGE is reloaded because v0 holds a mask */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    li                t5, BILINEAR_INTERPOLATION_RANGE
    subu              t5, t5, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             ra, ra, -1

    /* widen both rows in place; t4-t7 are handed over as scratch */
    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu               t1, 0(a1)       /* t1 = mask */
    addiu             a1, a1, 1
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3

    addu              s2, s2, s3      /* vx += unit_x */
    sh                t1, 0(a0)       /* t1 = converted 16-bit pixel */
    bnez              ra, 1b
     addiu            a0, a0, 2

    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
|
3242 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
/*
 * Bilinear scaled scanline, a8r8g8b8 source rows with an a8 mask,
 * combined with OVER onto an a8r8g8b8 destination, one pixel per
 * iteration.
 *
 * a0     - dst        (a8r8g8b8)
 * a1     - mask       (a8)
 * a2     - src_top    (a8r8g8b8)
 * a3     - src_bottom (a8r8g8b8)
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 * 32(sp) - w
 *
 * The registers are saved before the width check here, so the
 * w == 0 path also runs the restore.
 */

    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* stack arguments are now 28 bytes further from sp */
    lw                v1, 60(sp)      /* v1 = w (32(sp) on entry) */
    beqz              v1, 2f
     nop

    li                s8, 0x00ff00ff
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 56(sp)      /* s3 = unit_x */
    lw                s2, 52(sp)      /* s2 = vx */
    lw                s1, 48(sp)      /* s1 = wb */
    lw                s0, 44(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))

1:
    /* fetch the 2x2 source neighbourhood */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 2       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 4       /* t8 = byte offset of right pixel */
    lwx               t0, t9(a2)      /* t0 = tl */
    lwx               t1, t8(a2)      /* t1 = tr */
    lwx               t2, t9(a3)      /* t2 = bl */
    lwx               t3, t8(a3)      /* t3 = br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             v1, v1, -1

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu               t1, 0(a1)       /* t1 = mask */
    lw                t2, 0(a0)       /* t2 = dst */
    addiu             a1, a1, 1
    OVER_8888_8_8888  t0, t1, t2, t0, s8, t3, t4, t5, t6

    addu              s2, s2, s3      /* vx += unit_x */
    sw                t0, 0(a0)       /* t0 = composited pixel */
    bnez              v1, 1b
     addiu            a0, a0, 4

2:
    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
|
3309 |
|
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
/*
 * Bilinear scaled scanline, 32-bit source rows with an 8-bit mask,
 * combined with ADD onto a 32-bit destination, one pixel per
 * iteration.
 *
 * a0     - *dst
 * a1     - *mask
 * a2     - *src_top
 * a3     - *src_bottom
 * 16(sp) - wt
 * 20(sp) - wb
 * 24(sp) - vx
 * 28(sp) - unit_x
 * 32(sp) - w
 */

    lw                v1, 32(sp)      /* v1 = w (pixel counter) */
    beqz              v1, 2f          /* w == 0: return immediately */
     nop

    /* v1 already holds w, so its save slot records w, not the caller's v1 */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* stack arguments are now 28 bytes further from sp */
    li                s8, 0x00ff00ff
    li                v0, BILINEAR_INTERPOLATION_RANGE
    lw                s3, 56(sp)      /* s3 = unit_x */
    lw                s2, 52(sp)      /* s2 = vx */
    lw                s1, 48(sp)      /* s1 = wb */
    lw                s0, 44(sp)      /* s0 = wt */

    /* pre-scale the vertical weights */
    sll               s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll               s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
1:
    /* fetch the 2x2 source neighbourhood */
    sra               t9, s2, 16      /* t9 = integer part of vx */
    sll               t9, t9, 2       /* t9 = byte offset of left pixel */
    addiu             t8, t9, 4       /* t8 = byte offset of right pixel */
    lwx               t0, t9(a2)      /* t0 = tl */
    lwx               t1, t8(a2)      /* t1 = tr */
    lwx               t2, t9(a3)      /* t2 = bl */
    lwx               t3, t8(a3)      /* t3 = br */

    /* horizontal weights from the low 16 bits of vx */
    andi              t4, s2, 0xffff
    srl               t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = right weight */
    subu              t5, v0, t4      /* t5 = RANGE - t4 = left weight */

    mul               s4, s0, t5      /* s4 = wt * left  */
    mul               s5, s0, t4      /* s5 = wt * right */
    mul               s6, s1, t5      /* s6 = wb * left  */
    mul               s7, s1, t4      /* s7 = wb * right */
    addiu             v1, v1, -1

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu               t1, 0(a1)       /* t1 = mask */
    lw                t2, 0(a0)       /* t2 = dst */
    addiu             a1, a1, 1
    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5

    addu              s2, s2, s3      /* vx += unit_x */
    sw                t0, 0(a0)       /* t0 = combined pixel */
    bnez              v1, 1b
     addiu            a0, a0, 4

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
2:
    j                 ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)