gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:affdd09a3165
1 /*
2 * Copyright (c) 2012
3 * MIPS Technologies, Inc., California.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * Author: Nemanja Lukic (nlukic@mips.com)
30 */
31
32 #include "pixman-private.h"
33 #include "pixman-mips-dspr2-asm.h"
34
35 LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
36 /*
 * Fill a buffer with a 16-bit value: an optional leading halfword store
 * to reach 4-byte alignment, then unrolled 32-byte chunks of word stores,
 * then a halfword-at-a-time tail loop.
 *
37 * a0 - *dest
38 * a1 - count (bytes)
39 * a2 - value to fill buffer with
 *
 * NOTE(review): the instruction following each branch is assumed to be its
 * delay slot (the explicit nops suggest .set noreorder) - confirm against
 * the LEAF_MIPS_DSPR2 definition, which is not visible here.
40 */
41
42 beqz a1, 3f /* count == 0: nothing to do */
43 andi t1, a0, 0x0002 /* t1 = dest & 2 */
44 beqz t1, 0f /* check if address is 4-byte aligned */
45 nop
46 sh a2, 0(a0) /* one halfword store brings dest to 4-byte alignment */
47 addiu a0, a0, 2
48 addiu a1, a1, -2
49 0:
50 srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
51 replv.ph a2, a2 /* replicate fill value (16bit) in a2 */
52 beqz t1, 2f /* fewer than 32 bytes left: tail loop only */
53 nop
54 1:
55 addiu t1, t1, -1
56 beqz t1, 11f /* final 32-byte chunk is stored at 11 (no prefetch) */
57 addiu a1, a1, -32 /* a1 = bytes left after this chunk */
58 pref 30, 32(a0) /* hint 30 on the next chunk of dest */
59 sw a2, 0(a0)
60 sw a2, 4(a0)
61 sw a2, 8(a0)
62 sw a2, 12(a0)
63 sw a2, 16(a0)
64 sw a2, 20(a0)
65 sw a2, 24(a0)
66 sw a2, 28(a0)
67 b 1b
68 addiu a0, a0, 32
69 11:
70 sw a2, 0(a0)
71 sw a2, 4(a0)
72 sw a2, 8(a0)
73 sw a2, 12(a0)
74 sw a2, 16(a0)
75 sw a2, 20(a0)
76 sw a2, 24(a0)
77 sw a2, 28(a0)
78 addiu a0, a0, 32
79 2: /* tail: one halfword per iteration */
80 blez a1, 3f /* stop once no bytes remain */
81 addiu a1, a1, -2
82 sh a2, 0(a0)
83 b 2b
84 addiu a0, a0, 2
85 3:
86 jr ra
87 nop
88
89 END(pixman_fill_buff16_mips)
90
91 LEAF_MIPS32R2(pixman_fill_buff32_mips)
92 /*
 * Fill a buffer with a 32-bit value: unrolled 32-byte chunks of word
 * stores followed by a word-at-a-time tail loop.  No alignment fix-up is
 * performed on dest in this routine.
 *
93 * a0 - *dest
94 * a1 - count (bytes)
95 * a2 - value to fill buffer with
 *
 * NOTE(review): the instruction following each branch is assumed to be its
 * delay slot (explicit nops suggest .set noreorder) - confirm against the
 * LEAF_MIPS32R2 definition, which is not visible here.
96 */
97
98 beqz a1, 3f /* count == 0: nothing to do */
99 nop
100 srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
101 beqz t1, 2f /* fewer than 32 bytes: tail loop only */
102 nop
103 1:
104 addiu t1, t1, -1
105 beqz t1, 11f /* final 32-byte chunk is stored at 11 (no prefetch) */
106 addiu a1, a1, -32 /* a1 = bytes left after this chunk */
107 pref 30, 32(a0) /* hint 30 on the next chunk of dest */
108 sw a2, 0(a0)
109 sw a2, 4(a0)
110 sw a2, 8(a0)
111 sw a2, 12(a0)
112 sw a2, 16(a0)
113 sw a2, 20(a0)
114 sw a2, 24(a0)
115 sw a2, 28(a0)
116 b 1b
117 addiu a0, a0, 32
118 11:
119 sw a2, 0(a0)
120 sw a2, 4(a0)
121 sw a2, 8(a0)
122 sw a2, 12(a0)
123 sw a2, 16(a0)
124 sw a2, 20(a0)
125 sw a2, 24(a0)
126 sw a2, 28(a0)
127 addiu a0, a0, 32
128 2: /* tail: one word per iteration */
129 blez a1, 3f /* stop once no bytes remain */
130 addiu a1, a1, -4
131 sw a2, 0(a0)
132 b 2b
133 addiu a0, a0, 4
134 3:
135 jr ra
136 nop
137
138 END(pixman_fill_buff32_mips)
139
140 LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
141 /*
 * Convert a row of a8r8g8b8 pixels to r5g6b5, two pixels per loop
 * iteration with a single-pixel tail.
 *
142 * a0 - dst (r5g6b5)
143 * a1 - src (a8r8g8b8)
144 * a2 - w
 *
 * The CONVERT_* macros come from pixman-mips-dspr2-asm.h (not visible
 * here); t4/t5/t6 are handed to the 2x macro and t7/t8 are presumably
 * scratch - TODO confirm against the macro definition.
145 */
146
147 beqz a2, 3f /* w == 0: nothing to do */
148 nop
149 addiu t1, a2, -1
150 beqz t1, 2f /* w == 1: single-pixel path only */
151 nop
152 li t4, 0xf800f800
153 li t5, 0x07e007e0
154 li t6, 0x001f001f
155 1:
156 lw t0, 0(a1) /* t0 = source pixel 0 */
157 lw t1, 4(a1) /* t1 = source pixel 1 */
158 addiu a1, a1, 8
159 addiu a2, a2, -2
160
161 CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8
162
163 sh t2, 0(a0)
164 sh t3, 2(a0)
165
166 addiu t2, a2, -1
167 bgtz t2, 1b /* loop while at least two pixels remain */
168 addiu a0, a0, 4
169 2:
170 beqz a2, 3f /* no odd pixel left */
171 nop
172 lw t0, 0(a1)
173
174 CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
175
176 sh t1, 0(a0)
177 3:
178 j ra
179 nop
180
181 END(pixman_composite_src_8888_0565_asm_mips)
182
183 LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
184 /*
 * Convert a row of r5g6b5 pixels to a8r8g8b8, two pixels per loop
 * iteration with a single-pixel tail.
 *
185 * a0 - dst (a8r8g8b8)
186 * a1 - src (r5g6b5)
187 * a2 - w
 *
 * The CONVERT_* macros come from pixman-mips-dspr2-asm.h (not visible
 * here); t4/t5 are handed to the 2x macro and t6..t9 are presumably
 * scratch - TODO confirm against the macro definition.
188 */
189
190 beqz a2, 3f /* w == 0: nothing to do */
191 nop
192 addiu t1, a2, -1
193 beqz t1, 2f /* w == 1: single-pixel path only */
194 nop
195 li t4, 0x07e007e0
196 li t5, 0x001F001F
197 1:
198 lhu t0, 0(a1) /* t0 = source pixel 0 */
199 lhu t1, 2(a1) /* t1 = source pixel 1 */
200 addiu a1, a1, 4
201 addiu a2, a2, -2
202
203 CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
204
205 sw t2, 0(a0)
206 sw t3, 4(a0)
207
208 addiu t2, a2, -1
209 bgtz t2, 1b /* loop while at least two pixels remain */
210 addiu a0, a0, 8
211 2:
212 beqz a2, 3f /* no odd pixel left */
213 nop
214 lhu t0, 0(a1)
215
216 CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
217
218 sw t1, 0(a0)
219 3:
220 j ra
221 nop
222
223 END(pixman_composite_src_0565_8888_asm_mips)
224
225 LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
226 /*
 * Copy a row of x8r8g8b8 pixels to a8r8g8b8, forcing the top byte of
 * every pixel to 0xff.  Eight pixels per unrolled iteration, then a
 * one-pixel-at-a-time tail loop.
 *
227 * a0 - dst (a8r8g8b8)
228 * a1 - src (x8r8g8b8)
229 * a2 - w
230 */
231
232 beqz a2, 4f /* w == 0: nothing to do */
233 nop
234 li t9, 0xff000000 /* t9 = value OR-ed into every pixel */
235 srl t8, a2, 3 /* t8 = how many multiples of 8 src pixels */
236 beqz t8, 3f /* branch if less than 8 src pixels */
237 nop
238 1:
239 addiu t8, t8, -1
240 beqz t8, 2f /* final group of 8 is handled at 2 (no prefetch) */
241 addiu a2, a2, -8 /* a2 = pixels left after this group */
242 pref 0, 32(a1) /* hint 0 on the next 32 bytes of src */
243 lw t0, 0(a1)
244 lw t1, 4(a1)
245 lw t2, 8(a1)
246 lw t3, 12(a1)
247 lw t4, 16(a1)
248 lw t5, 20(a1)
249 lw t6, 24(a1)
250 lw t7, 28(a1)
251 addiu a1, a1, 32
252 or t0, t0, t9
253 or t1, t1, t9
254 or t2, t2, t9
255 or t3, t3, t9
256 or t4, t4, t9
257 or t5, t5, t9
258 or t6, t6, t9
259 or t7, t7, t9
260 pref 30, 32(a0) /* hint 30 on the next 32 bytes of dst */
261 sw t0, 0(a0)
262 sw t1, 4(a0)
263 sw t2, 8(a0)
264 sw t3, 12(a0)
265 sw t4, 16(a0)
266 sw t5, 20(a0)
267 sw t6, 24(a0)
268 sw t7, 28(a0)
269 b 1b
270 addiu a0, a0, 32
271 2:
272 lw t0, 0(a1)
273 lw t1, 4(a1)
274 lw t2, 8(a1)
275 lw t3, 12(a1)
276 lw t4, 16(a1)
277 lw t5, 20(a1)
278 lw t6, 24(a1)
279 lw t7, 28(a1)
280 addiu a1, a1, 32
281 or t0, t0, t9
282 or t1, t1, t9
283 or t2, t2, t9
284 or t3, t3, t9
285 or t4, t4, t9
286 or t5, t5, t9
287 or t6, t6, t9
288 or t7, t7, t9
289 sw t0, 0(a0)
290 sw t1, 4(a0)
291 sw t2, 8(a0)
292 sw t3, 12(a0)
293 sw t4, 16(a0)
294 sw t5, 20(a0)
295 sw t6, 24(a0)
296 sw t7, 28(a0)
297 beqz a2, 4f /* w was a multiple of 8: done */
298 addiu a0, a0, 32
299 3: /* tail: one pixel per iteration (a2 > 0 on entry) */
300 lw t0, 0(a1)
301 addiu a1, a1, 4
302 addiu a2, a2, -1
303 or t1, t0, t9
304 sw t1, 0(a0)
305 bnez a2, 3b
306 addiu a0, a0, 4
307 4:
308 jr ra
309 nop
310
311 END(pixman_composite_src_x888_8888_asm_mips)
312
313 LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
314 /*
 * For every pixel, multiply the constant source by the a8 mask value and
 * store the product to dst (SRC operator: dst is never read).  Two pixels
 * per loop iteration with a single-pixel tail.
 *
315 * a0 - dst (a8r8g8b8)
316 * a1 - src (32bit constant)
317 * a2 - mask (a8)
318 * a3 - w
 *
 * v0 holds the 0x00ff00ff constant passed to the multiply macros; it is
 * saved/restored through SAVE_REGS_ON_STACK / RESTORE_REGS_FROM_STACK
 * (macros from pixman-mips-dspr2-asm.h, not visible here).
319 */
320
321
322 SAVE_REGS_ON_STACK 0, v0
323 li v0, 0x00ff00ff
324
325 beqz a3, 3f /* w == 0: restore and return */
326 nop
327 addiu t1, a3, -1
328 beqz t1, 2f /* w == 1: single-pixel path only */
329 nop
330
331 1:
332 /* a1 = source (32bit constant) */
333 lbu t0, 0(a2) /* t0 = mask (a8) */
334 lbu t1, 1(a2) /* t1 = mask (a8) */
335 addiu a2, a2, 2
336
337 MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9
338
339 sw t2, 0(a0)
340 sw t3, 4(a0)
341 addiu a3, a3, -2
342 addiu t2, a3, -1
343 bgtz t2, 1b /* loop while at least two pixels remain */
344 addiu a0, a0, 8
345
346 beqz a3, 3f /* no odd pixel left */
347 nop
348
349 2:
350 lbu t0, 0(a2)
351 addiu a2, a2, 1
352
353 MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5
354
355 sw t1, 0(a0)
356 addiu a3, a3, -1 /* a3/a0 are not read again before the return */
357 addiu a0, a0, 4
358
359 3:
360 RESTORE_REGS_FROM_STACK 0, v0
361 j ra
362 nop
363
364 END(pixman_composite_src_n_8_8888_asm_mips)
365
366 LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
367 /*
 * For every pixel, multiply the a8 mask value by the top byte of the
 * constant source (a1 >> 24) and store the 8-bit result to dst.  Four
 * pixels per iteration using packed DSP arithmetic, then a scalar
 * one-pixel-at-a-time tail loop.
 *
368 * a0 - dst (a8)
369 * a1 - src (32bit constant)
370 * a2 - mask (a8)
371 * a3 - w
372 */
373
374 li t9, 0x00ff00ff
375 beqz a3, 3f /* w == 0: nothing to do */
376 nop
377 srl t7, a3, 2 /* t7 = how many multiples of 4 dst pixels */
378 beqz t7, 1f /* branch if less than 4 src pixels */
379 nop
380
381 srl t8, a1, 24 /* t8 = top byte of src */
382 replv.ph t8, t8 /* replicate it into both halfwords of t8 */
383
384 0:
385 beqz t7, 1f /* all groups of 4 done */
386 addiu t7, t7, -1
387 lbu t0, 0(a2)
388 lbu t1, 1(a2)
389 lbu t2, 2(a2)
390 lbu t3, 3(a2)
391
392 addiu a2, a2, 4
393
/* pack the four mask bytes loaded above into the single word t0 */
394 precr_sra.ph.w t1, t0, 0
395 precr_sra.ph.w t3, t2, 0
396 precr.qb.ph t0, t3, t1
397
/* multiply the packed bytes by t8, then round each product back to 8 bits */
398 muleu_s.ph.qbl t2, t0, t8
399 muleu_s.ph.qbr t3, t0, t8
400 shra_r.ph t4, t2, 8
401 shra_r.ph t5, t3, 8
402 and t4, t4, t9
403 and t5, t5, t9
404 addq.ph t2, t2, t4
405 addq.ph t3, t3, t5
406 shra_r.ph t2, t2, 8
407 shra_r.ph t3, t3, 8
408 precr.qb.ph t2, t2, t3
409
/* store the four result bytes one at a time, lowest byte of t2 first */
410 sb t2, 0(a0)
411 srl t2, t2, 8
412 sb t2, 1(a0)
413 srl t2, t2, 8
414 sb t2, 2(a0)
415 srl t2, t2, 8
416 sb t2, 3(a0)
417 addiu a3, a3, -4
418 b 0b
419 addiu a0, a0, 4
420
421 1:
422 beqz a3, 3f /* no leftover pixels */
423 nop
424 srl t8, a1, 24 /* reload t8: the 4-pixel path left it replicated */
425 2:
426 lbu t0, 0(a2)
427 addiu a2, a2, 1
428
429 mul t2, t0, t8 /* t2 = mask * top byte of src */
430 shra_r.ph t3, t2, 8
431 andi t3, t3, 0x00ff
432 addq.ph t2, t2, t3
433 shra_r.ph t2, t2, 8
434
435 sb t2, 0(a0)
436 addiu a3, a3, -1
437 bnez a3, 2b
438 addiu a0, a0, 1
439
440 3:
441 j ra
442 nop
443
444 END(pixman_composite_src_n_8_8_asm_mips)
445
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
/*
 * Component-alpha OVER of a constant source through an a8r8g8b8 mask.
 * Per pixel, per channel:
 *
 *     dst = src * mask + dst * ~(mask * srca)
 *
 * a0 - dst  (a8r8g8b8)
 * a1 - src  (32bit constant)
 * a2 - mask (a8r8g8b8)
 * a3 - w
 *
 * Layout:
 *   1:  two pixels per iteration, srca != 0xff
 *   2:  two pixels per iteration, srca == 0xff
 *   3:  single trailing pixel (or w == 1)
 *
 * Register roles: t6 = 0xff, t7 = 0xffffffff, t8 = srca,
 * t9 = 0x00ff00ff (constant handed to the multiply macros).
 * The MIPS_*_MUL_* and SAVE/RESTORE macros are defined in
 * pixman-mips-dspr2-asm.h.
 *
 * Fix relative to the previous revision: when the mask is 0xffffffff the
 * fast path used to jump past BOTH multiplies, so ~mask became 0 and the
 * destination term vanished (result = src) even for a translucent source.
 * Only the "src * mask" multiply may be skipped in that case; the
 * "mask * srca" multiply is still required so that the destination is
 * scaled by (255 - srca).  Labels 11 and 31 therefore now sit in front of
 * the mask * srca multiply.
 */

    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
    beqz     a3, 4f
     nop
    li       t6, 0xff
    addiu    t7, zero, -1 /* t7 = 0xffffffff */
    srl      t8, a1, 24   /* t8 = srca */
    li       t9, 0x00ff00ff
    addiu    t1, a3, -1
    beqz     t1, 3f       /* last pixel */
     nop
    beq      t8, t6, 2f   /* if (srca == 0xff) */
     nop
1:
                          /* a1 = src */
    lw       t0, 0(a2)    /* t0 = mask */
    lw       t1, 4(a2)    /* t1 = mask */
    or       t2, t0, t1
    beqz     t2, 12f      /* if (t0 == 0) && (t1 == 0) */
     addiu   a2, a2, 8
    and      t3, t0, t1
    move     t4, a1       /* t4 = src */
    move     t5, a1       /* t5 = src */
    lw       t2, 0(a0)    /* t2 = dst */
    beq      t3, t7, 11f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
     lw      t3, 4(a0)    /* t3 = dst */
    /* t4/t5 = src * mask (skipped for a full mask: src * 1 == src) */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
11:
    /* t0/t1 = mask * srca (must also run for a full mask) */
    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
    not      t0, t0
    not      t1, t1
    /* t2/t3 = dst * ~(mask * srca) */
    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    addu_s.qb t2, t4, t2
    addu_s.qb t3, t5, t3
    sw       t2, 0(a0)
    sw       t3, 4(a0)
12:
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 1b       /* loop while at least two pixels remain */
     addiu   a0, a0, 8
    b        3f
     nop
2:
                          /* a1 = src */
    lw       t0, 0(a2)    /* t0 = mask */
    lw       t1, 4(a2)    /* t1 = mask */
    or       t2, t0, t1
    beqz     t2, 22f      /* if (t0 == 0) & (t1 == 0) */
     addiu   a2, a2, 8
    and      t2, t0, t1
    move     t4, a1
    beq      t2, t7, 21f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
     move    t5, a1       /* opaque src, full mask: store src as is */
    lw       t2, 0(a0)    /* t2 = dst */
    lw       t3, 4(a0)    /* t3 = dst */
    /* srca == 0xff, so mask * srca == mask and no second multiply is needed */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
    not      t0, t0
    not      t1, t1
    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    addu_s.qb t4, t4, t2
    addu_s.qb t5, t5, t3
21:
    sw       t4, 0(a0)
    sw       t5, 4(a0)
22:
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 2b       /* loop while at least two pixels remain */
     addiu   a0, a0, 8
3:
    blez     a3, 4f
     nop
                          /* a1 = src */
    lw       t1, 0(a2)    /* t1 = mask */
    beqz     t1, 4f       /* mask == 0: dst unchanged */
     nop
    move     t2, a1       /* t2 = src */
    beq      t1, t7, 31f  /* full mask: src * mask == src */
     lw      t0, 0(a0)    /* t0 = dst */

    MIPS_UN8x4_MUL_UN8x4  a1, t1, t2, t9, t3, t4, t5, t6
31:
    /* t1 = mask * srca (must also run for a full mask) */
    MIPS_UN8x4_MUL_UN8    t1, t8, t1, t9, t3, t4, t5
    not      t1, t1
    MIPS_UN8x4_MUL_UN8x4  t0, t1, t0, t9, t3, t4, t5, t6
    addu_s.qb t0, t2, t0
    sw       t0, 0(a0)
4:
    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
    j        ra
     nop

END(pixman_composite_over_n_8888_8888_ca_asm_mips)
547
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
/*
 * Component-alpha OVER of a constant source through an a8r8g8b8 mask onto
 * an r5g6b5 destination.  Per pixel, per channel (dst expanded to 8888,
 * result packed back to 0565):
 *
 *     dst = src * mask + dst * ~(mask * srca)
 *
 * a0 - dst  (r5g6b5)
 * a1 - src  (32bit constant)
 * a2 - mask (a8r8g8b8)
 * a3 - w
 *
 * Layout:
 *   1:  two pixels per iteration, srca != 0xff
 *   2:  two pixels per iteration, srca == 0xff
 *   3:  single trailing pixel (or w == 1)
 *
 * Register roles: t5/t6/t7 = 0xf800f800 / 0x07e007e0 / 0x001F001F
 * (constants handed to the 2x CONVERT macros), t8 = srca,
 * t9 = 0x00ff00ff, s0 = 0xff, s1 = 0xffffffff (two-pixel loops only).
 * The CONVERT_*, MIPS_*_MUL_* and SAVE/RESTORE macros are defined in
 * pixman-mips-dspr2-asm.h.
 *
 * Fixes relative to the previous revision:
 *  - When the mask is 0xffffffff the fast path used to jump past BOTH
 *    multiplies, so ~mask became 0 and the destination term vanished even
 *    for a translucent source.  Only "src * mask" may be skipped; labels
 *    11 and 31 now sit in front of the "mask * srca" multiply.
 *  - The single-pixel tail compared the mask with t7, which holds
 *    0x001F001F here, not 0xffffffff (s1 is not initialised when w == 1).
 *    The tail now materialises 0xffffffff in a scratch register.
 */

    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
    beqz     a3, 4f
     nop
    li       t5, 0xf800f800
    li       t6, 0x07e007e0
    li       t7, 0x001F001F
    li       t9, 0x00ff00ff

    srl      t8, a1, 24   /* t8 = srca */
    addiu    t1, a3, -1
    beqz     t1, 3f       /* last pixel */
     nop
    li       s0, 0xff     /* s0 = 0xff */
    addiu    s1, zero, -1 /* s1 = 0xffffffff */

    beq      t8, s0, 2f   /* if (srca == 0xff) */
     nop
1:
                          /* a1 = src */
    lw       t0, 0(a2)    /* t0 = mask */
    lw       t1, 4(a2)    /* t1 = mask */
    or       t2, t0, t1
    beqz     t2, 12f      /* if (t0 == 0) && (t1 == 0) */
     addiu   a2, a2, 8
    and      t3, t0, t1
    move     s2, a1       /* s2 = src */
    move     s3, a1       /* s3 = src */
    lhu      t2, 0(a0)    /* t2 = dst */
    beq      t3, s1, 11f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
     lhu     t3, 2(a0)    /* t3 = dst */
    /* s2/s3 = src * mask (skipped for a full mask: src * 1 == src) */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
11:
    /* t0/t1 = mask * srca (must also run for a full mask) */
    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8
    not      t0, t0
    not      t1, t1
    CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
    /* s4/s5 = dst * ~(mask * srca) */
    MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1
    addu_s.qb s2, s2, s4
    addu_s.qb s3, s3, s5
    CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5
    sh       t2, 0(a0)
    sh       t3, 2(a0)
12:
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 1b       /* loop while at least two pixels remain */
     addiu   a0, a0, 4
    b        3f
     nop
2:
                          /* a1 = src */
    lw       t0, 0(a2)    /* t0 = mask */
    lw       t1, 4(a2)    /* t1 = mask */
    or       t2, t0, t1
    beqz     t2, 22f      /* if (t0 == 0) & (t1 == 0) */
     addiu   a2, a2, 8
    and      t3, t0, t1
    move     t2, a1
    beq      t3, s1, 21f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
     move    t3, a1       /* opaque src, full mask: just convert src */
    lhu      t2, 0(a0)    /* t2 = dst */
    lhu      t3, 2(a0)    /* t3 = dst */
    /* srca == 0xff, so mask * srca == mask and no second multiply is needed */
    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
    not      t0, t0
    not      t1, t1
    CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
    MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3
    addu_s.qb t2, s2, s4
    addu_s.qb t3, s3, s5
21:
    CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3
    sh       t0, 0(a0)
    sh       t1, 2(a0)
22:
    addiu    a3, a3, -2
    addiu    t1, a3, -1
    bgtz     t1, 2b       /* loop while at least two pixels remain */
     addiu   a0, a0, 4
3:
    blez     a3, 4f
     nop
                          /* a1 = src */
    lw       t1, 0(a2)    /* t1 = mask */
    beqz     t1, 4f       /* mask == 0: dst unchanged */
     nop
    move     t2, a1       /* t2 = src */
    addiu    t3, zero, -1 /* t3 = 0xffffffff (t3 is scratch below) */
    beq      t1, t3, 31f  /* full mask: src * mask == src */
     lhu     t0, 0(a0)    /* t0 = dst */

    MIPS_UN8x4_MUL_UN8x4  a1, t1, t2, t9, t3, t4, t5, t6
31:
    /* t1 = mask * srca (must also run for a full mask) */
    MIPS_UN8x4_MUL_UN8    t1, t8, t1, t9, t3, t4, t5
    not      t1, t1
    CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3
    MIPS_UN8x4_MUL_UN8x4  s1, t1, t3, t9, t4, t5, t6, t7
    addu_s.qb t0, t2, t3
    CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3
    sh       s1, 0(a0)
4:
    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
    j        ra
     nop

END(pixman_composite_over_n_8888_0565_ca_asm_mips)
660
661 LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
662 /*
 * OVER of a constant source through an a8 mask onto a8r8g8b8.
 * Loop 1 handles pixel pairs when srca != 0xff, loop 2 handles pixel
 * pairs when srca == 0xff, and label 3 handles a single trailing pixel.
 *
663 * a0 - dst (a8r8g8b8)
664 * a1 - src (32bit constant)
665 * a2 - mask (a8)
666 * a3 - w
 *
 * t4 = 0x00ff00ff (constant passed to the multiply macros), t5 = 0xff,
 * s4 = (~src) >> 24.  The MIPS_*_MUL_*, OVER_* and SAVE/RESTORE macros
 * come from pixman-mips-dspr2-asm.h (not visible here).
667 */
668
669 SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4
670 beqz a3, 4f /* w == 0: restore and return */
671 nop
672 li t4, 0x00ff00ff
673 li t5, 0xff
674 addiu t0, a3, -1
675 beqz t0, 3f /* last pixel */
676 srl t6, a1, 24 /* t6 = srca */
677 not s4, a1
678 beq t5, t6, 2f /* if (srca == 0xff) */
679 srl s4, s4, 24 /* s4 = (~src) >> 24 */
680 1:
681 /* a1 = src */
682 lbu t0, 0(a2) /* t0 = mask */
683 lbu t1, 1(a2) /* t1 = mask */
684 or t2, t0, t1
685 beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */
686 addiu a2, a2, 2
687 and t3, t0, t1
688
689 lw t2, 0(a0) /* t2 = dst */
690 beq t3, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */
691 lw t3, 4(a0) /* t3 = dst */
692
/* s0/s1 = src * mask; s2/s3 = (~that) >> 24; dst = dst * s2/s3 + s0/s1 */
693 MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3
694 not s2, s0
695 not s3, s1
696 srl s2, s2, 24
697 srl s3, s3, 24
698 MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9
699 addu_s.qb s2, t2, s0
700 addu_s.qb s3, t3, s1
701 sw s2, 0(a0)
702 b 111f
703 sw s3, 4(a0)
704 11: /* both masks are 0xff: dst = dst * s4 + src */
705 MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9
706 addu_s.qb s2, t2, a1
707 addu_s.qb s3, t3, a1
708 sw s2, 0(a0)
709 sw s3, 4(a0)
710
711 111:
712 addiu a3, a3, -2
713 addiu t0, a3, -1
714 bgtz t0, 1b /* loop while at least two pixels remain */
715 addiu a0, a0, 8
716 b 3f
717 nop
718 2:
719 /* a1 = src */
720 lbu t0, 0(a2) /* t0 = mask */
721 lbu t1, 1(a2) /* t1 = mask */
722 or t2, t0, t1
723 beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */
724 addiu a2, a2, 2
725 and t3, t0, t1
726 beq t3, t5, 22f /* if (t0 == 0xff) && (t1 == 0xff) */
727 nop
728 lw t2, 0(a0) /* t2 = dst */
729 lw t3, 4(a0) /* t3 = dst */
730
731 OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \
732 t6, t7, t4, t8, t9, s0, s1, s2, s3
733 sw t6, 0(a0)
734 b 222f
735 sw t7, 4(a0)
736 22: /* srca == 0xff and both masks 0xff: store src unchanged */
737 sw a1, 0(a0)
738 sw a1, 4(a0)
739 222:
740 addiu a3, a3, -2
741 addiu t0, a3, -1
742 bgtz t0, 2b /* loop while at least two pixels remain */
743 addiu a0, a0, 8
744 3:
745 blez a3, 4f /* no odd pixel left */
746 nop
747 /* a1 = src */
748 lbu t0, 0(a2) /* t0 = mask */
749 beqz t0, 4f /* if (t0 == 0) */
750 addiu a2, a2, 1
751 move t3, a1 /* t3 = src (used as is when mask == 0xff) */
752 beq t0, t5, 31f /* if (t0 == 0xff) */
753 lw t1, 0(a0) /* t1 = dst */
754
755 MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8
756 31:
757 not t2, t3
758 srl t2, t2, 24 /* t2 = (~t3) >> 24 */
759 MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8
760 addu_s.qb t2, t1, t3
761 sw t2, 0(a0)
762 4:
763 RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4
764 j ra
765 nop
766
767 END(pixman_composite_over_n_8_8888_asm_mips)
768
769 LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
770 /*
 * OVER of a constant source through an a8 mask onto r5g6b5.
 * Loop 1 handles pixel pairs when srca != 0xff, loop 2/21 handles pixel
 * pairs when srca == 0xff, and label 3 handles a single trailing pixel.
 * Destination pixels are expanded with CONVERT_*0565_TO_*8888, blended,
 * and packed back with CONVERT_*8888_TO_*0565.
 *
771 * a0 - dst (r5g6b5)
772 * a1 - src (32bit constant)
773 * a2 - mask (a8)
774 * a3 - w
 *
 * t4 = 0x00ff00ff, t5 = 0xff, t6/t7/t8 = constants passed to the 2x
 * CONVERT macros, v0 = (~src) >> 24.  All macros come from
 * pixman-mips-dspr2-asm.h (not visible here).
775 */
776 SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
777 beqz a3, 4f /* w == 0: restore and return */
778 nop
779 li t4, 0x00ff00ff
780 li t5, 0xff
781 li t6, 0xf800f800
782 li t7, 0x07e007e0
783 li t8, 0x001F001F
784 addiu t1, a3, -1
785 beqz t1, 3f /* last pixel */
786 srl t0, a1, 24 /* t0 = srca */
787 not v0, a1
788 beq t0, t5, 2f /* if (srca == 0xff) */
789 srl v0, v0, 24 /* v0 = (~src) >> 24 */
790 1:
791 /* a1 = src */
792 lbu t0, 0(a2) /* t0 = mask */
793 lbu t1, 1(a2) /* t1 = mask */
794 or t2, t0, t1
795 beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */
796 addiu a2, a2, 2
797 lhu t2, 0(a0) /* t2 = dst */
798 lhu t3, 2(a0) /* t3 = dst */
799 CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4
800 and t9, t0, t1
801 beq t9, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */
802 nop
803
/* s2/s3 = src * mask; s4/s5 = (~that) >> 24; result = dst * s4/s5 + s2/s3 */
804 MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8
805 not s4, s2
806 not s5, s3
807 srl s4, s4, 24
808 srl s5, s5, 24
809 MIPS_2xUN8x4_MUL_2xUN8 s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8
810 addu_s.qb s4, s2, s0
811 addu_s.qb s5, s3, s1
812 CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
813 sh t2, 0(a0)
814 b 111f
815 sh t3, 2(a0)
816 11: /* both masks are 0xff: result = dst * v0 + src */
817 MIPS_2xUN8x4_MUL_2xUN8 s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8
818 addu_s.qb s4, a1, s0
819 addu_s.qb s5, a1, s1
820 CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
821 sh t2, 0(a0)
822 sh t3, 2(a0)
823 111:
824 addiu a3, a3, -2
825 addiu t0, a3, -1
826 bgtz t0, 1b /* loop while at least two pixels remain */
827 addiu a0, a0, 4
828 b 3f
829 nop
830 2:
831 CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2 /* s0 = src packed to 0565, computed once */
832 21:
833 /* a1 = src */
834 lbu t0, 0(a2) /* t0 = mask */
835 lbu t1, 1(a2) /* t1 = mask */
836 or t2, t0, t1
837 beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */
838 addiu a2, a2, 2
839 and t9, t0, t1
840 move s2, s0 /* default result: packed src */
841 beq t9, t5, 22f /* if (t0 == 0xff) && (t1 == 0xff) */
842 move s3, s0
843 lhu t2, 0(a0) /* t2 = dst */
844 lhu t3, 2(a0) /* t3 = dst */
845
846 CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7
847 OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, s2, s3, \
848 t2, t3, t4, t9, s4, s5, s6, s7, s8
849 CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5
850 22:
851 sh s2, 0(a0)
852 sh s3, 2(a0)
853 222:
854 addiu a3, a3, -2
855 addiu t0, a3, -1
856 bgtz t0, 21b /* loop while at least two pixels remain */
857 addiu a0, a0, 4
858 3:
859 blez a3, 4f /* no odd pixel left */
860 nop
861 /* a1 = src */
862 lbu t0, 0(a2) /* t0 = mask */
863 beqz t0, 4f /* if (t0 == 0) */
864 nop
865 lhu t1, 0(a0) /* t1 = dst */
866 CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7
867 beq t0, t5, 31f /* if (t0 == 0xff) */
868 move t3, a1 /* t3 = src (used as is when mask == 0xff) */
869
870 MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t7, t8, t9
871 31:
872 not t6, t3
873 srl t6, t6, 24 /* t6 = (~t3) >> 24 */
874 MIPS_UN8x4_MUL_UN8 t2, t6, t2, t4, t7, t8, t9
875 addu_s.qb t1, t2, t3
876 CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7
877 sh t2, 0(a0)
878 4:
879 RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
880 j ra
881 nop
882
883 END(pixman_composite_over_n_8_0565_asm_mips)
884
885 LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
886 /*
 * OVER of an a8r8g8b8 source onto an a8r8g8b8 destination, with the top
 * byte of a constant mask applied to every pixel.  Two pixels per loop
 * iteration with a single-pixel tail.
 *
887 * a0 - dst (a8r8g8b8)
888 * a1 - src (a8r8g8b8)
889 * a2 - mask (32bit constant)
890 * a3 - w
 *
 * The OVER_* and SAVE/RESTORE macros come from pixman-mips-dspr2-asm.h
 * (not visible here); t4 = 0x00ff00ff is passed to them.
891 */
892
893 SAVE_REGS_ON_STACK 0, s0
894 li t4, 0x00ff00ff
895 beqz a3, 3f /* w == 0: restore and return */
896 nop
897 addiu t1, a3, -1
898 srl a2, a2, 24 /* a2 = top byte of the mask constant */
899 beqz t1, 2f /* w == 1: single-pixel path only */
900 nop
901
902 1:
903 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
904 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
905 /* a2 = mask (32bit constant) */
906 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
907 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
908 addiu a1, a1, 8
909
910 OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \
911 t5, t6, t4, t7, t8, t9, t0, t1, s0
912
913 sw t5, 0(a0)
914 sw t6, 4(a0)
915 addiu a3, a3, -2
916 addiu t1, a3, -1
917 bgtz t1, 1b /* loop while at least two pixels remain */
918 addiu a0, a0, 8
919 2:
920 beqz a3, 3f /* no odd pixel left */
921 nop
922 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
923 /* a2 = mask (32bit constant) */
924 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
925
926 OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8
927
928 sw t3, 0(a0)
929 3:
930 RESTORE_REGS_FROM_STACK 0, s0
931 j ra
932 nop
933
934 END(pixman_composite_over_8888_n_8888_asm_mips)
935
936 LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
937 /*
 * OVER of an a8r8g8b8 source onto an r5g6b5 destination, with the top
 * byte of a constant mask applied to every pixel.  Destination pixels
 * are expanded to 8888, blended, and packed back to 0565.  Two pixels per
 * loop iteration with a single-pixel tail.
 *
938 * a0 - dst (r5g6b5)
939 * a1 - src (a8r8g8b8)
940 * a2 - mask (32bit constant)
941 * a3 - w
 *
 * t6 = 0x00ff00ff, t7/t8/t9 = constants passed to the 2x CONVERT macros.
 * All macros come from pixman-mips-dspr2-asm.h (not visible here).
942 */
943
944 SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
945 li t6, 0x00ff00ff
946 li t7, 0xf800f800
947 li t8, 0x07e007e0
948 li t9, 0x001F001F
949 beqz a3, 3f /* w == 0: restore and return */
950 nop
951 srl a2, a2, 24 /* a2 = top byte of the mask constant */
952 addiu t1, a3, -1
953 beqz t1, 2f /* w == 1: single-pixel path only */
954 nop
955 1:
956 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
957 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
958 /* a2 = mask (32bit constant) */
959 lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
960 lhu t3, 2(a0) /* t3 = destination (r5g6b5) */
961 addiu a1, a1, 8
962
963 CONVERT_2x0565_TO_2x8888 t2, t3, t4, t5, t8, t9, s0, s1, t2, t3
964 OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t4, t5, \
965 t2, t3, t6, t0, t1, s0, s1, s2, s3
966 CONVERT_2x8888_TO_2x0565 t2, t3, t4, t5, t7, t8, t9, s0, s1
967
968 sh t4, 0(a0)
969 sh t5, 2(a0)
970 addiu a3, a3, -2
971 addiu t1, a3, -1
972 bgtz t1, 1b /* loop while at least two pixels remain */
973 addiu a0, a0, 4
974 2:
975 beqz a3, 3f /* no odd pixel left */
976 nop
977 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
978 /* a2 = mask (32bit constant) */
979 lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
980
981 CONVERT_1x0565_TO_1x8888 t1, t2, t4, t5
982 OVER_8888_8_8888 t0, a2, t2, t1, t6, t3, t4, t5, t7
983 CONVERT_1x8888_TO_1x0565 t1, t3, t4, t5
984
985 sh t3, 0(a0)
986 3:
987 RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
988 j ra
989 nop
990
991 END(pixman_composite_over_8888_n_0565_asm_mips)
992
993 LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
994 /*
 * OVER of an r5g6b5 source onto an r5g6b5 destination, with the top byte
 * of a constant mask applied to every pixel.  Source and destination are
 * both expanded to 8888, blended, and the result packed back to 0565.
 * Two pixels per loop iteration with a single-pixel tail.
 *
995 * a0 - dst (r5g6b5)
996 * a1 - src (r5g6b5)
997 * a2 - mask (32bit constant)
998 * a3 - w
 *
 * t6 = 0x00ff00ff, t7/t8/t9 = constants passed to the 2x CONVERT macros.
 * All macros come from pixman-mips-dspr2-asm.h (not visible here).
999 */
1000
1001 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
1002 li t6, 0x00ff00ff
1003 li t7, 0xf800f800
1004 li t8, 0x07e007e0
1005 li t9, 0x001F001F
1006 beqz a3, 3f /* w == 0: restore and return */
1007 nop
1008 srl a2, a2, 24 /* a2 = top byte of the mask constant */
1009 addiu t1, a3, -1
1010 beqz t1, 2f /* w == 1: single-pixel path only */
1011 nop
1012 1:
1013 lhu t0, 0(a1) /* t0 = source (r5g6b5) */
1014 lhu t1, 2(a1) /* t1 = source (r5g6b5) */
1015 /* a2 = mask (32bit constant) */
1016 lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
1017 lhu t3, 2(a0) /* t3 = destination (r5g6b5) */
1018 addiu a1, a1, 4
1019
1020 CONVERT_2x0565_TO_2x8888 t0, t1, t4, t5, t8, t9, s0, s1, s2, s3
1021 CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t8, t9, s2, s3, s4, s5
1022 OVER_2x8888_2x8_2x8888 t4, t5, a2, a2, s0, s1, \
1023 t0, t1, t6, s2, s3, s4, s5, t4, t5
1024 CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t7, t8, t9, s2, s3
1025
1026 sh s0, 0(a0)
1027 sh s1, 2(a0)
1028 addiu a3, a3, -2
1029 addiu t1, a3, -1
1030 bgtz t1, 1b /* loop while at least two pixels remain */
1031 addiu a0, a0, 4
1032 2:
1033 beqz a3, 3f /* no odd pixel left */
1034 nop
1035 lhu t0, 0(a1) /* t0 = source (r5g6b5) */
1036 /* a2 = mask (32bit constant) */
1037 lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
1038
1039 CONVERT_1x0565_TO_1x8888 t0, t2, t4, t5
1040 CONVERT_1x0565_TO_1x8888 t1, t3, t4, t5
1041 OVER_8888_8_8888 t2, a2, t3, t0, t6, t1, t4, t5, t7
1042 CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
1043
1044 sh t3, 0(a0)
1045 3:
1046 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
1047 j ra
1048 nop
1049
1050 END(pixman_composite_over_0565_n_0565_asm_mips)
1051
1052 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
1053 /*
 * OVER of an a8r8g8b8 source through a per-pixel a8 mask onto an
 * a8r8g8b8 destination.  Two pixels per loop iteration with a
 * single-pixel tail.
 *
1054 * a0 - dst (a8r8g8b8)
1055 * a1 - src (a8r8g8b8)
1056 * a2 - mask (a8)
1057 * a3 - w
 *
 * The OVER_* and SAVE/RESTORE macros come from pixman-mips-dspr2-asm.h
 * (not visible here); t4 = 0x00ff00ff is passed to them.
1058 */
1059
1060 SAVE_REGS_ON_STACK 0, s0, s1
1061 li t4, 0x00ff00ff
1062 beqz a3, 3f /* w == 0: restore and return */
1063 nop
1064 addiu t1, a3, -1
1065 beqz t1, 2f /* w == 1: single-pixel path only */
1066 nop
1067 1:
1068 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1069 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
1070 lbu t2, 0(a2) /* t2 = mask (a8) */
1071 lbu t3, 1(a2) /* t3 = mask (a8) */
1072 lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */
1073 lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */
1074 addiu a1, a1, 8
1075 addiu a2, a2, 2
1076
1077 OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \
1078 t7, t8, t4, t9, s0, s1, t0, t1, t2
1079
1080 sw t7, 0(a0)
1081 sw t8, 4(a0)
1082 addiu a3, a3, -2
1083 addiu t1, a3, -1
1084 bgtz t1, 1b /* loop while at least two pixels remain */
1085 addiu a0, a0, 8
1086 2:
1087 beqz a3, 3f /* no odd pixel left */
1088 nop
1089 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1090 lbu t1, 0(a2) /* t1 = mask (a8) */
1091 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
1092
1093 OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
1094
1095 sw t3, 0(a0)
1096 3:
1097 RESTORE_REGS_FROM_STACK 0, s0, s1
1098 j ra
1099 nop
1100
1101 END(pixman_composite_over_8888_8_8888_asm_mips)
1102
1103 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
1104 /*
 * OVER of an a8r8g8b8 source through a per-pixel a8 mask onto an r5g6b5
 * destination.  Destination pixels are expanded to 8888, blended, and
 * packed back to 0565.  Two pixels per loop iteration with a
 * single-pixel tail.
 *
1105 * a0 - dst (r5g6b5)
1106 * a1 - src (a8r8g8b8)
1107 * a2 - mask (a8)
1108 * a3 - w
 *
 * t6 = 0x00ff00ff, t7/t8/t9 = constants passed to the 2x CONVERT macros.
 * All macros come from pixman-mips-dspr2-asm.h (not visible here).
1109 */
1110
1111 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
1112 li t6, 0x00ff00ff
1113 li t7, 0xf800f800
1114 li t8, 0x07e007e0
1115 li t9, 0x001F001F
1116 beqz a3, 3f /* w == 0: restore and return */
1117 nop
1118 addiu t1, a3, -1
1119 beqz t1, 2f /* w == 1: single-pixel path only */
1120 nop
1121 1:
1122 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1123 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
1124 lbu t2, 0(a2) /* t2 = mask (a8) */
1125 lbu t3, 1(a2) /* t3 = mask (a8) */
1126 lhu t4, 0(a0) /* t4 = destination (r5g6b5) */
1127 lhu t5, 2(a0) /* t5 = destination (r5g6b5) */
1128 addiu a1, a1, 8
1129 addiu a2, a2, 2
1130
1131 CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
1132 OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, s0, s1, \
1133 t4, t5, t6, s2, s3, s4, s5, t0, t1
1134 CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
1135
1136 sh s0, 0(a0)
1137 sh s1, 2(a0)
1138 addiu a3, a3, -2
1139 addiu t1, a3, -1
1140 bgtz t1, 1b /* loop while at least two pixels remain */
1141 addiu a0, a0, 4
1142 2:
1143 beqz a3, 3f /* no odd pixel left */
1144 nop
1145 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1146 lbu t1, 0(a2) /* t1 = mask (a8) */
1147 lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
1148
1149 CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
1150 OVER_8888_8_8888 t0, t1, t3, t2, t6, t4, t5, t7, t8
1151 CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
1152
1153 sh t3, 0(a0)
1154 3:
1155 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
1156 j ra
1157 nop
1158
1159 END(pixman_composite_over_8888_8_0565_asm_mips)
1160
1161 LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
1162 /*
 * OVER of an r5g6b5 source through a per-pixel a8 mask onto an r5g6b5
 * destination.  Source and destination are both expanded to 8888,
 * blended, and the result packed back to 0565.  Two pixels per loop
 * iteration with a single-pixel tail.
 *
1163 * a0 - dst (r5g6b5)
1164 * a1 - src (r5g6b5)
1165 * a2 - mask (a8)
1166 * a3 - w
 *
 * t4/t5/t6 = constants passed to the 2x CONVERT macros, t7 = 0x00ff00ff.
 * Note the single-pixel tail reuses t4/t5/t6 as scratch, which is only
 * safe because the two-pixel loop is finished by then.
 * All macros come from pixman-mips-dspr2-asm.h (not visible here).
1167 */
1168
1169 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
1170 li t4, 0xf800f800
1171 li t5, 0x07e007e0
1172 li t6, 0x001F001F
1173 li t7, 0x00ff00ff
1174 beqz a3, 3f /* w == 0: restore and return */
1175 nop
1176 addiu t1, a3, -1
1177 beqz t1, 2f /* w == 1: single-pixel path only */
1178 nop
1179 1:
1180 lhu t0, 0(a1) /* t0 = source (r5g6b5) */
1181 lhu t1, 2(a1) /* t1 = source (r5g6b5) */
1182 lbu t2, 0(a2) /* t2 = mask (a8) */
1183 lbu t3, 1(a2) /* t3 = mask (a8) */
1184 lhu t8, 0(a0) /* t8 = destination (r5g6b5) */
1185 lhu t9, 2(a0) /* t9 = destination (r5g6b5) */
1186 addiu a1, a1, 4
1187 addiu a2, a2, 2
1188
1189 CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
1190 CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
1191 OVER_2x8888_2x8_2x8888 s0, s1, t2, t3, s2, s3, \
1192 t0, t1, t7, s4, s5, t8, t9, s0, s1
1193 CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
1194
1195 sh s0, 0(a0)
1196 sh s1, 2(a0)
1197 addiu a3, a3, -2
1198 addiu t1, a3, -1
1199 bgtz t1, 1b /* loop while at least two pixels remain */
1200 addiu a0, a0, 4
1201 2:
1202 beqz a3, 3f /* no odd pixel left */
1203 nop
1204 lhu t0, 0(a1) /* t0 = source (r5g6b5) */
1205 lbu t1, 0(a2) /* t1 = mask (a8) */
1206 lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
1207
1208 CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
1209 CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
1210 OVER_8888_8_8888 t3, t1, t4, t0, t7, t2, t5, t6, t8
1211 CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
1212
1213 sh t3, 0(a0)
1214 3:
1215 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
1216 j ra
1217 nop
1218
1219 END(pixman_composite_over_0565_8_0565_asm_mips)
1220
1221 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
1222 /*
 * OVER of an a8r8g8b8 source through an a8r8g8b8 mask onto an a8r8g8b8
 * destination.  Only the top byte of each mask pixel is used (the mask
 * word is shifted right by 24 before blending).  Two pixels per loop
 * iteration with a single-pixel tail.
 *
1223 * a0 - dst (a8r8g8b8)
1224 * a1 - src (a8r8g8b8)
1225 * a2 - mask (a8r8g8b8)
1226 * a3 - w
 *
 * The OVER_* and SAVE/RESTORE macros come from pixman-mips-dspr2-asm.h
 * (not visible here); t4 = 0x00ff00ff is passed to them.
1227 */
1228
1229 SAVE_REGS_ON_STACK 0, s0, s1, s2
1230 li t4, 0x00ff00ff
1231 beqz a3, 3f /* w == 0: restore and return */
1232 nop
1233 addiu t1, a3, -1
1234 beqz t1, 2f /* w == 1: single-pixel path only */
1235 nop
1236 1:
1237 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1238 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
1239 lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */
1240 lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */
1241 lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */
1242 lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */
1243 addiu a1, a1, 8
1244 addiu a2, a2, 8
1245 srl t2, t2, 24 /* t2 = top byte of mask pixel 0 */
1246 srl t3, t3, 24 /* t3 = top byte of mask pixel 1 */
1247
1248 OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1
1249
1250 sw t7, 0(a0)
1251 sw t8, 4(a0)
1252 addiu a3, a3, -2
1253 addiu t1, a3, -1
1254 bgtz t1, 1b /* loop while at least two pixels remain */
1255 addiu a0, a0, 8
1256 2:
1257 beqz a3, 3f /* no odd pixel left */
1258 nop
1259 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1260 lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */
1261 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
1262 srl t1, t1, 24 /* t1 = top byte of the mask pixel */
1263
1264 OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
1265
1266 sw t3, 0(a0)
1267 3:
1268 RESTORE_REGS_FROM_STACK 0, s0, s1, s2
1269 j ra
1270 nop
1271
1272 END(pixman_composite_over_8888_8888_8888_asm_mips)
1273
1274 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
1275 /*
 * Unmasked OVER of an a8r8g8b8 source onto an a8r8g8b8 destination:
 *     dst = src + dst * ((~src) >> 24)
 * with shortcuts for fully opaque sources (store src unchanged) and for
 * all-zero sources (leave dst untouched).  Two pixels per loop iteration
 * with a single-pixel tail.
 *
1276 * a0 - dst (a8r8g8b8)
1277 * a1 - src (a8r8g8b8)
1278 * a2 - w
 *
 * NOTE(review): two branches below ("beqz t8, 12f" and "beqz t0, 3f") are
 * followed directly by a macro invocation with no explicit nop.  If
 * .set noreorder is in effect, the first instruction of the macro
 * expansion occupies the delay slot and executes even when the branch is
 * taken.  That looks harmless here because nothing it could write is read
 * again on the taken path, but confirm against the macro definitions in
 * pixman-mips-dspr2-asm.h before reordering anything.
1279 */
1280
1281 SAVE_REGS_ON_STACK 0, s0, s1, s2
1282 li t4, 0x00ff00ff
1283 beqz a2, 3f /* w == 0: restore and return */
1284 nop
1285 addiu t1, a2, -1
1286 beqz t1, 2f /* w == 1: single-pixel path only */
1287 nop
1288 1:
1289 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1290 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
1291 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
1292 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
1293 addiu a1, a1, 8
1294
1295 not t5, t0
1296 srl t5, t5, 24 /* t5 = (~src0) >> 24 */
1297 not t6, t1
1298 srl t6, t6, 24 /* t6 = (~src1) >> 24 */
1299
1300 or t7, t5, t6
1301 beqz t7, 11f /* both sources opaque: store them unchanged */
1302 or t8, t0, t1
1303 beqz t8, 12f /* both sources zero: leave dst untouched (see NOTE above) */
1304
1305 MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t2, t3
1306
1307 addu_s.qb t0, t7, t0
1308 addu_s.qb t1, t8, t1
1309 11:
1310 sw t0, 0(a0)
1311 sw t1, 4(a0)
1312 12:
1313 addiu a2, a2, -2
1314 addiu t1, a2, -1
1315 bgtz t1, 1b /* loop while at least two pixels remain */
1316 addiu a0, a0, 8
1317 2:
1318 beqz a2, 3f /* no odd pixel left */
1319 nop
1320
1321 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1322 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
1323 addiu a1, a1, 4
1324
1325 not t2, t0
1326 srl t2, t2, 24 /* t2 = (~src) >> 24 */
1327
1328 beqz t2, 21f /* opaque source: store it unchanged */
1329 nop
1330 beqz t0, 3f /* zero source: leave dst untouched (see NOTE above) */
1331
1332 MIPS_UN8x4_MUL_UN8 t1, t2, t3, t4, t5, t6, t7
1333
1334 addu_s.qb t0, t3, t0
1335 21:
1336 sw t0, 0(a0)
1337
1338 3:
1339 RESTORE_REGS_FROM_STACK 0, s0, s1, s2
1340 j ra
1341 nop
1342
1343 END(pixman_composite_over_8888_8888_asm_mips)
1344
1345 LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
1346 /*
 * OVER of a constant source onto an r5g6b5 destination.
 *
 * If the top byte of src is 0xff the source is packed to 0565 once and
 * the row is filled with it (loop 0, no registers saved).  Otherwise
 * every destination pixel is expanded to 8888, multiplied by
 * t0 = (~src) >> 24, added to src, and packed back: two pixels per
 * iteration in loop 2, with a single-pixel tail at label 3.
 *
1347 * a0 - dst (r5g6b5)
1348 * a1 - src (32bit constant)
1349 * a2 - w
 *
 * All macros come from pixman-mips-dspr2-asm.h (not visible here).
1350 */
1351
1352 beqz a2, 5f /* w == 0: return (nothing was saved yet) */
1353 nop
1354
1355 not t0, a1
1356 srl t0, t0, 24 /* t0 = (~src) >> 24 */
1357 bgtz t0, 1f /* top byte of src != 0xff: blend path */
1358 nop
1359 CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3 /* t1 = src packed to 0565 */
1360 0:
1361 sh t1, 0(a0)
1362 addiu a2, a2, -1
1363 bgtz a2, 0b
1364 addiu a0, a0, 2
1365 j ra /* fill path returns here; no registers to restore */
1366 nop
1367
1368 1:
1369 SAVE_REGS_ON_STACK 0, s0, s1, s2
1370 li t4, 0x00ff00ff
1371 li t5, 0xf800f800
1372 li t6, 0x07e007e0
1373 li t7, 0x001F001F
1374 addiu t1, a2, -1
1375 beqz t1, 3f /* w == 1: single-pixel path only */
1376 nop
1377 2:
1378 lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
1379 lhu t2, 2(a0) /* t2 = destination (r5g6b5) */
1380
1381 CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2
1382 MIPS_2xUN8x4_MUL_2xUN8 t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8
1383 addu_s.qb t1, t1, a1
1384 addu_s.qb t2, t2, a1
1385 CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1
1386
1387 sh t3, 0(a0)
1388 sh t8, 2(a0)
1389
1390 addiu a2, a2, -2
1391 addiu t1, a2, -1
1392 bgtz t1, 2b /* loop while at least two pixels remain */
1393 addiu a0, a0, 4
1394 3:
1395 beqz a2, 4f /* no odd pixel left */
1396 nop
1397
1398 lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
1399
1400 CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1
1401 MIPS_UN8x4_MUL_UN8 t2, t0, t1, t4, s0, s1, s2
1402 addu_s.qb t1, t1, a1
1403 CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1
1404
1405 sh t2, 0(a0)
1406
1407 4:
1408 RESTORE_REGS_FROM_STACK 0, s0, s1, s2
1409 5:
1410 j ra
1411 nop
1412
1413 END(pixman_composite_over_n_0565_asm_mips)
1414
1415 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
1416 /* OVER of a constant colour onto an a8r8g8b8 scanline: a plain word fill when the colour is opaque, otherwise dst is scaled by (255 - source alpha) and the colour added with saturation.
1417 * a0 - dst (a8r8g8b8), read and written in place
1418 * a1 - src (32bit constant)
1419 * a2 - w (pixel count; 0 returns immediately)
1420 * NOTE(review): the MUL and SAVE/RESTORE macros are defined outside this section; their behaviour is presumed from their names. */
1421
1422 beqz a2, 5f /* w == 0: return without touching the stack */
1423 nop
1424
1425 not t0, a1
1426 srl t0, t0, 24 /* t0 = 255 - source alpha */
1427 bgtz t0, 1f /* source not fully opaque: take the blending path */
1428 nop
1429 0: /* opaque source: fill dst with the constant */
1430 sw a1, 0(a0)
1431 addiu a2, a2, -1
1432 bgtz a2, 0b
1433 addiu a0, a0, 4
1434 j ra /* no registers were saved on this path */
1435 nop
1436
1437 1:
1438 SAVE_REGS_ON_STACK 0, s0, s1, s2
1439 li t4, 0x00ff00ff /* constant handed to the multiply macros */
1440 addiu t1, a2, -1
1441 beqz t1, 3f /* w == 1: only the single-pixel tail runs */
1442 nop
1443 2: /* main loop, two pixels per iteration */
1444 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
1445 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
1446
1447 MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3
1448
1449 addu_s.qb t7, t7, a1 /* saturating per-byte add of the constant source */
1450 addu_s.qb t8, t8, a1
1451
1452 sw t7, 0(a0)
1453 sw t8, 4(a0)
1454
1455 addiu a2, a2, -2
1456 addiu t1, a2, -1
1457 bgtz t1, 2b /* loop while at least two pixels remain */
1458 addiu a0, a0, 8 /* follows the branch; advances dst on both outcomes */
1459 3:
1460 beqz a2, 4f /* no odd pixel left */
1461 nop
1462
1463 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
1464
1465 MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7
1466
1467 addu_s.qb t3, t3, a1 /* saturating per-byte add of the constant source */
1468
1469 sw t3, 0(a0)
1470
1471 4:
1472 RESTORE_REGS_FROM_STACK 0, s0, s1, s2
1473 5:
1474 j ra
1475 nop
1476
1477 END(pixman_composite_over_n_8888_asm_mips)
1478
1479 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
1480 /* ADD with an a8 mask on a8 data: dst = saturate(dst + src * mask / 255), four pixels per pass, then one at a time.
1481 * a0 - dst (a8), read and written in place
1482 * a1 - src (a8)
1483 * a2 - mask (a8)
1484 * a3 - w (pixel count)
1485 * The division by 255 is approximated as (x + ((x >> 8) & 0xff)) >> 8 using rounding shifts. */
1486
1487 SAVE_REGS_ON_STACK 0, v0, v1 /* v0 is the group counter, v1 a temporary */
1488 li t9, 0x00ff00ff /* keeps the low byte of each halfword */
1489 beqz a3, 3f /* w == 0: nothing to do */
1490 nop
1491
1492 srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */
1493 beqz v0, 1f /* branch if less than 4 src pixels */
1494 nop
1495
1496 0: /* main loop, four pixels per iteration */
1497 beqz v0, 1f /* all groups of four done */
1498 addiu v0, v0, -1 /* follows the branch; runs on both outcomes */
1499 lbu t0, 0(a2) /* t0-t3 = four mask bytes */
1500 lbu t1, 1(a2)
1501 lbu t2, 2(a2)
1502 lbu t3, 3(a2)
1503 lbu t4, 0(a0) /* t4-t7 = four destination bytes */
1504 lbu t5, 1(a0)
1505 lbu t6, 2(a0)
1506 lbu t7, 3(a0)
1507
1508 addiu a2, a2, 4
1509
1510 precr_sra.ph.w t1, t0, 0 /* pair up the low halfwords: t1 = (t1 << 16) | t0 */
1511 precr_sra.ph.w t3, t2, 0
1512 precr_sra.ph.w t5, t4, 0
1513 precr_sra.ph.w t7, t6, 0
1514
1515 precr.qb.ph t0, t3, t1 /* t0 = four mask bytes in one word, pixel 0 in the low byte */
1516 precr.qb.ph t1, t7, t5 /* t1 = four destination bytes in one word */
1517
1518 lbu t4, 0(a1) /* t4, v1, t7, t8 = four source bytes */
1519 lbu v1, 1(a1)
1520 lbu t7, 2(a1)
1521 lbu t8, 3(a1)
1522
1523 addiu a1, a1, 4
1524
1525 precr_sra.ph.w v1, t4, 0 /* v1 = source pixels 1:0 as halfwords */
1526 precr_sra.ph.w t8, t7, 0 /* t8 = source pixels 3:2 as halfwords */
1527
1528 muleu_s.ph.qbl t2, t0, t8 /* mask bytes 3,2 times source 3,2 */
1529 muleu_s.ph.qbr t3, t0, v1 /* mask bytes 1,0 times source 1,0 */
1530 shra_r.ph t4, t2, 8 /* start of the /255 approximation */
1531 shra_r.ph t5, t3, 8
1532 and t4, t4, t9
1533 and t5, t5, t9
1534 addq.ph t2, t2, t4
1535 addq.ph t3, t3, t5
1536 shra_r.ph t2, t2, 8
1537 shra_r.ph t3, t3, 8
1538 precr.qb.ph t0, t2, t3 /* pack the four products back into bytes */
1539
1540 addu_s.qb t2, t0, t1 /* saturating per-byte add with the destination */
1541
1542 sb t2, 0(a0) /* store the four result bytes, low byte first */
1543 srl t2, t2, 8
1544 sb t2, 1(a0)
1545 srl t2, t2, 8
1546 sb t2, 2(a0)
1547 srl t2, t2, 8
1548 sb t2, 3(a0)
1549 addiu a3, a3, -4
1550 b 0b
1551 addiu a0, a0, 4 /* follows the branch; advances dst */
1552
1553 1:
1554 beqz a3, 3f /* no leftover pixels */
1555 nop
1556 2: /* tail loop, one pixel per iteration */
1557 lbu t8, 0(a1)
1558 lbu t0, 0(a2)
1559 lbu t1, 0(a0)
1560 addiu a1, a1, 1
1561 addiu a2, a2, 1
1562
1563 mul t2, t0, t8 /* mask * source */
1564 shra_r.ph t3, t2, 8 /* same /255 approximation as in the main loop */
1565 andi t3, t3, 0xff
1566 addq.ph t2, t2, t3
1567 shra_r.ph t2, t2, 8
1568 andi t2, t2, 0xff
1569
1570 addu_s.qb t2, t2, t1 /* saturating add with the destination byte */
1571 sb t2, 0(a0)
1572 addiu a3, a3, -1
1573 bnez a3, 2b
1574 addiu a0, a0, 1
1575
1576 3:
1577 RESTORE_REGS_FROM_STACK 0, v0, v1
1578 j ra
1579 nop
1580
1581 END(pixman_composite_add_8_8_8_asm_mips)
1582
1583 LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
1584 /* ADD of a constant alpha through an a8 mask: dst = saturate(dst + src_alpha * mask / 255), four pixels per pass, then one at a time.
1585 * a0 - dst (a8), read and written in place
1586 * a1 - src (32bit constant); only its top byte (a1 >> 24) is used
1587 * a2 - mask (a8)
1588 * a3 - w (pixel count)
1589 * The division by 255 is approximated as (x + ((x >> 8) & 0xff)) >> 8 using rounding shifts. */
1590
1591 SAVE_REGS_ON_STACK 0, v0 /* v0 is the group counter */
1592 li t9, 0x00ff00ff /* keeps the low byte of each halfword */
1593 beqz a3, 3f /* w == 0: nothing to do */
1594 nop
1595
1596 srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */
1597 beqz v0, 1f /* branch if less than 4 src pixels */
1598 nop
1599
1600 srl t8, a1, 24 /* t8 = top byte of the constant */
1601 replv.ph t8, t8 /* replicate it into both halfwords */
1602
1603 0: /* main loop, four pixels per iteration */
1604 beqz v0, 1f /* all groups of four done */
1605 addiu v0, v0, -1 /* follows the branch; runs on both outcomes */
1606 lbu t0, 0(a2) /* t0-t3 = four mask bytes */
1607 lbu t1, 1(a2)
1608 lbu t2, 2(a2)
1609 lbu t3, 3(a2)
1610 lbu t4, 0(a0) /* t4-t7 = four destination bytes */
1611 lbu t5, 1(a0)
1612 lbu t6, 2(a0)
1613 lbu t7, 3(a0)
1614
1615 addiu a2, a2, 4
1616
1617 precr_sra.ph.w t1, t0, 0 /* pair up the low halfwords: t1 = (t1 << 16) | t0 */
1618 precr_sra.ph.w t3, t2, 0
1619 precr_sra.ph.w t5, t4, 0
1620 precr_sra.ph.w t7, t6, 0
1621
1622 precr.qb.ph t0, t3, t1 /* t0 = four mask bytes in one word, pixel 0 in the low byte */
1623 precr.qb.ph t1, t7, t5 /* t1 = four destination bytes in one word */
1624
1625 muleu_s.ph.qbl t2, t0, t8 /* mask bytes 3,2 times the constant */
1626 muleu_s.ph.qbr t3, t0, t8 /* mask bytes 1,0 times the constant */
1627 shra_r.ph t4, t2, 8 /* start of the /255 approximation */
1628 shra_r.ph t5, t3, 8
1629 and t4, t4, t9
1630 and t5, t5, t9
1631 addq.ph t2, t2, t4
1632 addq.ph t3, t3, t5
1633 shra_r.ph t2, t2, 8
1634 shra_r.ph t3, t3, 8
1635 precr.qb.ph t0, t2, t3 /* pack the four products back into bytes */
1636
1637 addu_s.qb t2, t0, t1 /* saturating per-byte add with the destination */
1638
1639 sb t2, 0(a0) /* store the four result bytes, low byte first */
1640 srl t2, t2, 8
1641 sb t2, 1(a0)
1642 srl t2, t2, 8
1643 sb t2, 2(a0)
1644 srl t2, t2, 8
1645 sb t2, 3(a0)
1646 addiu a3, a3, -4
1647 b 0b
1648 addiu a0, a0, 4 /* follows the branch; advances dst */
1649
1650 1:
1651 beqz a3, 3f /* no leftover pixels */
1652 nop
1653 srl t8, a1, 24 /* recomputed: this label is also reached before t8 was set, and the main loop holds the replicated form */
1654 2: /* tail loop, one pixel per iteration */
1655 lbu t0, 0(a2)
1656 lbu t1, 0(a0)
1657 addiu a2, a2, 1
1658
1659 mul t2, t0, t8 /* mask * constant */
1660 shra_r.ph t3, t2, 8 /* same /255 approximation as in the main loop */
1661 andi t3, t3, 0xff
1662 addq.ph t2, t2, t3
1663 shra_r.ph t2, t2, 8
1664 andi t2, t2, 0xff
1665
1666 addu_s.qb t2, t2, t1 /* saturating add with the destination byte */
1667 sb t2, 0(a0)
1668 addiu a3, a3, -1
1669 bnez a3, 2b
1670 addiu a0, a0, 1
1671
1672 3:
1673 RESTORE_REGS_FROM_STACK 0, v0
1674 j ra
1675 nop
1676
1677 END(pixman_composite_add_n_8_8_asm_mips)
1678
1679 LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
1680 /* ADD of a constant colour through an a8 mask onto a8r8g8b8; two pixels per pass plus a one-pixel tail.
1681 * a0 - dst (a8r8g8b8), read and written in place
1682 * a1 - src (32bit constant)
1683 * a2 - mask (a8)
1684 * a3 - w (pixel count)
1685 * NOTE(review): the arithmetic lives in the *_MUL_*_ADD_* macros defined outside this section; presumably result = dst + src * mask, saturated - confirm. */
1686
1687 SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are passed to the two-pixel macro below */
1688 li t4, 0x00ff00ff /* constant handed to the macros */
1689 beqz a3, 3f /* w == 0: nothing to do */
1690 nop
1691 addiu t1, a3, -1
1692 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
1693 nop
1694 1: /* main loop, two pixels per iteration */
1695 /* a1 = source (32bit constant) */
1696 lbu t0, 0(a2) /* t0 = mask (a8) */
1697 lbu t1, 1(a2) /* t1 = mask (a8) */
1698 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
1699 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
1700 addiu a2, a2, 2
1701
1702 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \
1703 t0, t1, \
1704 t2, t3, \
1705 t5, t6, \
1706 t4, t7, t8, t9, s0, s1, s2
1707
1708 sw t5, 0(a0) /* t5, t6 hold the two results */
1709 sw t6, 4(a0)
1710 addiu a3, a3, -2
1711 addiu t1, a3, -1
1712 bgtz t1, 1b /* loop while at least two pixels remain */
1713 addiu a0, a0, 8 /* follows the branch; advances dst on both outcomes */
1714 2:
1715 beqz a3, 3f /* no odd pixel left */
1716 nop
1717 /* a1 = source (32bit constant) */
1718 lbu t0, 0(a2) /* t0 = mask (a8) */
1719 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
1720
1721 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6
1722
1723 sw t2, 0(a0) /* t2 holds the result */
1724 3:
1725 RESTORE_REGS_FROM_STACK 0, s0, s1, s2
1726 j ra
1727 nop
1728
1729 END(pixman_composite_add_n_8_8888_asm_mips)
1730
1731 LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
1732 /* ADD of an r5g6b5 source through an a8 mask onto r5g6b5: pixels are widened to 8888, combined, and narrowed back; two pixels per pass plus a one-pixel tail.
1733 * a0 - dst (r5g6b5), read and written in place
1734 * a1 - src (r5g6b5)
1735 * a2 - mask (a8)
1736 * a3 - w (pixel count)
1737 * NOTE(review): the CONVERT and *_MUL_*_ADD_* macros are defined outside this section; their behaviour is presumed from their names. */
1738
1739 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
1740 li t4, 0xf800f800 /* constants handed to the conversion and multiply macros */
1741 li t5, 0x07e007e0
1742 li t6, 0x001F001F
1743 li t7, 0x00ff00ff
1744 beqz a3, 3f /* w == 0: nothing to do */
1745 nop
1746 addiu t1, a3, -1
1747 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
1748 nop
1749 1: /* main loop, two pixels per iteration */
1750 lhu t0, 0(a1) /* t0 = source (r5g6b5) */
1751 lhu t1, 2(a1) /* t1 = source (r5g6b5) */
1752 lbu t2, 0(a2) /* t2 = mask (a8) */
1753 lbu t3, 1(a2) /* t3 = mask (a8) */
1754 lhu t8, 0(a0) /* t8 = destination (r5g6b5) */
1755 lhu t9, 2(a0) /* t9 = destination (r5g6b5) */
1756 addiu a1, a1, 4
1757 addiu a2, a2, 2
1758
1759 CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
1760 CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, s6, s7
1761 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s0, s1, \
1762 t2, t3, \
1763 s2, s3, \
1764 t0, t1, \
1765 t7, s4, s5, s6, s7, t8, t9
1766 CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
1767
1768 sh s0, 0(a0) /* s0, s1 hold the two converted results */
1769 sh s1, 2(a0)
1770 addiu a3, a3, -2
1771 addiu t1, a3, -1
1772 bgtz t1, 1b /* loop while at least two pixels remain */
1773 addiu a0, a0, 4 /* follows the branch; advances dst on both outcomes */
1774 2:
1775 beqz a3, 3f /* no odd pixel left */
1776 nop
1777 lhu t0, 0(a1) /* t0 = source (r5g6b5) */
1778 lbu t1, 0(a2) /* t1 = mask (a8) */
1779 lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
1780
1781 CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
1782 CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
1783 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t3, t1, t4, t0, t7, t2, t5, t6
1784 CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
1785
1786 sh t3, 0(a0) /* t3 holds the converted result; t4-t6 were reused as temporaries in this tail */
1787 3:
1788 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
1789 j ra
1790 nop
1791
1792 END(pixman_composite_add_0565_8_0565_asm_mips)
1793
1794 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
1795 /* ADD of an a8r8g8b8 source through an a8 mask onto a8r8g8b8; two pixels per pass plus a one-pixel tail.
1796 * a0 - dst (a8r8g8b8), read and written in place
1797 * a1 - src (a8r8g8b8)
1798 * a2 - mask (a8)
1799 * a3 - w (pixel count)
1800 * NOTE(review): the arithmetic lives in the *_MUL_*_ADD_* macros defined outside this section; presumably result = dst + src * mask, saturated - confirm. */
1801
1802 SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are passed to the two-pixel macro below */
1803 li t4, 0x00ff00ff /* constant handed to the macros */
1804 beqz a3, 3f /* w == 0: nothing to do */
1805 nop
1806 addiu t1, a3, -1
1807 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
1808 nop
1809 1: /* main loop, two pixels per iteration */
1810 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1811 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
1812 lbu t2, 0(a2) /* t2 = mask (a8) */
1813 lbu t3, 1(a2) /* t3 = mask (a8) */
1814 lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */
1815 lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */
1816 addiu a1, a1, 8
1817 addiu a2, a2, 2
1818
1819 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
1820 t2, t3, \
1821 t5, t6, \
1822 t7, t8, \
1823 t4, t9, s0, s1, s2, t0, t1
1824
1825 sw t7, 0(a0) /* t7, t8 hold the two results */
1826 sw t8, 4(a0)
1827 addiu a3, a3, -2
1828 addiu t1, a3, -1
1829 bgtz t1, 1b /* loop while at least two pixels remain */
1830 addiu a0, a0, 8 /* follows the branch; advances dst on both outcomes */
1831 2:
1832 beqz a3, 3f /* no odd pixel left */
1833 nop
1834 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1835 lbu t1, 0(a2) /* t1 = mask (a8) */
1836 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
1837
1838 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
1839
1840 sw t3, 0(a0) /* t3 holds the result */
1841 3:
1842 RESTORE_REGS_FROM_STACK 0, s0, s1, s2
1843 j ra
1844 nop
1845
1846 END(pixman_composite_add_8888_8_8888_asm_mips)
1847
1848 LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
1849 /* ADD of an a8r8g8b8 source through a constant mask onto a8r8g8b8; two pixels per pass plus a one-pixel tail.
1850 * a0 - dst (a8r8g8b8), read and written in place
1851 * a1 - src (a8r8g8b8)
1852 * a2 - mask (32bit constant); reduced to its top byte (a2 >> 24) before use
1853 * a3 - w (pixel count)
1854 * NOTE(review): the arithmetic lives in the *_MUL_*_ADD_* macros defined outside this section; presumably result = dst + src * mask, saturated - confirm. */
1855
1856 SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are passed to the two-pixel macro below */
1857 li t4, 0x00ff00ff /* constant handed to the macros */
1858 beqz a3, 3f /* w == 0: nothing to do */
1859 nop
1860 srl a2, a2, 24 /* a2 = top byte of the constant mask */
1861 addiu t1, a3, -1
1862 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
1863 nop
1864 1: /* main loop, two pixels per iteration */
1865 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1866 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
1867 /* a2 = mask (top byte of the 32bit constant) */
1868 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
1869 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
1870 addiu a1, a1, 8
1871
1872 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
1873 a2, a2, \
1874 t2, t3, \
1875 t5, t6, \
1876 t4, t7, t8, t9, s0, s1, s2
1877
1878 sw t5, 0(a0) /* t5, t6 hold the two results */
1879 sw t6, 4(a0)
1880 addiu a3, a3, -2
1881 addiu t1, a3, -1
1882 bgtz t1, 1b /* loop while at least two pixels remain */
1883 addiu a0, a0, 8 /* follows the branch; advances dst on both outcomes */
1884 2:
1885 beqz a3, 3f /* no odd pixel left */
1886 nop
1887 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1888 /* a2 = mask (top byte of the 32bit constant) */
1889 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
1890
1891 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7
1892
1893 sw t3, 0(a0) /* t3 holds the result */
1894 3:
1895 RESTORE_REGS_FROM_STACK 0, s0, s1, s2
1896 j ra
1897 nop
1898
1899 END(pixman_composite_add_8888_n_8888_asm_mips)
1900
1901 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
1902 /* ADD of an a8r8g8b8 source through the alpha channel of an a8r8g8b8 mask onto a8r8g8b8; two pixels per pass plus a one-pixel tail.
1903 * a0 - dst (a8r8g8b8), read and written in place
1904 * a1 - src (a8r8g8b8)
1905 * a2 - mask (a8r8g8b8); only the top byte of each pixel is used
1906 * a3 - w (pixel count)
1907 * NOTE(review): the arithmetic lives in the *_MUL_*_ADD_* macros defined outside this section; presumably result = dst + src * mask, saturated - confirm. */
1908
1909 SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are passed to the two-pixel macro below */
1910 li t4, 0x00ff00ff /* constant handed to the macros */
1911 beqz a3, 3f /* w == 0: nothing to do */
1912 nop
1913 addiu t1, a3, -1
1914 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
1915 nop
1916 1: /* main loop, two pixels per iteration */
1917 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1918 lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
1919 lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */
1920 lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */
1921 lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */
1922 lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */
1923 addiu a1, a1, 8
1924 addiu a2, a2, 8
1925 srl t2, t2, 24 /* keep only the mask alpha */
1926 srl t3, t3, 24
1927
1928 MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
1929 t2, t3, \
1930 t5, t6, \
1931 t7, t8, \
1932 t4, t9, s0, s1, s2, t0, t1
1933
1934 sw t7, 0(a0) /* t7, t8 hold the two results */
1935 sw t8, 4(a0)
1936 addiu a3, a3, -2
1937 addiu t1, a3, -1
1938 bgtz t1, 1b /* loop while at least two pixels remain */
1939 addiu a0, a0, 8 /* follows the branch; advances dst on both outcomes */
1940 2:
1941 beqz a3, 3f /* no odd pixel left */
1942 nop
1943 lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
1944 lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */
1945 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
1946 srl t1, t1, 24 /* keep only the mask alpha */
1947
1948 MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
1949
1950 sw t3, 0(a0) /* t3 holds the result */
1951 3:
1952 RESTORE_REGS_FROM_STACK 0, s0, s1, s2
1953 j ra
1954 nop
1955
1956 END(pixman_composite_add_8888_8888_8888_asm_mips)
1957
1958 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
1959 /* Saturating byte-wise ADD of an a8 source onto an a8 destination; four pixels per pass, then one at a time.
1960 * a0 - dst (a8), read and written in place
1961 * a1 - src (a8)
1962 * a2 - w (pixel count; 0 returns immediately)
1963 * Uses only t0-t7 and t9; nothing is saved on the stack. */
1964
1965 beqz a2, 3f /* w == 0: nothing to do */
1966 nop
1967 srl t9, a2, 2 /* t9 = how many multiples of 4 dst pixels */
1968 beqz t9, 1f /* branch if less than 4 src pixels */
1969 nop
1970
1971 0: /* main loop, four pixels per iteration */
1972 beqz t9, 1f /* all groups of four done */
1973 addiu t9, t9, -1 /* follows the branch; runs on both outcomes */
1974 lbu t0, 0(a1) /* t0-t3 = four source bytes */
1975 lbu t1, 1(a1)
1976 lbu t2, 2(a1)
1977 lbu t3, 3(a1)
1978 lbu t4, 0(a0) /* t4-t7 = four destination bytes */
1979 lbu t5, 1(a0)
1980 lbu t6, 2(a0)
1981 lbu t7, 3(a0)
1982
1983 addiu a1, a1, 4
1984
1985 precr_sra.ph.w t1, t0, 0 /* pair up the low halfwords: t1 = (t1 << 16) | t0 */
1986 precr_sra.ph.w t3, t2, 0
1987 precr_sra.ph.w t5, t4, 0
1988 precr_sra.ph.w t7, t6, 0
1989
1990 precr.qb.ph t0, t3, t1 /* t0 = four source bytes in one word, pixel 0 in the low byte */
1991 precr.qb.ph t1, t7, t5 /* t1 = four destination bytes in one word */
1992
1993 addu_s.qb t2, t0, t1 /* saturating per-byte add */
1994
1995 sb t2, 0(a0) /* store the four result bytes, low byte first */
1996 srl t2, t2, 8
1997 sb t2, 1(a0)
1998 srl t2, t2, 8
1999 sb t2, 2(a0)
2000 srl t2, t2, 8
2001 sb t2, 3(a0)
2002 addiu a2, a2, -4
2003 b 0b
2004 addiu a0, a0, 4 /* follows the branch; advances dst */
2005
2006 1:
2007 beqz a2, 3f /* no leftover pixels */
2008 nop
2009 2: /* tail loop, one pixel per iteration */
2010 lbu t0, 0(a1)
2011 lbu t1, 0(a0)
2012 addiu a1, a1, 1
2013
2014 addu_s.qb t2, t0, t1 /* saturating add; only the low byte is stored */
2015 sb t2, 0(a0)
2016 addiu a2, a2, -1
2017 bnez a2, 2b
2018 addiu a0, a0, 1
2019
2020 3:
2021 j ra
2022 nop
2023
2024 END(pixman_composite_add_8_8_asm_mips)
2025
2026 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
2027 /* Saturating byte-wise ADD of an a8r8g8b8 source onto an a8r8g8b8 destination; four pixels per pass, then one at a time.
2028 * a0 - dst (a8r8g8b8), read and written in place
2029 * a1 - src (a8r8g8b8)
2030 * a2 - w (pixel count; 0 returns immediately)
2031 * Label 1 handles every group of four except the last, label 2 the last group, label 3 the leftover pixels. */
2032
2033 beqz a2, 4f /* w == 0: nothing to do */
2034 nop
2035
2036 srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */
2037 beqz t9, 3f /* branch if less than 4 src pixels */
2038 nop
2039 1:
2040 addiu t9, t9, -1
2041 beqz t9, 2f /* last group of four: handle it at label 2 */
2042 addiu a2, a2, -4 /* follows the branch; runs on both outcomes */
2043
2044 lw t0, 0(a1) /* t0-t3 = four source pixels */
2045 lw t1, 4(a1)
2046 lw t2, 8(a1)
2047 lw t3, 12(a1)
2048 lw t4, 0(a0) /* t4-t7 = four destination pixels */
2049 lw t5, 4(a0)
2050 lw t6, 8(a0)
2051 lw t7, 12(a0)
2052 addiu a1, a1, 16
2053
2054 addu_s.qb t4, t4, t0 /* saturating per-byte adds */
2055 addu_s.qb t5, t5, t1
2056 addu_s.qb t6, t6, t2
2057 addu_s.qb t7, t7, t3
2058
2059 sw t4, 0(a0)
2060 sw t5, 4(a0)
2061 sw t6, 8(a0)
2062 sw t7, 12(a0)
2063 b 1b
2064 addiu a0, a0, 16 /* follows the branch; advances dst */
2065 2: /* final group of four */
2066 lw t0, 0(a1)
2067 lw t1, 4(a1)
2068 lw t2, 8(a1)
2069 lw t3, 12(a1)
2070 lw t4, 0(a0)
2071 lw t5, 4(a0)
2072 lw t6, 8(a0)
2073 lw t7, 12(a0)
2074 addiu a1, a1, 16
2075
2076 addu_s.qb t4, t4, t0
2077 addu_s.qb t5, t5, t1
2078 addu_s.qb t6, t6, t2
2079 addu_s.qb t7, t7, t3
2080
2081 sw t4, 0(a0)
2082 sw t5, 4(a0)
2083 sw t6, 8(a0)
2084 sw t7, 12(a0)
2085
2086 beqz a2, 4f /* w was a multiple of four: done */
2087 addiu a0, a0, 16 /* follows the branch; advances dst on both outcomes */
2088 3: /* tail loop, one pixel per iteration */
2089 lw t0, 0(a1)
2090 lw t1, 0(a0)
2091 addiu a1, a1, 4
2092 addiu a2, a2, -1
2093 addu_s.qb t1, t1, t0 /* saturating per-byte add */
2094 sw t1, 0(a0)
2095 bnez a2, 3b
2096 addiu a0, a0, 4
2097 4:
2098 jr ra
2099 nop
2100
2101 END(pixman_composite_add_8888_8888_asm_mips)
2102
2103 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
2104 /* OUT_REVERSE on r5g6b5: each dst pixel is widened to 8888, scaled by (255 - source alpha) and narrowed back; two pixels per pass plus a one-pixel tail.
2105 * a0 - dst (r5g6b5), read and written in place
2106 * a1 - src (a8)
2107 * a2 - w (pixel count; 0 returns immediately)
2108 * NOTE(review): the CONVERT, MUL and SAVE/RESTORE macros are defined outside this section; their behaviour is presumed from their names. */
2109
2110 beqz a2, 4f /* w == 0: return without touching the stack */
2111 nop
2112
2113 SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
2114 li t2, 0xf800f800 /* constants handed to the conversion and multiply macros */
2115 li t3, 0x07e007e0
2116 li t4, 0x001F001F
2117 li t5, 0x00ff00ff
2118
2119 addiu t1, a2, -1
2120 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
2121 nop
2122 1: /* main loop, two pixels per iteration */
2123 lbu t0, 0(a1) /* t0 = source (a8) */
2124 lbu t1, 1(a1) /* t1 = source (a8) */
2125 lhu t6, 0(a0) /* t6 = destination (r5g6b5) */
2126 lhu t7, 2(a0) /* t7 = destination (r5g6b5) */
2127 addiu a1, a1, 2
2128
2129 not t0, t0
2130 not t1, t1
2131 andi t0, 0xff /* t0 = neg source1 */
2132 andi t1, 0xff /* t1 = neg source2 */
2133 CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3
2134 MIPS_2xUN8x4_MUL_2xUN8 t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9
2135 CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1
2136
2137 sh t8, 0(a0) /* t8, t9 hold the two converted results */
2138 sh t9, 2(a0)
2139 addiu a2, a2, -2
2140 addiu t1, a2, -1
2141 bgtz t1, 1b /* loop while at least two pixels remain */
2142 addiu a0, a0, 4 /* follows the branch; advances dst on both outcomes */
2143 2:
2144 beqz a2, 3f /* no odd pixel left */
2145 nop
2146 lbu t0, 0(a1) /* t0 = source (a8) */
2147 lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
2148
2149 not t0, t0
2150 andi t0, 0xff /* t0 = neg source */
2151 CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4
2152 MIPS_UN8x4_MUL_UN8 t2, t0, t1, t5, t3, t4, t6
2153 CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4
2154
2155 sh t2, 0(a0) /* t2 holds the converted result; t2-t4 were reused as temporaries in this tail */
2156 3:
2157 RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
2158 4:
2159 j ra
2160 nop
2161
2162 END(pixman_composite_out_reverse_8_0565_asm_mips)
2163
2164 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
2165 /* OUT_REVERSE on a8r8g8b8: each dst pixel is scaled by (255 - source alpha); two pixels per pass plus a one-pixel tail.
2166 * a0 - dst (a8r8g8b8), read and written in place
2167 * a1 - src (a8)
2168 * a2 - w (pixel count; 0 returns immediately)
2169 * Only t0-t9 are used; nothing is saved on the stack. NOTE(review): the MUL macros are defined outside this section. */
2170
2171 beqz a2, 3f /* w == 0: nothing to do */
2172 nop
2173 li t4, 0x00ff00ff /* constant handed to the multiply macros */
2174 addiu t1, a2, -1
2175 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
2176 nop
2177 1: /* main loop, two pixels per iteration */
2178 lbu t0, 0(a1) /* t0 = source (a8) */
2179 lbu t1, 1(a1) /* t1 = source (a8) */
2180 lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
2181 lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
2182 addiu a1, a1, 2
2183 not t0, t0
2184 not t1, t1
2185 andi t0, 0xff /* t0 = neg source */
2186 andi t1, 0xff /* t1 = neg source */
2187
2188 MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0
2189
2190 sw t5, 0(a0) /* t5, t6 hold the two results */
2191 sw t6, 4(a0)
2192 addiu a2, a2, -2
2193 addiu t1, a2, -1
2194 bgtz t1, 1b /* loop while at least two pixels remain */
2195 addiu a0, a0, 8 /* follows the branch; advances dst on both outcomes */
2196 2:
2197 beqz a2, 3f /* no odd pixel left */
2198 nop
2199 lbu t0, 0(a1) /* t0 = source (a8) */
2200 lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
2201 not t0, t0
2202 andi t0, 0xff /* t0 = neg source */
2203
2204 MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6
2205
2206 sw t2, 0(a0) /* t2 holds the result */
2207 3:
2208 j ra
2209 nop
2210
2211 END(pixman_composite_out_reverse_8_8888_asm_mips)
2212
2213 LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
2214 /* OVER_REVERSE with a constant colour: dst = saturate(dst + src * (255 - dst alpha) / 255); four pixels per pass, then one at a time.
2215 * a0 - dst (a8r8g8b8), read and written in place
2216 * a1 - src (32bit constant)
2217 * a2 - w (pixel count; 0 returns immediately)
2218 * The division by 255 is approximated as (x + ((x >> 8) & 0xff)) >> 8 using rounding shifts. */
2219
2220 beqz a2, 5f /* w == 0: return without touching the stack */
2221 nop
2222
2223 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
2224 li t0, 0x00ff00ff /* keeps the low byte of each halfword */
2225 srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */
2226 beqz t9, 2f /* branch if less than 4 src pixels */
2227 nop
2228 1: /* main loop, four pixels per iteration */
2229 beqz t9, 2f /* all groups of four done */
2230 addiu t9, t9, -1 /* follows the branch; runs on both outcomes */
2231
2232 lw t1, 0(a0) /* t1-t4 = four destination pixels */
2233 lw t2, 4(a0)
2234 lw t3, 8(a0)
2235 lw t4, 12(a0)
2236
2237 addiu a2, a2, -4
2238
2239 not t5, t1
2240 not t6, t2
2241 not t7, t3
2242 not t8, t4
2243 srl t5, t5, 24 /* t5-t8 = 255 - destination alpha */
2244 srl t6, t6, 24
2245 srl t7, t7, 24
2246 srl t8, t8, 24
2247 replv.ph t5, t5 /* replicate each factor into both halfwords */
2248 replv.ph t6, t6
2249 replv.ph t7, t7
2250 replv.ph t8, t8
2251 muleu_s.ph.qbl s0, a1, t5 /* upper two source bytes times the factor */
2252 muleu_s.ph.qbr s1, a1, t5 /* lower two source bytes times the factor */
2253 muleu_s.ph.qbl s2, a1, t6
2254 muleu_s.ph.qbr s3, a1, t6
2255 muleu_s.ph.qbl s4, a1, t7
2256 muleu_s.ph.qbr s5, a1, t7
2257 muleu_s.ph.qbl s6, a1, t8
2258 muleu_s.ph.qbr s7, a1, t8
2259
2260 shra_r.ph t5, s0, 8 /* /255 approximation for pixels 0 and 1 */
2261 shra_r.ph t6, s1, 8
2262 shra_r.ph t7, s2, 8
2263 shra_r.ph t8, s3, 8
2264 and t5, t5, t0
2265 and t6, t6, t0
2266 and t7, t7, t0
2267 and t8, t8, t0
2268 addq.ph s0, s0, t5
2269 addq.ph s1, s1, t6
2270 addq.ph s2, s2, t7
2271 addq.ph s3, s3, t8
2272 shra_r.ph s0, s0, 8
2273 shra_r.ph s1, s1, 8
2274 shra_r.ph s2, s2, 8
2275 shra_r.ph s3, s3, 8
2276 shra_r.ph t5, s4, 8 /* /255 approximation for pixels 2 and 3 */
2277 shra_r.ph t6, s5, 8
2278 shra_r.ph t7, s6, 8
2279 shra_r.ph t8, s7, 8
2280 and t5, t5, t0
2281 and t6, t6, t0
2282 and t7, t7, t0
2283 and t8, t8, t0
2284 addq.ph s4, s4, t5
2285 addq.ph s5, s5, t6
2286 addq.ph s6, s6, t7
2287 addq.ph s7, s7, t8
2288 shra_r.ph s4, s4, 8
2289 shra_r.ph s5, s5, 8
2290 shra_r.ph s6, s6, 8
2291 shra_r.ph s7, s7, 8
2292
2293 precr.qb.ph t5, s0, s1 /* pack each pair of halfword results back into one pixel */
2294 precr.qb.ph t6, s2, s3
2295 precr.qb.ph t7, s4, s5
2296 precr.qb.ph t8, s6, s7
2297 addu_s.qb t5, t1, t5 /* saturating per-byte add with the destination */
2298 addu_s.qb t6, t2, t6
2299 addu_s.qb t7, t3, t7
2300 addu_s.qb t8, t4, t8
2301
2302 sw t5, 0(a0)
2303 sw t6, 4(a0)
2304 sw t7, 8(a0)
2305 sw t8, 12(a0)
2306 b 1b
2307 addiu a0, a0, 16 /* follows the branch; advances dst */
2308
2309 2:
2310 beqz a2, 4f /* no leftover pixels */
2311 nop
2312 3: /* tail loop, one pixel per iteration */
2313 lw t1, 0(a0)
2314
2315 not t2, t1
2316 srl t2, t2, 24 /* t2 = 255 - destination alpha */
2317 replv.ph t2, t2
2318
2319 muleu_s.ph.qbl t4, a1, t2
2320 muleu_s.ph.qbr t5, a1, t2
2321 shra_r.ph t6, t4, 8 /* same /255 approximation as in the main loop */
2322 shra_r.ph t7, t5, 8
2323
2324 and t6,t6,t0
2325 and t7,t7,t0
2326
2327 addq.ph t8, t4, t6
2328 addq.ph t9, t5, t7
2329
2330 shra_r.ph t8, t8, 8
2331 shra_r.ph t9, t9, 8
2332
2333 precr.qb.ph t9, t8, t9 /* pack the halfword results back into one pixel */
2334
2335 addu_s.qb t9, t1, t9 /* saturating per-byte add with the destination */
2336 sw t9, 0(a0)
2337
2338 addiu a2, a2, -1
2339 bnez a2, 3b
2340 addiu a0, a0, 4
2341 4:
2342 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
2343 5:
2344 j ra
2345 nop
2346
2347 END(pixman_composite_over_reverse_n_8888_asm_mips)
2348
2349 LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
2350 /* IN with a constant colour on a8 data: each dst byte is multiplied by the source alpha and divided by 255; four pixels per pass, then one at a time.
2351 * a0 - dst (a8), read and written in place
2352 * a1 - src (a8r8g8b8); only its top byte (alpha) is used
2353 * a2 - w (pixel count; 0 returns immediately)
2354 * The division by 255 is approximated as (x + (x >> 8)) >> 8 with a rounding final shift. */
2355
2356 beqz a2, 5f /* w == 0: return without touching the stack */
2357 nop
2358
2359 SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
2360 move t7, a1
2361 srl t5, t7, 24 /* t5 = source alpha */
2362 replv.ph t5, t5 /* replicate it into both halfwords */
2363 srl t9, a2, 2 /* t9 = how many multiples of 4 dst pixels */
2364 beqz t9, 2f /* branch if less than 4 src pixels */
2365 nop
2366
2367 1: /* main loop, four pixels per iteration */
2368 addiu t9, t9, -1
2369 addiu a2, a2, -4
2370 lbu t0, 0(a0) /* t0-t3 = four destination bytes, zero-extended */
2371 lbu t1, 1(a0)
2372 lbu t2, 2(a0)
2373 lbu t3, 3(a0)
2374
2375 muleu_s.ph.qbl s0, t0, t5 /* NOTE(review): the upper bytes of t0-t3 are zero after lbu, so the .qbl products look unused - confirm */
2376 muleu_s.ph.qbr s1, t0, t5 /* low halfword = dst byte * alpha */
2377 muleu_s.ph.qbl s2, t1, t5
2378 muleu_s.ph.qbr s3, t1, t5
2379 muleu_s.ph.qbl s4, t2, t5
2380 muleu_s.ph.qbr s5, t2, t5
2381 muleu_s.ph.qbl s6, t3, t5
2382 muleu_s.ph.qbr s7, t3, t5
2383
2384 shrl.ph t4, s0, 8 /* /255 approximation for pixels 0 and 1 */
2385 shrl.ph t6, s1, 8
2386 shrl.ph t7, s2, 8
2387 shrl.ph t8, s3, 8
2388 addq.ph t0, s0, t4
2389 addq.ph t1, s1, t6
2390 addq.ph t2, s2, t7
2391 addq.ph t3, s3, t8
2392 shra_r.ph t0, t0, 8
2393 shra_r.ph t1, t1, 8
2394 shra_r.ph t2, t2, 8
2395 shra_r.ph t3, t3, 8
2396 shrl.ph t4, s4, 8 /* /255 approximation for pixels 2 and 3 */
2397 shrl.ph t6, s5, 8
2398 shrl.ph t7, s6, 8
2399 shrl.ph t8, s7, 8
2400 addq.ph s0, s4, t4
2401 addq.ph s1, s5, t6
2402 addq.ph s2, s6, t7
2403 addq.ph s3, s7, t8
2404 shra_r.ph t4, s0, 8
2405 shra_r.ph t6, s1, 8
2406 shra_r.ph t7, s2, 8
2407 shra_r.ph t8, s3, 8
2408
2409 precr.qb.ph s0, t0, t1 /* result for each pixel ends up in the low byte */
2410 precr.qb.ph s1, t2, t3
2411 precr.qb.ph s2, t4, t6
2412 precr.qb.ph s3, t7, t8
2413
2414 sb s0, 0(a0) /* sb stores the low byte only */
2415 sb s1, 1(a0)
2416 sb s2, 2(a0)
2417 sb s3, 3(a0)
2418 bgtz t9, 1b /* loop while groups of four remain */
2419 addiu a0, a0, 4 /* follows the branch; advances dst on both outcomes */
2420 2:
2421 beqz a2, 4f /* no leftover pixels */
2422 nop
2423 3: /* tail loop, one pixel per iteration; t9 is reused as a temporary here */
2424 lbu t1, 0(a0)
2425
2426 muleu_s.ph.qbl t4, t1, t5
2427 muleu_s.ph.qbr t7, t1, t5 /* low halfword = dst byte * alpha */
2428 shrl.ph t6, t4, 8 /* same /255 approximation as in the main loop */
2429 shrl.ph t0, t7, 8
2430 addq.ph t8, t4, t6
2431 addq.ph t9, t7, t0
2432 shra_r.ph t8, t8, 8
2433 shra_r.ph t9, t9, 8
2434 precr.qb.ph t2, t8, t9 /* result ends up in the low byte of t2 */
2435 sb t2, 0(a0)
2436 addiu a2, a2, -1
2437 bnez a2, 3b
2438 addiu a0, a0, 1
2439 4:
2440 RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
2441 5:
2442 j ra
2443 nop
2444
2445 END(pixman_composite_in_n_8_asm_mips)
2446
2447 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
2448 /* Nearest-neighbour scaled OVER of an a8r8g8b8 source through an a8 mask onto r5g6b5; two pixels per pass plus a one-pixel tail.
2449 * a0 - dst (r5g6b5), read and written in place
2450 * a1 - src (a8r8g8b8), indexed by (vx >> 16)
2451 * a2 - mask (a8)
2452 * a3 - w (pixel count; 0 returns immediately)
2453 * 16(sp) - vx (source position; the bits above bit 15 select the pixel)
2454 * 20(sp) - unit_x (added to vx after every pixel)
2455 * NOTE(review): the OVER, CONVERT and SAVE/RESTORE macros are defined outside this section; their behaviour is presumed from their names. */
2456 beqz a3, 4f /* w == 0: return without touching the stack */
2457 nop
2458
2459 SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
2460 lw v0, 36(sp) /* v0 = vx (16(sp) at entry; presumably the save macro lowered sp by 20 - confirm) */
2461 lw v1, 40(sp) /* v1 = unit_x (20(sp) at entry) */
2462 li t6, 0x00ff00ff /* constants handed to the OVER and conversion macros */
2463 li t7, 0xf800f800
2464 li t8, 0x07e007e0
2465 li t9, 0x001F001F
2466
2467 addiu t1, a3, -1
2468 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
2469 nop
2470 1: /* main loop, two pixels per iteration */
2471 sra t0, v0, 16 /* t0 = vx >> 16 */
2472 sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
2473 addu t0, a1, t0
2474 lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
2475 addu v0, v0, v1 /* v0 = vx + unit_x */
2476 sra t1, v0, 16 /* t1 = vx >> 16 */
2477 sll t1, t1, 2 /* t1 = t1 * 4 (a8r8g8b8) */
2478 addu t1, a1, t1
2479 lw t1, 0(t1) /* t1 = source (a8r8g8b8) */
2480 addu v0, v0, v1 /* v0 = vx + unit_x */
2481 lbu t2, 0(a2) /* t2 = mask (a8) */
2482 lbu t3, 1(a2) /* t3 = mask (a8) */
2483 lhu t4, 0(a0) /* t4 = destination (r5g6b5) */
2484 lhu t5, 2(a0) /* t5 = destination (r5g6b5) */
2485 addiu a2, a2, 2
2486
2487 CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
2488 OVER_2x8888_2x8_2x8888 t0, t1, \
2489 t2, t3, \
2490 s0, s1, \
2491 t4, t5, \
2492 t6, s2, s3, s4, s5, t2, t3
2493 CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
2494
2495 sh s0, 0(a0) /* s0, s1 hold the two converted results */
2496 sh s1, 2(a0)
2497 addiu a3, a3, -2
2498 addiu t1, a3, -1
2499 bgtz t1, 1b /* loop while at least two pixels remain */
2500 addiu a0, a0, 4 /* follows the branch; advances dst on both outcomes */
2501 2:
2502 beqz a3, 3f /* no odd pixel left */
2503 nop
2504 sra t0, v0, 16 /* t0 = vx >> 16 */
2505 sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
2506 addu t0, a1, t0
2507 lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
2508 lbu t1, 0(a2) /* t1 = mask (a8) */
2509 lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
2510
2511 CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
2512 OVER_8888_8_8888 t0, t1, t3, t2, t6, t4, t5, t7, t8
2513 CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
2514
2515 sh t3, 0(a0) /* t3 holds the converted result; t7, t8 were reused as temporaries in this tail */
2516 3:
2517 RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
2518 4:
2519 j ra
2520 nop
2521
2522 END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
2523
2524 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
2525 /* Nearest-neighbour scaled OVER of an r5g6b5 source through an a8 mask onto r5g6b5; two pixels per pass plus a one-pixel tail.
2526 * a0 - dst (r5g6b5), read and written in place
2527 * a1 - src (r5g6b5), indexed by (vx >> 16)
2528 * a2 - mask (a8)
2529 * a3 - w (pixel count; 0 returns immediately)
2530 * 16(sp) - vx (source position; the bits above bit 15 select the pixel)
2531 * 20(sp) - unit_x (added to vx after every pixel)
2532 * NOTE(review): the OVER, CONVERT and SAVE/RESTORE macros are defined outside this section; their behaviour is presumed from their names. */
2533
2534 beqz a3, 4f /* w == 0: return without touching the stack */
2535 nop
2536 SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
2537 lw v0, 36(sp) /* v0 = vx (16(sp) at entry; presumably the save macro lowered sp by 20 - confirm) */
2538 lw v1, 40(sp) /* v1 = unit_x (20(sp) at entry) */
2539 li t4, 0xf800f800 /* constants handed to the conversion and OVER macros */
2540 li t5, 0x07e007e0
2541 li t6, 0x001F001F
2542 li t7, 0x00ff00ff
2543
2544 addiu t1, a3, -1
2545 beqz t1, 2f /* w == 1: only the single-pixel tail runs */
2546 nop
2547 1: /* main loop, two pixels per iteration */
2548 sra t0, v0, 16 /* t0 = vx >> 16 */
2549 sll t0, t0, 1 /* t0 = t0 * 2 (r5g6b5) */
2550 addu t0, a1, t0
2551 lhu t0, 0(t0) /* t0 = source (r5g6b5) */
2552 addu v0, v0, v1 /* v0 = vx + unit_x */
2553 sra t1, v0, 16 /* t1 = vx >> 16 */
2554 sll t1, t1, 1 /* t1 = t1 * 2 (r5g6b5) */
2555 addu t1, a1, t1
2556 lhu t1, 0(t1) /* t1 = source (r5g6b5) */
2557 addu v0, v0, v1 /* v0 = vx + unit_x */
2558 lbu t2, 0(a2) /* t2 = mask (a8) */
2559 lbu t3, 1(a2) /* t3 = mask (a8) */
2560 lhu t8, 0(a0) /* t8 = destination (r5g6b5) */
2561 lhu t9, 2(a0) /* t9 = destination (r5g6b5) */
2562 addiu a2, a2, 2
2563
2564 CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
2565 CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
2566 OVER_2x8888_2x8_2x8888 s0, s1, \
2567 t2, t3, \
2568 s2, s3, \
2569 t0, t1, \
2570 t7, t8, t9, s4, s5, s0, s1
2571 CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
2572
2573 sh s0, 0(a0) /* s0, s1 hold the two converted results */
2574 sh s1, 2(a0)
2575 addiu a3, a3, -2
2576 addiu t1, a3, -1
2577 bgtz t1, 1b /* loop while at least two pixels remain */
2578 addiu a0, a0, 4 /* follows the branch; advances dst on both outcomes */
2579 2:
2580 beqz a3, 3f /* no odd pixel left */
2581 nop
2582 sra t0, v0, 16 /* t0 = vx >> 16 */
2583 sll t0, t0, 1 /* t0 = t0 * 2 (r5g6b5) */
2584 addu t0, a1, t0
2585
2586 lhu t0, 0(t0) /* t0 = source (r5g6b5) */
2587 lbu t1, 0(a2) /* t1 = mask (a8) */
2588 lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
2589
2590 CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
2591 CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
2592 OVER_8888_8_8888 t3, t1, t4, t0, t7, t2, t5, t6, t8
2593 CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
2594
2595 sh t3, 0(a0) /* t3 holds the converted result; t4-t6 were reused as temporaries in this tail */
2596 3:
2597 RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
2598 4:
2599 j ra
2600 nop
2601
2602 END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
2603
2604 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
2605 /* Bilinear-scaled SRC of one a8r8g8b8 scanline: each output pixel blends four neighbours (top/bottom row, left/right column) and overwrites dst; one pixel per iteration.
2606 * a0 - *dst (a8r8g8b8, written)
2607 * a1 - *src_top (a8r8g8b8)
2608 * a2 - *src_bottom (a8r8g8b8)
2609 * a3 - w (pixel count; 0 returns immediately)
2610 * 16(sp) - wt (weight of the top row)
2611 * 20(sp) - wb (weight of the bottom row)
2612 * 24(sp) - vx (source position; bits above 15 select the left pixel, low 16 bits are the fraction)
2613 * 28(sp) - unit_x (added to vx after every pixel)
2614 * NOTE(review): BILINEAR_INTERPOLATE_SINGLE_PIXEL and the BILINEAR_* constants are defined outside this section. */
2615
2616 beqz a3, 1f /* w == 0: return without touching the stack */
2617 nop
2618
2619 SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
2620
2621 lw s0, 36(sp) /* s0 = wt */
2622 lw s1, 40(sp) /* s1 = wb */
2623 lw s2, 44(sp) /* s2 = vx */
2624 lw s3, 48(sp) /* s3 = unit_x */
2625 li v0, BILINEAR_INTERPOLATION_RANGE
2626
2627 sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
2628 sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
2629 0: /* one output pixel per iteration */
2630 andi t4, s2, 0xffff /* t4 = low 16 bits of vx (fraction, zero-extended) */
2631 srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction (vx >> 8 when BITS == 8) */
2632 subu t5, v0, t4 /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
2633
2634 mul s4, s0, t5 /* s4 = wt * t5 (weight of top-left) */
2635 mul s5, s0, t4 /* s5 = wt * t4 (weight of top-right) */
2636 mul s6, s1, t5 /* s6 = wb * t5 (weight of bottom-left) */
2637 mul s7, s1, t4 /* s7 = wb * t4 (weight of bottom-right) */
2638
2639 sra t9, s2, 16
2640 sll t9, t9, 2 /* t9 = byte offset of the left pixel */
2641 addiu t8, t9, 4 /* t8 = byte offset of the right pixel */
2642 lwx t0, t9(a1) /* t0 = tl */
2643 lwx t1, t8(a1) /* t1 = tr */
2644 addiu a3, a3, -1
2645 lwx t2, t9(a2) /* t2 = bl */
2646 lwx t3, t8(a2) /* t3 = br */
2647
2648 BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
2649
2650 addu s2, s2, s3 /* vx += unit_x; */
2651 sw t0, 0(a0) /* t0 holds the interpolated pixel */
2652 bnez a3, 0b
2653 addiu a0, a0, 4 /* follows the branch; advances dst on both outcomes */
2654
2655 RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
2656 1:
2657 j ra
2658 nop
2659
2660 END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
2661
2662 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
2663 /* Bilinear-scaled SRC from a8r8g8b8 to r5g6b5: each output pixel blends four neighbours, is converted to 0565 and overwrites dst; one pixel per iteration.
2664 * a0 - *dst (r5g6b5, written)
2665 * a1 - *src_top (a8r8g8b8)
2666 * a2 - *src_bottom (a8r8g8b8)
2667 * a3 - w (pixel count; 0 returns immediately)
2668 * 16(sp) - wt (weight of the top row)
2669 * 20(sp) - wb (weight of the bottom row)
2670 * 24(sp) - vx (source position; bits above 15 select the left pixel, low 16 bits are the fraction)
2671 * 28(sp) - unit_x (added to vx after every pixel)
2672 * NOTE(review): BILINEAR_INTERPOLATE_SINGLE_PIXEL, CONVERT_1x8888_TO_1x0565 and the BILINEAR_* constants are defined outside this section. */
2673
2674 beqz a3, 1f /* w == 0: return without touching the stack */
2675 nop
2676
2677 SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
2678
2679 lw s0, 36(sp) /* s0 = wt */
2680 lw s1, 40(sp) /* s1 = wb */
2681 lw s2, 44(sp) /* s2 = vx */
2682 lw s3, 48(sp) /* s3 = unit_x */
2683 li v0, BILINEAR_INTERPOLATION_RANGE
2684
2685 sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
2686 sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
2687 0: /* one output pixel per iteration */
2688 andi t4, s2, 0xffff /* t4 = low 16 bits of vx (fraction, zero-extended) */
2689 srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction (vx >> 8 when BITS == 8) */
2690 subu t5, v0, t4 /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
2691
2692 mul s4, s0, t5 /* s4 = wt * t5 (weight of top-left) */
2693 mul s5, s0, t4 /* s5 = wt * t4 (weight of top-right) */
2694 mul s6, s1, t5 /* s6 = wb * t5 (weight of bottom-left) */
2695 mul s7, s1, t4 /* s7 = wb * t4 (weight of bottom-right) */
2696
2697 sra t9, s2, 16
2698 sll t9, t9, 2 /* t9 = byte offset of the left pixel */
2699 addiu t8, t9, 4 /* t8 = byte offset of the right pixel */
2700 lwx t0, t9(a1) /* t0 = tl */
2701 lwx t1, t8(a1) /* t1 = tr */
2702 addiu a3, a3, -1
2703 lwx t2, t9(a2) /* t2 = bl */
2704 lwx t3, t8(a2) /* t3 = br */
2705
2706 BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
2707 CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
2708
2709 addu s2, s2, s3 /* vx += unit_x; */
2710 sh t1, 0(a0) /* t1 holds the converted r5g6b5 pixel */
2711 bnez a3, 0b
2712 addiu a0, a0, 2 /* follows the branch; advances dst on both outcomes */
2713
2714 RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
2715 1:
2716 j ra
2717 nop
2718
2719 END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
2720
2721 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
2722 /* Bilinear-scaled SRC from r5g6b5 to a8r8g8b8: four r5g6b5 neighbours are widened to 8888, blended, and the result overwrites dst; one pixel per iteration.
2723 * a0 - *dst (a8r8g8b8, written)
2724 * a1 - *src_top (r5g6b5)
2725 * a2 - *src_bottom (r5g6b5)
2726 * a3 - w (pixel count; 0 returns immediately)
2727 * 16(sp) - wt (weight of the top row)
2728 * 20(sp) - wb (weight of the bottom row)
2729 * 24(sp) - vx (source position; bits above 15 select the left pixel, low 16 bits are the fraction)
2730 * 28(sp) - unit_x (added to vx after every pixel)
2731 * NOTE(review): BILINEAR_INTERPOLATE_SINGLE_PIXEL, CONVERT_2x0565_TO_2x8888 and the BILINEAR_* constants are defined outside this section. */
2732
2733 beqz a3, 1f /* w == 0: return without touching the stack */
2734 nop
2735
2736 SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
2737
2738 lw s0, 44(sp) /* s0 = wt */
2739 lw s1, 48(sp) /* s1 = wb */
2740 lw s2, 52(sp) /* s2 = vx */
2741 lw s3, 56(sp) /* s3 = unit_x */
2742 li v0, BILINEAR_INTERPOLATION_RANGE
2743 li v1, 0x07e007e0 /* constants handed to the 0565 -> 8888 conversion macro */
2744 li s8, 0x001f001f
2745
2746 sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
2747 sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
2748 0: /* one output pixel per iteration */
2749 andi t4, s2, 0xffff /* t4 = low 16 bits of vx (fraction, zero-extended) */
2750 srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction (vx >> 8 when BITS == 8) */
2751 subu t5, v0, t4 /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
2752
2753 mul s4, s0, t5 /* s4 = wt * t5 (weight of top-left) */
2754 mul s5, s0, t4 /* s5 = wt * t4 (weight of top-right) */
2755 mul s6, s1, t5 /* s6 = wb * t5 (weight of bottom-left) */
2756 mul s7, s1, t4 /* s7 = wb * t4 (weight of bottom-right) */
2757
2758 sra t9, s2, 16
2759 sll t9, t9, 1 /* t9 = byte offset of the left pixel */
2760 addiu t8, t9, 2 /* t8 = byte offset of the right pixel */
2761 lhx t0, t9(a1) /* t0 = tl */
2762 lhx t1, t8(a1) /* t1 = tr */
2763 andi t1, t1, 0xffff /* drop the sign extension left by lhx. NOTE(review): only the second operand of each pair is masked; presumably the conversion macro shifts the first one out - confirm */
2764 addiu a3, a3, -1
2765 lhx t2, t9(a2) /* t2 = bl */
2766 lhx t3, t8(a2) /* t3 = br */
2767 andi t3, t3, 0xffff /* drop the sign extension left by lhx */
2768
2769 CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
2770 CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
2771 BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
2772
2773 addu s2, s2, s3 /* vx += unit_x; */
2774 sw t0, 0(a0) /* t0 holds the interpolated pixel */
2775 bnez a3, 0b
2776 addiu a0, a0, 4 /* follows the branch; advances dst on both outcomes */
2777
2778 RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
2779 1:
2780 j ra
2781 nop
2782
2783 END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
2784
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
/*
 * Bilinear-scaled scanline: 16-bit (0565) source rows -> 16-bit (0565)
 * destination, SRC operator (the destination is overwritten, never read).
 *
 * Per output pixel the loop loads the two horizontally adjacent 16-bit
 * pixels at index (vx >> 16) from both source rows, expands them with
 * CONVERT_2x0565_TO_2x8888, blends the four with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, packs the result again with
 * CONVERT_1x8888_TO_1x0565 and stores the halfword left in t1.
 *
 * Arguments on entry:
 *   a0     - dst pointer (one 16-bit halfword written per pixel)
 *   a1     - src_top pointer (16-bit pixels)
 *   a2     - src_bottom pointer (16-bit pixels)
 *   a3     - w, pixel count; the routine returns at once when it is 0
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    beqz      a3, 1f                  /* w == 0: nothing to do */
     nop

    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* Loop-invariant constants. */
    li        v0, BILINEAR_INTERPOLATION_RANGE
    li        v1, 0x07e007e0          /* 0565 green field, both halfwords */
    li        s8, 0x001f001f          /* 0565 blue field, both halfwords */

    /* Stack arguments now sit 28 bytes higher: 16(sp) on entry is 44(sp). */
    lw        s0, 44(sp)              /* s0 = wt */
    lw        s1, 48(sp)              /* s1 = wb */
    lw        s2, 52(sp)              /* s2 = vx */
    lw        s3, 56(sp)              /* s3 = unit_x */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    subu      t5, v0, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (2 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 1               /* t9 = offset of left pixel */
    addiu     t8, t9, 2               /* t8 = offset of right pixel */
    lhx       t0, t9(a1)              /* t0 = tl (sign-extended by lhx) */
    lhx       t1, t8(a1)              /* t1 = tr */
    andi      t1, t1, 0xffff          /* drop sign-extension bits of tr */
    addiu     a3, a3, -1              /* one pixel fewer to go */
    lhx       t2, t9(a2)              /* t2 = bl */
    lhx       t3, t8(a2)              /* t3 = br */
    andi      t3, t3, 0xffff          /* drop sign-extension bits of br */

    /*
     * NOTE(review): only the second pixel of each pair is masked; presumably
     * the conversion macro discards the upper bits of its first input -
     * confirm against the macro definition.
     */
    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 /* packed pixel is taken from t1 */

    addu      s2, s2, s3              /* vx += unit_x */
    sh        t1, 0(a0)               /* store the packed 16-bit pixel */
    bnez      a3, 0b
     addiu    a0, a0, 2               /* delay slot: dst advances one halfword */

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
2849
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
/*
 * Bilinear-scaled scanline: 32-bit source rows composited onto a 32-bit
 * destination with the OVER_8888_8888 macro.
 *
 * Per output pixel the loop loads the two horizontally adjacent words at
 * index (vx >> 16) from both source rows, blends them with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, reads the current destination word,
 * combines both through OVER_8888_8888 and writes back the word left in t2.
 *
 * Arguments on entry:
 *   a0     - dst pointer (read and written, one 32-bit word per pixel)
 *   a1     - src_top pointer (32-bit pixels)
 *   a2     - src_bottom pointer (32-bit pixels)
 *   a3     - w, pixel count; the routine returns at once when it is 0
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    beqz      a3, 1f                  /* w == 0: nothing to do */
     nop

    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* Loop-invariant constants. */
    li        v0, BILINEAR_INTERPOLATION_RANGE
    li        s8, 0x00ff00ff          /* constant operand of OVER_8888_8888 */

    /* Stack arguments now sit 24 bytes higher: 16(sp) on entry is 40(sp). */
    lw        s0, 40(sp)              /* s0 = wt */
    lw        s1, 44(sp)              /* s1 = wb */
    lw        s2, 48(sp)              /* s2 = vx */
    lw        s3, 52(sp)              /* s3 = unit_x */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    subu      t5, v0, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (4 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 2               /* t9 = offset of left pixel */
    addiu     t8, t9, 4               /* t8 = offset of right pixel */
    lwx       t0, t9(a1)              /* t0 = tl */
    lwx       t1, t8(a1)              /* t1 = tr */
    addiu     a3, a3, -1              /* one pixel fewer to go */
    lwx       t2, t9(a2)              /* t2 = bl */
    lwx       t3, t8(a2)              /* t3 = br */

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lw        t1, 0(a0)               /* t1 = current destination pixel */
    OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6 /* result is taken from t2 */

    addu      s2, s2, s3              /* vx += unit_x */
    sw        t2, 0(a0)               /* write the composited pixel back */
    bnez      a3, 0b
     addiu    a0, a0, 4               /* delay slot: dst advances one word */

    RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
2910
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
/*
 * Bilinear-scaled scanline: 32-bit source rows added to a 32-bit
 * destination (ADD operator).
 *
 * Per output pixel the loop loads the two horizontally adjacent words at
 * index (vx >> 16) from both source rows, blends them with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, then adds the result to the current
 * destination word with addu_s.qb (per-byte unsigned saturating add) and
 * writes the sum back.
 *
 * Arguments on entry:
 *   a0     - dst pointer (read and written, one 32-bit word per pixel)
 *   a1     - src_top pointer (32-bit pixels)
 *   a2     - src_bottom pointer (32-bit pixels)
 *   a3     - w, pixel count; the routine returns at once when it is 0
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    beqz      a3, 1f                  /* w == 0: nothing to do */
     nop

    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7

    /* Loop-invariant constant. */
    li        v0, BILINEAR_INTERPOLATION_RANGE

    /* Stack arguments now sit 20 bytes higher: 16(sp) on entry is 36(sp). */
    lw        s0, 36(sp)              /* s0 = wt */
    lw        s1, 40(sp)              /* s1 = wb */
    lw        s2, 44(sp)              /* s2 = vx */
    lw        s3, 48(sp)              /* s3 = unit_x */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    subu      t5, v0, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (4 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 2               /* t9 = offset of left pixel */
    addiu     t8, t9, 4               /* t8 = offset of right pixel */
    lwx       t0, t9(a1)              /* t0 = tl */
    lwx       t1, t8(a1)              /* t1 = tr */
    addiu     a3, a3, -1              /* one pixel fewer to go */
    lwx       t2, t9(a2)              /* t2 = bl */
    lwx       t3, t8(a2)              /* t3 = br */

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lw        t1, 0(a0)               /* t1 = current destination pixel */
    addu_s.qb t2, t0, t1              /* t2 = saturating per-byte sum */

    addu      s2, s2, s3              /* vx += unit_x */
    sw        t2, 0(a0)               /* write the sum back */
    bnez      a3, 0b
     addiu    a0, a0, 4               /* delay slot: dst advances one word */

    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
2970
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
/*
 * Bilinear-scaled scanline with an 8-bit mask: 32-bit source rows ->
 * 32-bit destination, SRC operator (the destination is overwritten,
 * never read).
 *
 * Per output pixel the loop loads the two horizontally adjacent words at
 * index (vx >> 16) from both source rows, blends them with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, reads one mask byte, combines both
 * through MIPS_UN8x4_MUL_UN8 and stores the word left in t0.
 *
 * Arguments on entry:
 *   a0     - dst pointer (one 32-bit word written per pixel)
 *   a1     - mask pointer (one byte consumed per pixel)
 *   a2     - src_top pointer (32-bit pixels)
 *   a3     - src_bottom pointer (32-bit pixels)
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *   32(sp) - w, pixel count; the routine returns at once when it is 0
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    lw        v1, 32(sp)              /* v1 = w, the loop counter */
    beqz      v1, 1f                  /* w == 0: nothing to do */
     nop

    /* v1 already holds w, so the value saved (and restored at exit) is w. */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* Loop-invariant constants. */
    li        v0, BILINEAR_INTERPOLATION_RANGE
    li        s8, 0x00ff00ff          /* constant operand of MIPS_UN8x4_MUL_UN8 */

    /* Stack arguments now sit 28 bytes higher: 16(sp) on entry is 44(sp). */
    lw        s0, 44(sp)              /* s0 = wt */
    lw        s1, 48(sp)              /* s1 = wb */
    lw        s2, 52(sp)              /* s2 = vx */
    lw        s3, 56(sp)              /* s3 = unit_x */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    subu      t5, v0, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (4 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 2               /* t9 = offset of left pixel */
    addiu     t8, t9, 4               /* t8 = offset of right pixel */
    lwx       t0, t9(a2)              /* t0 = tl */
    lwx       t1, t8(a2)              /* t1 = tr */
    addiu     v1, v1, -1              /* one pixel fewer to go */
    lwx       t2, t9(a3)              /* t2 = bl */
    lwx       t3, t8(a3)              /* t3 = br */

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu       t1, 0(a1)               /* t1 = mask byte for this pixel */
    addiu     a1, a1, 1               /* advance to the next mask byte */
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4 /* result is taken from t0 */

    addu      s2, s2, s3              /* vx += unit_x */
    sw        t0, 0(a0)               /* store the masked pixel */
    bnez      v1, 0b
     addiu    a0, a0, 4               /* delay slot: dst advances one word */

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
3034
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
/*
 * Bilinear-scaled scanline with an 8-bit mask: 32-bit source rows ->
 * 16-bit (0565) destination, SRC operator (the destination is
 * overwritten, never read).
 *
 * Per output pixel the loop loads the two horizontally adjacent words at
 * index (vx >> 16) from both source rows, blends them with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, combines the result with one mask
 * byte through MIPS_UN8x4_MUL_UN8, packs it with CONVERT_1x8888_TO_1x0565
 * and stores the halfword left in t1.
 *
 * Arguments on entry:
 *   a0     - dst pointer (one 16-bit halfword written per pixel)
 *   a1     - mask pointer (one byte consumed per pixel)
 *   a2     - src_top pointer (32-bit pixels)
 *   a3     - src_bottom pointer (32-bit pixels)
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *   32(sp) - w, pixel count; the routine returns at once when it is 0
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    lw        v1, 32(sp)              /* v1 = w, the loop counter */
    beqz      v1, 1f                  /* w == 0: nothing to do */
     nop

    /* v1 already holds w, so the value saved (and restored at exit) is w. */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* Loop-invariant constants. */
    li        v0, BILINEAR_INTERPOLATION_RANGE
    li        s8, 0x00ff00ff          /* constant operand of MIPS_UN8x4_MUL_UN8 */

    /* Stack arguments now sit 28 bytes higher: 16(sp) on entry is 44(sp). */
    lw        s0, 44(sp)              /* s0 = wt */
    lw        s1, 48(sp)              /* s1 = wb */
    lw        s2, 52(sp)              /* s2 = vx */
    lw        s3, 56(sp)              /* s3 = unit_x */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    subu      t5, v0, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (4 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 2               /* t9 = offset of left pixel */
    addiu     t8, t9, 4               /* t8 = offset of right pixel */
    lwx       t0, t9(a2)              /* t0 = tl */
    lwx       t1, t8(a2)              /* t1 = tr */
    addiu     v1, v1, -1              /* one pixel fewer to go */
    lwx       t2, t9(a3)              /* t2 = bl */
    lwx       t3, t8(a3)              /* t3 = br */

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu       t1, 0(a1)               /* t1 = mask byte for this pixel */
    addiu     a1, a1, 1               /* advance to the next mask byte */
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 /* packed pixel is taken from t1 */

    addu      s2, s2, s3              /* vx += unit_x */
    sh        t1, 0(a0)               /* store the packed 16-bit pixel */
    bnez      v1, 0b
     addiu    a0, a0, 2               /* delay slot: dst advances one halfword */

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
3099
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
/*
 * Bilinear-scaled scanline with an 8-bit mask: 16-bit (0565) source rows
 * -> 32-bit destination, SRC operator (the destination is overwritten,
 * never read).
 *
 * Per output pixel the loop loads the two horizontally adjacent 16-bit
 * pixels at index (vx >> 16) from both source rows, expands them with
 * CONVERT_2x0565_TO_2x8888, blends the four with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, combines the result with one mask
 * byte through MIPS_UN8x4_MUL_UN8 and stores the word left in t0.
 *
 * Arguments on entry:
 *   a0     - dst pointer (one 32-bit word written per pixel)
 *   a1     - mask pointer (one byte consumed per pixel)
 *   a2     - src_top pointer (16-bit pixels)
 *   a3     - src_bottom pointer (16-bit pixels)
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *   32(sp) - w, pixel count; the routine returns at once when it is 0
 *
 * Register pressure: v0, v1 and s8 all hold mask constants here, so the
 * loop counter lives in ra (saved and restored around the loop) and
 * BILINEAR_INTERPOLATION_RANGE is re-materialised in t5 on every iteration.
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    lw        t0, 32(sp)              /* t0 = w, used only for the zero test */
    beqz      t0, 1f                  /* w == 0: nothing to do */
     nop

    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra

    /* Loop-invariant constants. */
    li        v0, 0x00ff00ff          /* constant operand of MIPS_UN8x4_MUL_UN8 */
    li        v1, 0x07e007e0          /* 0565 green field, both halfwords */
    li        s8, 0x001f001f          /* 0565 blue field, both halfwords */

    /* Stack arguments now sit 32 bytes higher: 16(sp) on entry is 48(sp). */
    lw        s0, 48(sp)              /* s0 = wt */
    lw        s1, 52(sp)              /* s1 = wb */
    lw        s2, 56(sp)              /* s2 = vx */
    lw        s3, 60(sp)              /* s3 = unit_x */
    lw        ra, 64(sp)              /* ra = w (return address is on the stack) */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    li        t5, BILINEAR_INTERPOLATION_RANGE
    subu      t5, t5, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (2 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 1               /* t9 = offset of left pixel */
    addiu     t8, t9, 2               /* t8 = offset of right pixel */
    lhx       t0, t9(a2)              /* t0 = tl (sign-extended by lhx) */
    lhx       t1, t8(a2)              /* t1 = tr */
    andi      t1, t1, 0xffff          /* drop sign-extension bits of tr */
    addiu     ra, ra, -1              /* one pixel fewer to go */
    lhx       t2, t9(a3)              /* t2 = bl */
    lhx       t3, t8(a3)              /* t3 = br */
    andi      t3, t3, 0xffff          /* drop sign-extension bits of br */

    /*
     * NOTE(review): only the second pixel of each pair is masked; presumably
     * the conversion macro discards the upper bits of its first input -
     * confirm against the macro definition.
     */
    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu       t1, 0(a1)               /* t1 = mask byte for this pixel */
    addiu     a1, a1, 1               /* advance to the next mask byte */
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4 /* result is taken from t0 */

    addu      s2, s2, s3              /* vx += unit_x */
    sw        t0, 0(a0)               /* store the masked pixel */
    bnez      ra, 0b
     addiu    a0, a0, 4               /* delay slot: dst advances one word */

    /* Brings back the real return address along with the other registers. */
    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
3170
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
/*
 * Bilinear-scaled scanline with an 8-bit mask: 16-bit (0565) source rows
 * -> 16-bit (0565) destination, SRC operator (the destination is
 * overwritten, never read).
 *
 * Per output pixel the loop loads the two horizontally adjacent 16-bit
 * pixels at index (vx >> 16) from both source rows, expands them with
 * CONVERT_2x0565_TO_2x8888, blends the four with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, combines the result with one mask
 * byte through MIPS_UN8x4_MUL_UN8, packs it with CONVERT_1x8888_TO_1x0565
 * and stores the halfword left in t1.
 *
 * Arguments on entry:
 *   a0     - dst pointer (one 16-bit halfword written per pixel)
 *   a1     - mask pointer (one byte consumed per pixel)
 *   a2     - src_top pointer (16-bit pixels)
 *   a3     - src_bottom pointer (16-bit pixels)
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *   32(sp) - w, pixel count; the routine returns at once when it is 0
 *
 * Register pressure: v0, v1 and s8 all hold mask constants here, so the
 * loop counter lives in ra (saved and restored around the loop) and
 * BILINEAR_INTERPOLATION_RANGE is re-materialised in t5 on every iteration.
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    lw        t0, 32(sp)              /* t0 = w, used only for the zero test */
    beqz      t0, 1f                  /* w == 0: nothing to do */
     nop

    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra

    /* Loop-invariant constants. */
    li        v0, 0x00ff00ff          /* constant operand of MIPS_UN8x4_MUL_UN8 */
    li        v1, 0x07e007e0          /* 0565 green field, both halfwords */
    li        s8, 0x001f001f          /* 0565 blue field, both halfwords */

    /* Stack arguments now sit 32 bytes higher: 16(sp) on entry is 48(sp). */
    lw        s0, 48(sp)              /* s0 = wt */
    lw        s1, 52(sp)              /* s1 = wb */
    lw        s2, 56(sp)              /* s2 = vx */
    lw        s3, 60(sp)              /* s3 = unit_x */
    lw        ra, 64(sp)              /* ra = w (return address is on the stack) */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    li        t5, BILINEAR_INTERPOLATION_RANGE
    subu      t5, t5, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (2 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 1               /* t9 = offset of left pixel */
    addiu     t8, t9, 2               /* t8 = offset of right pixel */
    lhx       t0, t9(a2)              /* t0 = tl (sign-extended by lhx) */
    lhx       t1, t8(a2)              /* t1 = tr */
    andi      t1, t1, 0xffff          /* drop sign-extension bits of tr */
    addiu     ra, ra, -1              /* one pixel fewer to go */
    lhx       t2, t9(a3)              /* t2 = bl */
    lhx       t3, t8(a3)              /* t3 = br */
    andi      t3, t3, 0xffff          /* drop sign-extension bits of br */

    /*
     * NOTE(review): only the second pixel of each pair is masked; presumably
     * the conversion macro discards the upper bits of its first input -
     * confirm against the macro definition.
     */
    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu       t1, 0(a1)               /* t1 = mask byte for this pixel */
    addiu     a1, a1, 1               /* advance to the next mask byte */
    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 /* packed pixel is taken from t1 */

    addu      s2, s2, s3              /* vx += unit_x */
    sh        t1, 0(a0)               /* store the packed 16-bit pixel */
    bnez      ra, 0b
     addiu    a0, a0, 2               /* delay slot: dst advances one halfword */

    /* Brings back the real return address along with the other registers. */
    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
3242
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
/*
 * Bilinear-scaled scanline with an 8-bit mask: 32-bit source rows
 * composited onto a 32-bit destination with the OVER_8888_8_8888 macro.
 *
 * Per output pixel the loop loads the two horizontally adjacent words at
 * index (vx >> 16) from both source rows, blends them with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, reads one mask byte and the current
 * destination word, combines the three through OVER_8888_8_8888 and
 * writes back the word left in t0.
 *
 * Arguments on entry:
 *   a0     - dst pointer, a8r8g8b8 (read and written, one word per pixel)
 *   a1     - mask pointer, a8 (one byte consumed per pixel)
 *   a2     - src_top pointer, a8r8g8b8
 *   a3     - src_bottom pointer, a8r8g8b8
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *   32(sp) - w, pixel count; the routine returns at once when it is 0
 *
 * The zero-width test is done before any register is saved, so the empty
 * case returns without touching the stack (same entry/exit shape as the
 * other masked bilinear scanline routines in this file).
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    lw        v1, 32(sp)              /* v1 = w, the loop counter */
    beqz      v1, 1f                  /* w == 0: nothing to do */
     nop

    /* v1 already holds w, so the value saved (and restored at exit) is w. */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* Stack arguments now sit 28 bytes higher: 16(sp) on entry is 44(sp). */
    lw        s0, 44(sp)              /* s0 = wt */
    lw        s1, 48(sp)              /* s1 = wb */
    lw        s2, 52(sp)              /* s2 = vx */
    lw        s3, 56(sp)              /* s3 = unit_x */
    li        v0, BILINEAR_INTERPOLATION_RANGE
    li        s8, 0x00ff00ff          /* constant operand of OVER_8888_8_8888 */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    subu      t5, v0, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (4 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 2               /* t9 = offset of left pixel */
    addiu     t8, t9, 4               /* t8 = offset of right pixel */
    lwx       t0, t9(a2)              /* t0 = tl */
    lwx       t1, t8(a2)              /* t1 = tr */
    addiu     v1, v1, -1              /* one pixel fewer to go */
    lwx       t2, t9(a3)              /* t2 = bl */
    lwx       t3, t8(a3)              /* t3 = br */

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu       t1, 0(a1)               /* t1 = mask byte for this pixel */
    lw        t2, 0(a0)               /* t2 = current destination pixel */
    addiu     a1, a1, 1               /* advance to the next mask byte */
    OVER_8888_8_8888 t0, t1, t2, t0, s8, t3, t4, t5, t6 /* result is taken from t0 */

    addu      s2, s2, s3              /* vx += unit_x */
    sw        t0, 0(a0)               /* write the composited pixel back */
    bnez      v1, 0b
     addiu    a0, a0, 4               /* delay slot: dst advances one word */

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
3309
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
/*
 * Bilinear-scaled scanline with an 8-bit mask: 32-bit source rows
 * combined with a 32-bit destination through the
 * MIPS_UN8x4_MUL_UN8_ADD_UN8x4 macro (ADD operator).
 *
 * Per output pixel the loop loads the two horizontally adjacent words at
 * index (vx >> 16) from both source rows, blends them with
 * BILINEAR_INTERPOLATE_SINGLE_PIXEL, reads one mask byte and the current
 * destination word, combines the three through
 * MIPS_UN8x4_MUL_UN8_ADD_UN8x4 and writes back the word left in t0.
 *
 * Arguments on entry:
 *   a0     - dst pointer (read and written, one 32-bit word per pixel)
 *   a1     - mask pointer (one byte consumed per pixel)
 *   a2     - src_top pointer (32-bit pixels)
 *   a3     - src_bottom pointer (32-bit pixels)
 *   16(sp) - wt, weight of the top row
 *   20(sp) - wb, weight of the bottom row
 *   24(sp) - vx, 16.16 fixed-point source position
 *   28(sp) - unit_x, added to vx after every pixel
 *   32(sp) - w, pixel count; the routine returns at once when it is 0
 *
 * The helper macros are defined outside this part of the file. Branch
 * delay slots are filled by hand, so the code relies on noreorder mode
 * (presumably selected by LEAF_MIPS_DSPR2 - confirm in the header).
 */

    lw        v1, 32(sp)              /* v1 = w, the loop counter */
    beqz      v1, 1f                  /* w == 0: nothing to do */
     nop

    /* v1 already holds w, so the value saved (and restored at exit) is w. */
    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8

    /* Loop-invariant constants. */
    li        v0, BILINEAR_INTERPOLATION_RANGE
    li        s8, 0x00ff00ff          /* constant operand of the combine macro */

    /* Stack arguments now sit 28 bytes higher: 16(sp) on entry is 44(sp). */
    lw        s0, 44(sp)              /* s0 = wt */
    lw        s1, 48(sp)              /* s1 = wb */
    lw        s2, 52(sp)              /* s2 = vx */
    lw        s3, 56(sp)              /* s3 = unit_x */

    /*
     * Pre-scale both row weights; presumably this gives the products below
     * the magnitude the interpolation macro expects (confirm in the macro).
     */
    sll       s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
    sll       s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
0:
    /* dx = top BILINEAR_INTERPOLATION_BITS bits of the fraction of vx. */
    andi      t4, s2, 0xffff          /* t4 = fractional part of vx */
    srl       t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = dx */
    subu      t5, v0, t4              /* t5 = RANGE - dx */

    /* Four combined weights, one per neighbouring pixel. */
    mul       s4, s0, t5              /* s4 = wt * (RANGE - dx) */
    mul       s5, s0, t4              /* s5 = wt * dx */
    mul       s6, s1, t5              /* s6 = wb * (RANGE - dx) */
    mul       s7, s1, t4              /* s7 = wb * dx */

    /* Byte offsets of the left/right source pixels (4 bytes each). */
    sra       t9, s2, 16              /* t9 = integer part of vx */
    sll       t9, t9, 2               /* t9 = offset of left pixel */
    addiu     t8, t9, 4               /* t8 = offset of right pixel */
    lwx       t0, t9(a2)              /* t0 = tl */
    lwx       t1, t8(a2)              /* t1 = tr */
    addiu     v1, v1, -1              /* one pixel fewer to go */
    lwx       t2, t9(a3)              /* t2 = bl */
    lwx       t3, t8(a3)              /* t3 = br */

    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
    lbu       t1, 0(a1)               /* t1 = mask byte for this pixel */
    lw        t2, 0(a0)               /* t2 = current destination pixel */
    addiu     a1, a1, 1               /* advance to the next mask byte */
    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5 /* result is taken from t0 */

    addu      s2, s2, s3              /* vx += unit_x */
    sw        t0, 0(a0)               /* write the combined pixel back */
    bnez      v1, 0b
     addiu    a0, a0, 4               /* delay slot: dst advances one word */

    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1:
    j         ra
     nop

END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)

mercurial