/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Author:  Nemanja Lukic (nlukic@mips.com)
 */

#ifndef PIXMAN_MIPS_DSPR2_ASM_H
#define PIXMAN_MIPS_DSPR2_ASM_H

/*
 * Symbolic names for the MIPS general purpose registers.  These are
 * preprocessor defines, so this header must be run through cpp before
 * it reaches the assembler.
 */
#define zero $0
#define AT   $1
#define v0   $2
#define v1   $3
#define a0   $4
#define a1   $5
#define a2   $6
#define a3   $7
#define t0   $8
#define t1   $9
#define t2   $10
#define t3   $11
#define t4   $12
#define t5   $13
#define t6   $14
#define t7   $15
#define s0   $16
#define s1   $17
#define s2   $18
#define s3   $19
#define s4   $20
#define s5   $21
#define s6   $22
#define s7   $23
#define t8   $24
#define t9   $25
#define k0   $26
#define k1   $27
#define gp   $28
#define sp   $29
#define fp   $30
#define s8   $30
#define ra   $31

/*
 * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
 *
 * Emits the global symbol, its alignment/type/frame directives and then
 * pushes the assembler option state before switching to mips32r2,
 * noreorder and noat.  The pushed state is popped again by END().
 */
#define LEAF_MIPS32R2(symbol)                           \
                .globl  symbol;                         \
                .align  2;                              \
                .type   symbol, @function;              \
                .ent    symbol, 0;                      \
symbol:         .frame  sp, 0, ra;                      \
                .set    push;                           \
                .set    arch=mips32r2;                  \
                .set    noreorder;                      \
                .set    noat;

/*
 * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
 *
 * Same as LEAF_MIPS32R2, with the DSPr2 instruction set enabled as well.
 */
#define LEAF_MIPS_DSPR2(symbol)                         \
LEAF_MIPS32R2(symbol)                                   \
                .set    dspr2;

/*
 * END - mark end of function
 *
 * Restores the assembler option state pushed by LEAF_MIPS32R2 and records
 * the size of the function.
 */
#define END(function)                                   \
                .set    pop;                            \
                .end    function;                       \
                .size   function,.-function

/*
 * Checks if stack offset is big enough for storing/restoring regs_num
 * number of register to/from stack. Stack offset must be greater than
 * or equal to the number of bytes needed for storing registers (regs_num*4).
 * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
 * reserved for input arguments of the functions, already stored in a0-a3),
 * stack size can be further optimized by utilizing this space, which is
 * why 16 is subtracted from the required size below.
 */
.macro CHECK_STACK_OFFSET regs_num, stack_offset
.if \stack_offset < \regs_num * 4 - 16
.error "Stack offset too small."
.endif
.endm

/*
 * Saves set of registers on stack. Maximum number of registers that
 * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is number of bytes that are subtracted from stack pointer
 * (sp) before registers are pushed in order to provide enough space on stack
 * (offset must be multiple of 4, and must be big enough, as described by
 * CHECK_STACK_OFFSET macro). Registers are stored at 0(sp), 4(sp), ... in
 * the order they are listed; unused register slots default to 0 and are
 * skipped. This macro is intended to be used in combination with
 * RESTORE_REGS_FROM_STACK macro. Example:
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
                          r2  = 0, r3  = 0, r4  = 0, \
                          r5  = 0, r6  = 0, r7  = 0, \
                          r8  = 0, r9  = 0, r10 = 0, \
                          r11 = 0, r12 = 0, r13 = 0, \
                          r14 = 0
    /* Reject negative offsets and offsets that are not a multiple of 4. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
    .error "Stack offset must be pozitive and multiple of 4."
    .endif
    .if \stack_offset != 0
    addiu           sp, sp, -\stack_offset
    .endif
    /* The first four slots fit in the 16-byte argument area: no check. */
    sw              \r1, 0(sp)
    .if \r2 != 0
    sw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    sw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    sw              \r4, 12(sp)
    .endif
    /* From the fifth register on, the offset has to cover the slot. */
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    sw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    sw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    sw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    sw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    sw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    sw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    sw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    sw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    sw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    sw              \r14, 52(sp)
    .endif
.endm

/*
 * Restores set of registers from stack. Maximum number of registers that
 * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is number of bytes that are added to stack pointer (sp)
 * after registers are restored (offset must be multiple of 4, and must
 * be big enough, as described by CHECK_STACK_OFFSET macro). Registers are
 * loaded from 0(sp), 4(sp), ... in the order they are listed, so the
 * argument list must match the one given to SAVE_REGS_ON_STACK. This macro
 * is intended to be used in combination with SAVE_REGS_ON_STACK macro.
 * Example:
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
                               r2  = 0, r3  = 0, r4  = 0, \
                               r5  = 0, r6  = 0, r7  = 0, \
                               r8  = 0, r9  = 0, r10 = 0, \
                               r11 = 0, r12 = 0, r13 = 0, \
                               r14 = 0
    /* Reject negative offsets and offsets that are not a multiple of 4. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
    .error "Stack offset must be pozitive and multiple of 4."
    .endif
    lw              \r1, 0(sp)
    .if \r2 != 0
    lw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    lw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    lw              \r4, 12(sp)
    .endif
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    lw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    lw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    lw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    lw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    lw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    lw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    lw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    lw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    lw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    lw              \r14, 52(sp)
    .endif
    /* Release the frame only after every register has been reloaded. */
    .if \stack_offset != 0
    addiu           sp, sp, \stack_offset
    .endif
.endm

/*
 * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
 * returned in (out_8888) register. Requires two temporary registers
 * (scratch1 and scratch2). Alpha is set to 0xff and every colour component
 * is widened by replicating its top bits into the freed low bits.
 * out_8888 is written before in_565 is read, so the two must be different
 * registers.
 */
.macro CONVERT_1x0565_TO_1x8888 in_565,   \
                                out_8888, \
                                scratch1, scratch2
    lui     \out_8888, 0xff00                   /* alpha = 0xff */
    /* blue: (b << 3) | (b >> 2) into bits 7..0 */
    sll     \scratch1, \in_565,   0x3
    andi    \scratch2, \scratch1, 0xff
    ext     \scratch1, \in_565,   0x2, 0x3
    or      \scratch1, \scratch2, \scratch1
    or      \out_8888, \out_8888, \scratch1

    /* green: (g << 2) | (g >> 4) into bits 15..8 */
    sll     \scratch1, \in_565,   0x5
    andi    \scratch1, \scratch1, 0xfc00
    srl     \scratch2, \in_565,   0x1
    andi    \scratch2, \scratch2, 0x300
    or      \scratch2, \scratch1, \scratch2
    or      \out_8888, \out_8888, \scratch2

    /* red: (r << 3) | (r >> 2), built in bits 15..8 then moved to 23..16 */
    andi    \scratch1, \in_565,   0xf800
    srl     \scratch2, \scratch1, 0x5
    andi    \scratch2, \scratch2, 0xff00
    or      \scratch1, \scratch1, \scratch2
    sll     \scratch1, \scratch1, 0x8
    or      \out_8888, \out_8888, \scratch1
.endm

/*
 * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
 * returned in (out1_8888 and out2_8888) registers. Requires four scratch
 * registers (scratch1 ... scratch4). It also requires maskG and maskB for
 * color component extractions. These masks must have following values:
 *   li       maskG, 0x07e007e0
 *   li       maskB, 0x001F001F
 * in2_565 is OR-ed into the low halfword of the packed pair without being
 * masked, so its upper 16 bits are presumably expected to be zero
 * (NOTE(review): confirm against callers).
 */
.macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565,     \
                                out1_8888, out2_8888, \
                                maskG, maskB,         \
                                scratch1, scratch2, scratch3, scratch4
    /* Pack both pixels into one word: in1 high halfword, in2 low. */
    sll               \scratch1,  \in1_565,   16
    or                \scratch1,  \scratch1,  \in2_565
    /* 0xff00ff00: alpha byte for both pixels. */
    lui               \out2_8888, 0xff00
    ori               \out2_8888, \out2_8888, 0xff00
    /* Red and green, two pixels at a time. */
    shrl.ph           \scratch2,  \scratch1,  11
    and               \scratch3,  \scratch1,  \maskG
    shra.ph           \scratch4,  \scratch2,  2
    shll.ph           \scratch2,  \scratch2,  3
    shll.ph           \scratch3,  \scratch3,  5
    or                \scratch2,  \scratch2,  \scratch4
    shrl.qb           \scratch4,  \scratch3,  6
    or                \out2_8888, \out2_8888, \scratch2
    or                \scratch3,  \scratch3,  \scratch4
    /* Blue, two pixels at a time. */
    and               \scratch1,  \scratch1,  \maskB
    shll.ph           \scratch2,  \scratch1,  3
    shra.ph           \scratch4,  \scratch1,  2
    or                \scratch2,  \scratch2,  \scratch4
    or                \scratch3,  \scratch2,  \scratch3
    /* Split the A|R and G|B halfword pairs back into two pixels. */
    precrq.ph.w       \out1_8888, \out2_8888, \scratch3
    precr_sra.ph.w    \out2_8888, \scratch3,  0
.endm

/*
 * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
 * returned in (out_565) register. Requires two temporary registers
 * (scratch1 and scratch2). Alpha is dropped. out_565 is written before the
 * last reads of in_8888, so the two must be different registers.
 */
.macro CONVERT_1x8888_TO_1x0565 in_8888, \
                                out_565, \
                                scratch1, scratch2
    ext     \out_565,  \in_8888,  0x3, 0x5      /* blue:  top 5 bits */
    srl     \scratch1, \in_8888,  0x5
    andi    \scratch1, \scratch1, 0x07e0        /* green: top 6 bits */
    srl     \scratch2, \in_8888,  0x8
    andi    \scratch2, \scratch2, 0xf800        /* red:   top 5 bits */
    or      \out_565,  \out_565,  \scratch1
    or      \out_565,  \out_565,  \scratch2
.endm

/*
 * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5
 * pixels returned in (out1_565 and out2_565) registers. Requires two temporary
 * registers (scratch1 and scratch2). It also requires maskR, maskG and maskB
 * for color component extractions. These masks must have following values:
 *   li       maskR, 0xf800f800
 *   li       maskG, 0x07e007e0
 *   li       maskB, 0x001F001F
 * Value of input register in2_8888 is lost.
 * On exit out1_565 holds both converted pixels (first pixel in the low
 * halfword, second pixel in the high halfword); the high halfword is not
 * cleared. out2_565 holds the second pixel alone.
 */
.macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888,  \
                                out1_565, out2_565,  \
                                maskR, maskG, maskB, \
                                scratch1, scratch2
    /* Regroup: scratch1 = A2 R2 | A1 R1, in2_8888 = G2 B2 | G1 B1. */
    precrq.ph.w       \scratch1, \in2_8888, \in1_8888
    precr_sra.ph.w    \in2_8888, \in1_8888, 0
    shll.ph           \scratch1, \scratch1, 8
    srl               \in2_8888, \in2_8888, 3
    and               \scratch2, \in2_8888, \maskB
    and               \scratch1, \scratch1, \maskR
    srl               \in2_8888, \in2_8888, 2
    and               \out2_565, \in2_8888, \maskG
    or                \out2_565, \out2_565, \scratch2
    or                \out1_565, \out2_565, \scratch1
    srl               \out2_565, \out1_565, 16
.endm

/*
 * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed
 * for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * For every channel the product t = channel * m is reduced with
 * (t + (t >> 8)) >> 8, using rounding shifts. The shifts are arithmetic,
 * which is why the intermediate value is masked with maskLSR.
 * Result is returned in d_8888. m_8 is overwritten (replicated into both
 * halfwords) and scratch1 ... scratch3 are clobbered.
 */
.macro MIPS_UN8x4_MUL_UN8 s_8888,  \
                          m_8,     \
                          d_8888,  \
                          maskLSR, \
                          scratch1, scratch2, scratch3
    replv.ph          \m_8,      \m_8                 /*   0 | M | 0 | M */
    muleu_s.ph.qbl    \scratch1, \s_8888,   \m_8      /*    A*M  |  R*M */
    muleu_s.ph.qbr    \scratch2, \s_8888,   \m_8      /*    G*M  |  B*M */
    shra_r.ph         \scratch3, \scratch1, 8
    shra_r.ph         \d_8888,   \scratch2, 8
    and               \scratch3, \scratch3, \maskLSR  /* 0 |A*M>>8| 0 |R*M>>8 */
    and               \d_8888,   \d_8888,   \maskLSR  /* 0 |G*M>>8| 0 |B*M>>8 */
    addq.ph           \scratch1, \scratch1, \scratch3 /* t + (t >> 8), A | R */
    addq.ph           \scratch2, \scratch2, \d_8888   /* t + (t >> 8), G | B */
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    precr.qb.ph       \d_8888,   \scratch1, \scratch2 /* repack A R G B */
.endm

/*
 * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Two-pixel version of MIPS_UN8x4_MUL_UN8: d1_8888 = s1_8888 * m1_8 and
 * d2_8888 = s2_8888 * m2_8. m1_8 and m2_8 are overwritten and
 * scratch1 ... scratch6 are clobbered.
 */
.macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \
                              s2_8888, \
                              m1_8,    \
                              m2_8,    \
                              d1_8888, \
                              d2_8888, \
                              maskLSR, \
                              scratch1, scratch2, scratch3, \
                              scratch4, scratch5, scratch6
    replv.ph          \m1_8,     \m1_8                /*  0 | M1 | 0 | M1 */
    replv.ph          \m2_8,     \m2_8                /*  0 | M2 | 0 | M2 */
    muleu_s.ph.qbl    \scratch1, \s1_8888,  \m1_8     /*  A1*M1  |  R1*M1 */
    muleu_s.ph.qbr    \scratch2, \s1_8888,  \m1_8     /*  G1*M1  |  B1*M1 */
    muleu_s.ph.qbl    \scratch3, \s2_8888,  \m2_8     /*  A2*M2  |  R2*M2 */
    muleu_s.ph.qbr    \scratch4, \s2_8888,  \m2_8     /*  G2*M2  |  B2*M2 */
    shra_r.ph         \scratch5, \scratch1, 8
    shra_r.ph         \d1_8888,  \scratch2, 8
    shra_r.ph         \scratch6, \scratch3, 8
    shra_r.ph         \d2_8888,  \scratch4, 8
    and               \scratch5, \scratch5, \maskLSR  /* 0 |A1*M1| 0 |R1*M1 */
    and               \d1_8888,  \d1_8888,  \maskLSR  /* 0 |G1*M1| 0 |B1*M1 */
    and               \scratch6, \scratch6, \maskLSR  /* 0 |A2*M2| 0 |R2*M2 */
    and               \d2_8888,  \d2_8888,  \maskLSR  /* 0 |G2*M2| 0 |B2*M2 */
    addq.ph           \scratch1, \scratch1, \scratch5
    addq.ph           \scratch2, \scratch2, \d1_8888
    addq.ph           \scratch3, \scratch3, \scratch6
    addq.ph           \scratch4, \scratch4, \d2_8888
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    shra_r.ph         \scratch3, \scratch3, 8
    shra_r.ph         \scratch4, \scratch4, 8
    precr.qb.ph       \d1_8888,  \scratch1, \scratch2
    precr.qb.ph       \d2_8888,  \scratch3, \scratch4
.endm

/*
 * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Each channel of s_8888 is multiplied with the matching channel of m_8888
 * and reduced in the same way as in MIPS_UN8x4_MUL_UN8. Result is returned
 * in d_8888; scratch1 ... scratch4 are clobbered, the inputs are preserved.
 */
.macro MIPS_UN8x4_MUL_UN8x4 s_8888,  \
                            m_8888,  \
                            d_8888,  \
                            maskLSR, \
                            scratch1, scratch2, scratch3, scratch4
    preceu.ph.qbl     \scratch1, \m_8888              /*   0 | A | 0 | R */
    preceu.ph.qbr     \scratch2, \m_8888              /*   0 | G | 0 | B */
    muleu_s.ph.qbl    \scratch3, \s_8888,   \scratch1 /*    A*A  |  R*R */
    muleu_s.ph.qbr    \scratch4, \s_8888,   \scratch2 /*    G*G  |  B*B */
    shra_r.ph         \scratch1, \scratch3, 8
    shra_r.ph         \scratch2, \scratch4, 8
    and               \scratch1, \scratch1, \maskLSR  /*   0 |A*A| 0 |R*R */
    and               \scratch2, \scratch2, \maskLSR  /*   0 |G*G| 0 |B*B */
    addq.ph           \scratch1, \scratch1, \scratch3
    addq.ph           \scratch2, \scratch2, \scratch4
    shra_r.ph         \scratch1, \scratch1, 8
    shra_r.ph         \scratch2, \scratch2, 8
    precr.qb.ph       \d_8888,   \scratch1, \scratch2
.endm

/*
 * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires
 * maskLSR needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Two-pixel version of MIPS_UN8x4_MUL_UN8x4: d1_8888 = s1_8888 * m1_8888 and
 * d2_8888 = s2_8888 * m2_8888, channel by channel. d1_8888 and d2_8888 are
 * also used as temporaries before the final results are written, and
 * scratch1 ... scratch6 are clobbered.
 */

.macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888, \
                                s2_8888, \
                                m1_8888, \
                                m2_8888, \
                                d1_8888, \
                                d2_8888, \
                                maskLSR, \
                                scratch1, scratch2, scratch3, \
                                scratch4, scratch5, scratch6
    preceu.ph.qbl     \scratch1, \m1_8888             /*   0 | A | 0 | R */
    preceu.ph.qbr     \scratch2, \m1_8888             /*   0 | G | 0 | B */
    preceu.ph.qbl     \scratch3, \m2_8888             /*   0 | A | 0 | R */
    preceu.ph.qbr     \scratch4, \m2_8888             /*   0 | G | 0 | B */
    muleu_s.ph.qbl    \scratch5, \s1_8888,  \scratch1 /*    A*A  |  R*R */
    muleu_s.ph.qbr    \scratch6, \s1_8888,  \scratch2 /*    G*G  |  B*B */
    muleu_s.ph.qbl    \scratch1, \s2_8888,  \scratch3 /*    A*A  |  R*R */
    muleu_s.ph.qbr    \scratch2, \s2_8888,  \scratch4 /*    G*G  |  B*B */
    shra_r.ph         \scratch3, \scratch5, 8
    shra_r.ph         \scratch4, \scratch6, 8
    shra_r.ph         \d1_8888,  \scratch1, 8
    shra_r.ph         \d2_8888,  \scratch2, 8
    and               \scratch3, \scratch3, \maskLSR  /*   0 |A*A| 0 |R*R */
    and               \scratch4, \scratch4, \maskLSR  /*   0 |G*G| 0 |B*B */
    and               \d1_8888,  \d1_8888,  \maskLSR  /*   0 |A*A| 0 |R*R */
    and               \d2_8888,  \d2_8888,  \maskLSR  /*   0 |G*G| 0 |B*B */
    addq.ph           \scratch3, \scratch3, \scratch5
    addq.ph           \scratch4, \scratch4, \scratch6
    addq.ph           \d1_8888,  \d1_8888,  \scratch1
    addq.ph           \d2_8888,  \d2_8888,  \scratch2
    shra_r.ph         \scratch3, \scratch3, 8
    shra_r.ph         \scratch4, \scratch4, 8
    shra_r.ph         \scratch5, \d1_8888,  8
    shra_r.ph         \scratch6, \d2_8888,  8
    precr.qb.ph       \d1_8888,  \scratch3, \scratch4
    precr.qb.ph       \d2_8888,  \scratch5, \scratch6
.endm

/*
 * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
 * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR
 * needed for rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Computes out_8888 = s * m + d * (255 - alpha(s * m)), with the products
 * reduced by MIPS_UN8x4_MUL_UN8 and the final addition saturating per byte.
 * m_8 and d_8888 are overwritten, scratch1 ... scratch4 are clobbered and
 * out_8888 is used as a temporary before it receives the result.
 */
.macro OVER_8888_8_8888 s_8888,   \
                        m_8,      \
                        d_8888,   \
                        out_8888, \
                        maskLSR,  \
                        scratch1, scratch2, scratch3, scratch4
    /* scratch1 = source multiplied by mask */
    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8, \
                       \scratch1, \maskLSR, \
                       \scratch2, \scratch3, \scratch4

    /* scratch2 = 255 - alpha of the masked source */
    not                \scratch2, \scratch1
    srl                \scratch2, \scratch2, 24

    /* d_8888 = destination scaled by the inverse alpha */
    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch2, \
                       \d_8888,   \maskLSR, \
                       \scratch3, \scratch4, \out_8888

    addu_s.qb          \out_8888, \d_8888,   \scratch1
.endm

/*
 * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
 * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and
 * m2_8). It also requires maskLSR needed for rounding process. maskLSR must
 * have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Two-pixel version of OVER_8888_8_8888. m1_8, m2_8, d1_8888 and d2_8888
 * are overwritten, scratch1 ... scratch6 are clobbered and out1_8888 /
 * out2_8888 are used as temporaries before they receive the results.
 */
.macro OVER_2x8888_2x8_2x8888 s1_8888,   \
                              s2_8888,   \
                              m1_8,      \
                              m2_8,      \
                              d1_8888,   \
                              d2_8888,   \
                              out1_8888, \
                              out2_8888, \
                              maskLSR,   \
                              scratch1, scratch2, scratch3, \
                              scratch4, scratch5, scratch6
    /* scratch1 / scratch2 = sources multiplied by their masks */
    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888, \
                           \m1_8,      \m2_8, \
                           \scratch1,  \scratch2, \
                           \maskLSR, \
                           \scratch3,  \scratch4, \out1_8888, \
                           \out2_8888, \scratch5, \scratch6

    /* scratch3 / scratch4 = 255 - alpha of the masked sources */
    not                    \scratch3,  \scratch1
    srl                    \scratch3,  \scratch3, 24
    not                    \scratch4,  \scratch2
    srl                    \scratch4,  \scratch4, 24

    /* d1 / d2 = destinations scaled by the inverse alphas */
    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888, \
                           \scratch3,  \scratch4, \
                           \d1_8888,   \d2_8888, \
                           \maskLSR, \
                           \scratch5,  \scratch6, \out1_8888, \
                           \out2_8888, \scratch3, \scratch4

    addu_s.qb              \out1_8888, \d1_8888,  \scratch1
    addu_s.qb              \out2_8888, \d2_8888,  \scratch2
.endm

/*
 * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
 * destination pixel (d_8888). It also requires maskLSR needed for rounding
 * process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 *
 * Computes out_8888 = s + d * (255 - alpha(s)) with a saturating per-byte
 * addition. scratch1 ... scratch4 are clobbered. out_8888 is written before
 * s_8888 is read for the final addition, so the two must be different
 * registers.
 */
.macro OVER_8888_8888 s_8888,   \
                      d_8888,   \
                      out_8888, \
                      maskLSR,  \
                      scratch1, scratch2, scratch3, scratch4
    /* scratch1 = 255 - alpha of the source */
    not                \scratch1, \s_8888
    srl                \scratch1, \scratch1, 24

    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch1, \
                       \out_8888, \maskLSR, \
                       \scratch2, \scratch3, \scratch4

    addu_s.qb          \out_8888, \out_8888, \s_8888
.endm

/*
 * Multiply single a8r8g8b8 pixel (s_8888) with a8 mask (m_8) and add single
 * a8r8g8b8 pixel (d_8888) to the product, saturating per byte. Result is
 * returned in out_8888. It requires maskLSR needed for rounding process.
 * maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 * m_8 is overwritten and scratch1 ... scratch3 are clobbered. out_8888 is
 * written before d_8888 is read, so the two must be different registers.
 */
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
                                    m_8,      \
                                    d_8888,   \
                                    out_8888, \
                                    maskLSR,  \
                                    scratch1, scratch2, scratch3
    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8, \
                       \out_8888, \maskLSR, \
                       \scratch1, \scratch2, \scratch3

    addu_s.qb          \out_8888, \out_8888, \d_8888
.endm

/*
 * Two-pixel version of MIPS_UN8x4_MUL_UN8_ADD_UN8x4:
 *   out1_8888 = s1_8888 * m1_8 + d1_8888
 *   out2_8888 = s2_8888 * m2_8 + d2_8888
 * with saturating per-byte additions. It requires maskLSR needed for
 * rounding process. maskLSR must have following value:
 *   li       maskLSR, 0x00ff00ff
 * m1_8 and m2_8 are overwritten and scratch1 ... scratch6 are clobbered.
 * The outputs are written before d1_8888 / d2_8888 are read, so they must
 * not share registers with them.
 */
.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888,   \
                                          s2_8888,   \
                                          m1_8,      \
                                          m2_8,      \
                                          d1_8888,   \
                                          d2_8888,   \
                                          out1_8888, \
                                          out2_8888, \
                                          maskLSR,   \
                                          scratch1, scratch2, scratch3, \
                                          scratch4, scratch5, scratch6
    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888, \
                           \m1_8,      \m2_8, \
                           \out1_8888, \out2_8888, \
                           \maskLSR, \
                           \scratch1,  \scratch2, \scratch3, \
                           \scratch4,  \scratch5, \scratch6

    addu_s.qb              \out1_8888, \out1_8888, \d1_8888
    addu_s.qb              \out2_8888, \out2_8888, \d2_8888
.endm

/*
 * Bilinear interpolation of one a8r8g8b8 pixel from its four neighbours:
 * top-left (tl), top-right (tr), bottom-left (bl) and bottom-right (br),
 * weighted by wt1, wt2, wb1 and wb2 respectively. For every 8-bit channel
 * the sum wt1*tl + wt2*tr + wb1*bl + wb2*br is accumulated in one of the
 * DSP accumulators ($ac0 ... $ac3) and bits 23..16 of the sum become the
 * output channel, so the four weights presumably add up to 0x10000
 * (NOTE(review): confirm against callers).
 * Result is returned in tl. scratch1, scratch2, alpha, red, green and blue
 * are all used as temporaries and clobbered, as are $ac0 ... $ac3.
 * The multiply and extract groups are interleaved on purpose; keep the
 * instruction order.
 */
.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,          \
                                         scratch1, scratch2,      \
                                         alpha, red, green, blue, \
                                         wt1, wt2, wb1, wb2
    /* byte 0 (blue channel) of all four pixels */
    andi            \scratch1, \tl,  0xff
    andi            \scratch2, \tr,  0xff
    andi            \alpha,    \bl,  0xff
    andi            \red,      \br,  0xff

    multu           $ac0,      \wt1, \scratch1
    maddu           $ac0,      \wt2, \scratch2
    maddu           $ac0,      \wb1, \alpha
    maddu           $ac0,      \wb2, \red

    /* byte 1 (green channel) */
    ext             \scratch1, \tl,  8, 8
    ext             \scratch2, \tr,  8, 8
    ext             \alpha,    \bl,  8, 8
    ext             \red,      \br,  8, 8

    multu           $ac1,      \wt1, \scratch1
    maddu           $ac1,      \wt2, \scratch2
    maddu           $ac1,      \wb1, \alpha
    maddu           $ac1,      \wb2, \red

    /* byte 2 (red channel) */
    ext             \scratch1, \tl,  16, 8
    ext             \scratch2, \tr,  16, 8
    ext             \alpha,    \bl,  16, 8
    ext             \red,      \br,  16, 8

    mflo            \blue,     $ac0

    multu           $ac2,      \wt1, \scratch1
    maddu           $ac2,      \wt2, \scratch2
    maddu           $ac2,      \wb1, \alpha
    maddu           $ac2,      \wb2, \red

    /* byte 3 (alpha channel) */
    ext             \scratch1, \tl,  24, 8
    ext             \scratch2, \tr,  24, 8
    ext             \alpha,    \bl,  24, 8
    ext             \red,      \br,  24, 8

    mflo            \green,    $ac1

    multu           $ac3,      \wt1, \scratch1
    maddu           $ac3,      \wt2, \scratch2
    maddu           $ac3,      \wb1, \alpha
    maddu           $ac3,      \wb2, \red

    mflo            \red,      $ac2
    mflo            \alpha,    $ac3

    /* Pack bits 23..16 of the four sums into one A R G B word. */
    precr.qb.ph     \alpha,    \alpha, \red
    precr.qb.ph     \scratch1, \green, \blue
    precrq.qb.ph    \tl,       \alpha, \scratch1
.endm

#endif /* PIXMAN_MIPS_DSPR2_ASM_H */