gfx/cairo/libpixman/src/pixman-mips-memcpy-asm.S

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Wed, 31 Dec 2014 06:09:35 +0100
changeset    0:6474c204b198
permissions  -rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

/*
 * Copyright (c) 2012
 * MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "pixman-mips-dspr2-asm.h"

/*
 * This routine could be optimized for MIPS64. The current code only
 * uses MIPS32 instructions.
 */
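/*
 * Note: a possible MIPS64 variant (not implemented here) could use the
 * 64-bit ld/sd instructions, plus ldl/ldr and sdl/sdr for the unaligned
 * path, to move 8 bytes per instruction and roughly halve the number of
 * loads and stores in each 64-byte chunk below.
 */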

#ifdef EB
# define LWHI lwl   /* high part is left in big-endian */
# define SWHI swl   /* high part is left in big-endian */
# define LWLO lwr   /* low part is right in big-endian */
# define SWLO swr   /* low part is right in big-endian */
#else
# define LWHI lwr   /* high part is right in little-endian */
# define SWHI swr   /* high part is right in little-endian */
# define LWLO lwl   /* low part is left in little-endian */
# define SWLO swl   /* low part is left in little-endian */
#endif
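/*
 * Note on the macros above: lwl/lwr (and swl/swr) each transfer only the
 * part of a register that overlaps the aligned word containing the given
 * address.  Issued as a pair at offsets 0 and 3, for example
 *
 *     LWHI  t0, 0(a1)
 *     LWLO  t0, 3(a1)
 *
 * they assemble one full 32-bit word from a possibly unaligned source
 * address, independent of byte order.  A lone LWHI/SWHI moves just the
 * bytes up to the next word boundary, which is how the 1-3 byte
 * alignment head copies below work.
 */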

LEAF_MIPS32R2(pixman_mips_fast_memcpy)

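/*
 * Note: under the o32 calling convention a0 is the destination pointer,
 * a1 the source pointer and a2 the byte count; the result is returned in
 * v0.  Every branch below has a delay slot: the instruction immediately
 * after a branch executes whether or not the branch is taken, which is
 * why useful work (pointer updates, a loop's final store, the
 * "move v0, a0" below) is placed there instead of a nop.
 */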
        slti    AT, a2, 8
        bne     AT, zero, $last8
        move    v0, a0        /* memcpy returns the dst pointer */

/* Test if the src and dst are word-aligned, or can be made word-aligned */
        xor     t8, a1, a0
        andi    t8, t8, 0x3   /* t8 is the a0/a1 word-displacement */

        bne     t8, zero, $unaligned
        negu    a3, a0

        andi    a3, a3, 0x3   /* we need to copy a3 bytes to make a0/a1 aligned */
        beq     a3, zero, $chk16w  /* when a3=0 the dst (a0) is word-aligned */
        subu    a2, a2, a3    /* now a2 is the remaining bytes count */

        LWHI    t8, 0(a1)
        addu    a1, a1, a3
        SWHI    t8, 0(a0)
        addu    a0, a0, a3

/* Now the dst/src are mutually word-aligned with word-aligned addresses */
$chk16w: andi   t8, a2, 0x3f  /* any whole 64-byte chunks? */
                              /* t8 is the byte count after 64-byte chunks */

        beq     a2, t8, $chk8w  /* if a2==t8, no 64-byte chunks */
                                /* There will be at most one 32-byte chunk after it */
        subu    a3, a2, t8    /* subtract the remainder from a2 */
                              /* Here a3 counts bytes in 16w chunks */
        addu    a3, a0, a3    /* Now a3 is the final dst after 64-byte chunks */

        addu    t0, a0, a2    /* t0 is the "past the end" address */

/*
 * When the loop executes "pref 30, x(a0)", the address a0+x must not go past
 * the "t0-32" address.
 * This means: for x=128 the last "safe" a0 address is "t0-160".
 * Alternatively, for x=64 the last "safe" a0 address is "t0-96".
 * The current version uses "pref 30, 128(a0)", so "t0-160" is the limit.
 */
        subu    t9, t0, 160   /* t9 is the "last safe pref 30, 128(a0)" address */
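/*
 * Note: "pref 0, x(a1)" is a plain load prefetch of a source line, while
 * "pref 30, x(a0)" is PrepareForStore, which claims the destination line
 * without reading it from memory (on typical implementations the line is
 * zero-filled).  It must therefore only touch lines the copy will
 * overwrite completely, which is what the t9 limit above enforces.
 */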

        pref    0, 0(a1)      /* bring the first line of src, addr 0 */
        pref    0, 32(a1)     /* bring the second line of src, addr 32 */
        pref    0, 64(a1)     /* bring the third line of src, addr 64 */
        pref    30, 32(a0)    /* safe, as we have at least 64 bytes ahead */
/* In case a0 > t9, don't use "pref 30" at all */
        sgtu    v1, a0, t9
        bgtz    v1, $loop16w  /* skip "pref 30, 64(a0)" for too short arrays */
        nop
/* otherwise, start with using pref30 */
        pref    30, 64(a0)
$loop16w:
        pref    0, 96(a1)
        lw      t0, 0(a1)
        bgtz    v1, $skip_pref30_96  /* skip "pref 30, 96(a0)" */
        lw      t1, 4(a1)
        pref    30, 96(a0)    /* continue setting up the dest, addr 96 */
$skip_pref30_96:
        lw      t2, 8(a1)
        lw      t3, 12(a1)
        lw      t4, 16(a1)
        lw      t5, 20(a1)
        lw      t6, 24(a1)
        lw      t7, 28(a1)
        pref    0, 128(a1)    /* bring the next lines of src, addr 128 */

        sw      t0, 0(a0)
        sw      t1, 4(a0)
        sw      t2, 8(a0)
        sw      t3, 12(a0)
        sw      t4, 16(a0)
        sw      t5, 20(a0)
        sw      t6, 24(a0)
        sw      t7, 28(a0)

        lw      t0, 32(a1)
        bgtz    v1, $skip_pref30_128  /* skip "pref 30, 128(a0)" */
        lw      t1, 36(a1)
        pref    30, 128(a0)   /* continue setting up the dest, addr 128 */
$skip_pref30_128:
        lw      t2, 40(a1)
        lw      t3, 44(a1)
        lw      t4, 48(a1)
        lw      t5, 52(a1)
        lw      t6, 56(a1)
        lw      t7, 60(a1)
        pref    0, 160(a1)    /* bring the next lines of src, addr 160 */

        sw      t0, 32(a0)
        sw      t1, 36(a0)
        sw      t2, 40(a0)
        sw      t3, 44(a0)
        sw      t4, 48(a0)
        sw      t5, 52(a0)
        sw      t6, 56(a0)
        sw      t7, 60(a0)

        addiu   a0, a0, 64    /* adding 64 to dest */
        sgtu    v1, a0, t9
        bne     a0, a3, $loop16w
        addiu   a1, a1, 64    /* adding 64 to src */
        move    a2, t8
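/* Note: the "addiu a1, a1, 64" above sits in the branch delay slot and runs
   on every iteration; once the loop falls through, "move a2, t8" leaves the
   remaining byte count in a2. */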

/* Here we have src and dest word-aligned but less than 64 bytes to go */

$chk8w:
        pref    0, 0x0(a1)
        andi    t8, a2, 0x1f  /* is there a 32-byte chunk? */
                              /* t8 is the remainder count past 32 bytes */
        beq     a2, t8, $chk1w  /* when a2==t8, no 32-byte chunk */
        nop

        lw      t0, 0(a1)
        lw      t1, 4(a1)
        lw      t2, 8(a1)
        lw      t3, 12(a1)
        lw      t4, 16(a1)
        lw      t5, 20(a1)
        lw      t6, 24(a1)
        lw      t7, 28(a1)
        addiu   a1, a1, 32

        sw      t0, 0(a0)
        sw      t1, 4(a0)
        sw      t2, 8(a0)
        sw      t3, 12(a0)
        sw      t4, 16(a0)
        sw      t5, 20(a0)
        sw      t6, 24(a0)
        sw      t7, 28(a0)
        addiu   a0, a0, 32

$chk1w:
        andi    a2, t8, 0x3   /* now a2 is the remainder past 1w chunks */
        beq     a2, t8, $last8
        subu    a3, t8, a2    /* a3 is the count of bytes in 1w chunks */
        addu    a3, a0, a3    /* now a3 is the dst address past the 1w chunks */

/* copying in words (4-byte chunks) */
$wordCopy_loop:
        lw      t3, 0(a1)     /* the first t3 may be equal to t0 ... optimize? */
        addiu   a1, a1, 4
        addiu   a0, a0, 4
        bne     a0, a3, $wordCopy_loop
        sw      t3, -4(a0)
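/* Note: the store above is in the branch delay slot, so it also runs for the
   final word when the branch falls through; the -4 offset compensates for a0
   having already been advanced. */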

/* For the last (<8) bytes */
$last8:
        blez    a2, leave
        addu    a3, a0, a2    /* a3 is the last dst address */
$last8loop:
        lb      v1, 0(a1)
        addiu   a1, a1, 1
        addiu   a0, a0, 1
        bne     a0, a3, $last8loop
        sb      v1, -1(a0)

leave:  j       ra
        nop

/*
 * UNALIGNED case
 */

$unaligned:
        /* got here with a3 = "negu a0" */
        andi    a3, a3, 0x3   /* test if a0 is word-aligned */
        beqz    a3, $ua_chk16w
        subu    a2, a2, a3    /* bytes left after the initial a3 bytes */

        LWHI    v1, 0(a1)
        LWLO    v1, 3(a1)
        addu    a1, a1, a3    /* a3 may be 1, 2 or 3 here */
        SWHI    v1, 0(a0)
        addu    a0, a0, a3    /* below this the dst will be word-aligned (NOTE1) */
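/*
 * Note: from this point on the destination (a0) is word-aligned while the
 * source (a1) generally is not, so the loops below use LWHI/LWLO pairs for
 * the loads but plain sw for the now-aligned stores.
 */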

$ua_chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */
                              /* t8 is the byte count after 64-byte chunks */
        beq     a2, t8, $ua_chk8w  /* if a2==t8, no 64-byte chunks */
                                   /* There will be at most one 32-byte chunk after it */
        subu    a3, a2, t8    /* subtract the remainder from a2 */
                              /* Here a3 counts bytes in 16w chunks */
        addu    a3, a0, a3    /* Now a3 is the final dst after 64-byte chunks */

        addu    t0, a0, a2    /* t0 is the "past the end" address */

        subu    t9, t0, 160   /* t9 is the "last safe pref 30, 128(a0)" address */

        pref    0, 0(a1)      /* bring the first line of src, addr 0 */
        pref    0, 32(a1)     /* bring the second line of src, addr 32 */
        pref    0, 64(a1)     /* bring the third line of src, addr 64 */
        pref    30, 32(a0)    /* safe, as we have at least 64 bytes ahead */
/* In case a0 > t9, don't use "pref 30" at all */
        sgtu    v1, a0, t9
        bgtz    v1, $ua_loop16w  /* skip "pref 30, 64(a0)" for too short arrays */
        nop
/* otherwise, start with using pref30 */
        pref    30, 64(a0)
$ua_loop16w:
        pref    0, 96(a1)
        LWHI    t0, 0(a1)
        LWLO    t0, 3(a1)
        LWHI    t1, 4(a1)
        bgtz    v1, $ua_skip_pref30_96
        LWLO    t1, 7(a1)
        pref    30, 96(a0)    /* continue setting up the dest, addr 96 */
$ua_skip_pref30_96:
        LWHI    t2, 8(a1)
        LWLO    t2, 11(a1)
        LWHI    t3, 12(a1)
        LWLO    t3, 15(a1)
        LWHI    t4, 16(a1)
        LWLO    t4, 19(a1)
        LWHI    t5, 20(a1)
        LWLO    t5, 23(a1)
        LWHI    t6, 24(a1)
        LWLO    t6, 27(a1)
        LWHI    t7, 28(a1)
        LWLO    t7, 31(a1)
        pref    0, 128(a1)    /* bring the next lines of src, addr 128 */

        sw      t0, 0(a0)
        sw      t1, 4(a0)
        sw      t2, 8(a0)
        sw      t3, 12(a0)
        sw      t4, 16(a0)
        sw      t5, 20(a0)
        sw      t6, 24(a0)
        sw      t7, 28(a0)

        LWHI    t0, 32(a1)
        LWLO    t0, 35(a1)
        LWHI    t1, 36(a1)
        bgtz    v1, $ua_skip_pref30_128
        LWLO    t1, 39(a1)
        pref    30, 128(a0)   /* continue setting up the dest, addr 128 */
$ua_skip_pref30_128:
        LWHI    t2, 40(a1)
        LWLO    t2, 43(a1)
        LWHI    t3, 44(a1)
        LWLO    t3, 47(a1)
        LWHI    t4, 48(a1)
        LWLO    t4, 51(a1)
        LWHI    t5, 52(a1)
        LWLO    t5, 55(a1)
        LWHI    t6, 56(a1)
        LWLO    t6, 59(a1)
        LWHI    t7, 60(a1)
        LWLO    t7, 63(a1)
        pref    0, 160(a1)    /* bring the next lines of src, addr 160 */

        sw      t0, 32(a0)
        sw      t1, 36(a0)
        sw      t2, 40(a0)
        sw      t3, 44(a0)
        sw      t4, 48(a0)
        sw      t5, 52(a0)
        sw      t6, 56(a0)
        sw      t7, 60(a0)

        addiu   a0, a0, 64    /* adding 64 to dest */
        sgtu    v1, a0, t9
        bne     a0, a3, $ua_loop16w
        addiu   a1, a1, 64    /* adding 64 to src */
        move    a2, t8

/* Here we have src and dest word-aligned but less than 64 bytes to go */

$ua_chk8w:
        pref    0, 0x0(a1)
        andi    t8, a2, 0x1f  /* is there a 32-byte chunk? */
                              /* t8 is the remainder count */
        beq     a2, t8, $ua_chk1w  /* when a2==t8, no 32-byte chunk */

        LWHI    t0, 0(a1)
        LWLO    t0, 3(a1)
        LWHI    t1, 4(a1)
        LWLO    t1, 7(a1)
        LWHI    t2, 8(a1)
        LWLO    t2, 11(a1)
        LWHI    t3, 12(a1)
        LWLO    t3, 15(a1)
        LWHI    t4, 16(a1)
        LWLO    t4, 19(a1)
        LWHI    t5, 20(a1)
        LWLO    t5, 23(a1)
        LWHI    t6, 24(a1)
        LWLO    t6, 27(a1)
        LWHI    t7, 28(a1)
        LWLO    t7, 31(a1)
        addiu   a1, a1, 32

        sw      t0, 0(a0)
        sw      t1, 4(a0)
        sw      t2, 8(a0)
        sw      t3, 12(a0)
        sw      t4, 16(a0)
        sw      t5, 20(a0)
        sw      t6, 24(a0)
        sw      t7, 28(a0)
        addiu   a0, a0, 32

$ua_chk1w:
        andi    a2, t8, 0x3   /* now a2 is the remainder past 1w chunks */
        beq     a2, t8, $ua_smallCopy
        subu    a3, t8, a2    /* a3 is the count of bytes in 1w chunks */
        addu    a3, a0, a3    /* now a3 is the dst address past the 1w chunks */

/* copying in words (4-byte chunks) */
$ua_wordCopy_loop:
        LWHI    v1, 0(a1)
        LWLO    v1, 3(a1)
        addiu   a1, a1, 4
        addiu   a0, a0, 4     /* note: dst=a0 is word-aligned here, see NOTE1 */
        bne     a0, a3, $ua_wordCopy_loop
        sw      v1, -4(a0)

/* Now less than 4 bytes (value in a2) left to copy */
$ua_smallCopy:
        beqz    a2, leave
        addu    a3, a0, a2    /* a3 is the last dst address */
$ua_smallCopy_loop:
        lb      v1, 0(a1)
        addiu   a1, a1, 1
        addiu   a0, a0, 1
        bne     a0, a3, $ua_smallCopy_loop
        sb      v1, -1(a0)

        j       ra
        nop

END(pixman_mips_fast_memcpy)
