/*
 * Copyright © 2008 Mozilla Corporation
 * Copyright © 2010 Nokia Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Mozilla Corporation not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission. Mozilla Corporation makes no
 * representations about the suitability of this software for any purpose. It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author: Jeff Muizelaar (jeff@infidigm.net)
 *
 */

/* Mark the object as not needing an executable stack */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

        .text
        .arch armv6
        .object_arch armv4
        .arm
        .altmacro
        .p2align 2

/*
 * pixman_asm_function: open a function called fname.
 * Exports the symbol, starts its .func scope (closed by the user with
 * .endfunc) and, for ELF targets, marks it hidden and of function type.
 * The label itself is emitted last, so code follows the invocation directly.
 */
.macro pixman_asm_function fname
        .global fname
        .func fname
#ifdef __ELF__
        .hidden fname
        .type fname, %function
#endif
fname:
.endm

/*
 * Implementation notes:
 *  - Only ARMv5TE instructions are used (nothing ARMv6 specific), but the
 *    instruction order is scheduled for the ARM Cortex-A8 pipeline. Other
 *    microarchitectures may eventually need separately tuned variants.
 *  - TODO: ARM9/ARM11 cores lack efficient write combining; to perform well
 *    there, the stores should become 16-byte aligned writes done with STM.
 *
 * generate_nearest_scanline_func expands to one nearest scanline scaler.
 * Template arguments:
 *   fname                     - name of the function to generate
 *   bpp_shift                 - log2 of the pixel size in bytes, i.e. a pixel
 *                               occupies (1 << bpp_shift) bytes
 *   t                         - type suffix glued onto LDR/STR
 *   prefetch_distance         - number of pixels to prefetch ahead in the
 *                               source image
 *   prefetch_braking_distance - prefetching stops once this many pixels are
 *                               left before the end of the scanline
 */

.macro generate_nearest_scanline_func fname, bpp_shift, t, \
                                      prefetch_distance, \
                                      prefetch_braking_distance

/*
 * Interface of the generated function, as read from the code below:
 *   r0        COUNT            number of pixels to produce
 *   r1        DST              destination pointer, advanced per pixel
 *   r2        SRC              source base pointer
 *   r3        VX               source position with 16 fractional bits
 *   [sp]      UNIT_X           step added to VX for every pixel
 *   [sp, #4]  SRC_WIDTH_FIXED  amount subtracted from VX while VX is
 *                              non-negative
 * r4-r8 and r10 are saved and restored; r0-r3, ip and the flags are not.
 *
 * NOTE(review): every pixel offset is derived from a VX that the wrap loops
 * keep negative, so presumably the caller passes SRC and VX biased
 * accordingly - confirm against the C caller.
 */
pixman_asm_function fname
        /* incoming arguments */
        COUNT           .req r0
        DST             .req r1
        SRC             .req r2
        VX              .req r3
        UNIT_X          .req ip
        /* working registers */
        PIX0            .req r4   /* byte offset, then pixel, of even slots */
        PIX1            .req r5   /* byte offset, then pixel, of odd slots */
        OFFS_MASK       .req r6   /* clears the sub-pixel bits of an offset */
        PF_VX           .req r7   /* position used for prefetching */
        SRC_WIDTH_FIXED .req r8

        /* UNIT_X must be fetched before the push moves sp. */
        ldr     UNIT_X, [sp]
        /*
         * NOTE(review): r10 is saved but never referenced in this file;
         * presumably it only pads the frame to a multiple of 8 bytes - confirm.
         */
        push    {r4-r8, r10}
        mvn     OFFS_MASK, #((1 << bpp_shift) - 1)
        /* 24 bytes were pushed, so this is the word at entry [sp, #4]. */
        ldr     SRC_WIDTH_FIXED, [sp, #28]

        /*
         * Helper: emit two pixels. On entry PIX0 holds the byte offset of the
         * pixel to copy; on exit it holds the offset of the pixel after the
         * pair. Each half loads one pixel, computes the next offset into the
         * other register, steps VX and then brings VX back below zero.
         */
        .macro emit_pixel_pair
        ldr&t   PIX0, [SRC, PIX0]
        and     PIX1, OFFS_MASK, VX, asr #(16 - bpp_shift)
        adds    VX, VX, UNIT_X
        str&t   PIX0, [DST], #(1 << bpp_shift)
9:      subpls  VX, VX, SRC_WIDTH_FIXED
        bpl     9b

        ldr&t   PIX1, [SRC, PIX1]
        and     PIX0, OFFS_MASK, VX, asr #(16 - bpp_shift)
        adds    VX, VX, UNIT_X
        str&t   PIX1, [DST], #(1 << bpp_shift)
9:      subpls  VX, VX, SRC_WIDTH_FIXED
        bpl     9b
        .endm

        /* Prime the pipeline: offset of the first pixel, then step VX. */
        and     PIX0, OFFS_MASK, VX, asr #(16 - bpp_shift)
        adds    VX, VX, UNIT_X
9:      subpls  VX, VX, SRC_WIDTH_FIXED
        bpl     9b

        /* Skip the prefetching loop unless enough pixels remain. */
        subs    COUNT, COUNT, #(8 + prefetch_braking_distance)
        blt     2f

        /* PF_VX = VX + UNIT_X * prefetch_distance */
        mov     PF_VX, #prefetch_distance
        mla     PF_VX, UNIT_X, PF_VX, VX

1:      /* main loop: one prefetch and 8 pixels per iteration */
        pld     [SRC, PF_VX, asr #(16 - bpp_shift)]
        add     PF_VX, PF_VX, UNIT_X, lsl #3
        emit_pixel_pair
        emit_pixel_pair
        emit_pixel_pair
        emit_pixel_pair
        subs    COUNT, COUNT, #8
        bge     1b

2:      /* Undo the bias above and apply a bias of 4 instead. */
        subs    COUNT, COUNT, #(4 - 8 - prefetch_braking_distance)
        blt     4f

3:      /* leftover pixels, 4 per iteration, no prefetch */
        emit_pixel_pair
        emit_pixel_pair
        subs    COUNT, COUNT, #4
        bge     3b

4:      /*
         * All adjustments so far add up to a multiple of 4, so the low two
         * bits of COUNT still tell how many pixels (0 to 3) are left.
         */
        tst     COUNT, #2
        beq     5f
        emit_pixel_pair

5:      /* Last odd pixel: its offset is already waiting in PIX0. */
        tst     COUNT, #1
        ldrne&t PIX0, [SRC, PIX0]
        strne&t PIX0, [DST]

        /* return */
        pop     {r4-r8, r10}
        bx      lr

        /* Drop the helper macro and aliases so the template can be reused. */
        .purgem emit_pixel_pair
        .unreq  COUNT
        .unreq  DST
        .unreq  SRC
        .unreq  VX
        .unreq  UNIT_X
        .unreq  PIX0
        .unreq  PIX1
        .unreq  OFFS_MASK
        .unreq  PF_VX
        .unreq  SRC_WIDTH_FIXED
.endfunc
.endm

/* 16-bit pixels: halfword loads and stores, prefetch 80 pixels ahead */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32

/* 32-bit pixels: word loads and stores, prefetch 48 pixels ahead */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32