gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,165 @@
     1.4 +/*
     1.5 + * Copyright © 2008 Mozilla Corporation
     1.6 + * Copyright © 2010 Nokia Corporation
     1.7 + *
     1.8 + * Permission to use, copy, modify, distribute, and sell this software and its
     1.9 + * documentation for any purpose is hereby granted without fee, provided that
    1.10 + * the above copyright notice appear in all copies and that both that
    1.11 + * copyright notice and this permission notice appear in supporting
    1.12 + * documentation, and that the name of Mozilla Corporation not be used in
    1.13 + * advertising or publicity pertaining to distribution of the software without
    1.14 + * specific, written prior permission.  Mozilla Corporation makes no
    1.15 + * representations about the suitability of this software for any purpose.  It
    1.16 + * is provided "as is" without express or implied warranty.
    1.17 + *
    1.18 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
    1.19 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
    1.20 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
    1.21 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    1.22 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
    1.23 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
    1.24 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
    1.25 + * SOFTWARE.
    1.26 + *
    1.27 + * Author:  Jeff Muizelaar (jeff@infidigm.net)
    1.28 + *
    1.29 + */
    1.30 +
    1.31 +/* Prevent the stack from becoming executable */
    1.32 +#if defined(__linux__) && defined(__ELF__)
    1.33 +.section .note.GNU-stack,"",%progbits
    1.34 +#endif
    1.35 +/* Assembler state shared by every macro and function below */
    1.36 +	.text
    1.37 +	.arch armv6	/* instruction set the assembler accepts for this file */
    1.38 +	.object_arch armv4	/* NOTE(review): presumably overrides the architecture recorded in the object attributes - confirm against the GNU as manual */
    1.39 +	.arm	/* ARM instruction encoding, not Thumb */
    1.40 +	.altmacro	/* alternate macro mode: the macros below name their arguments without a backslash and join them with an ampersand */
    1.41 +	.p2align 2	/* align to a 2^2 = 4 byte boundary */
    1.42 +
    1.43 +/* Supplementary macro for setting function attributes: opens a .func region for fname, exports the symbol, marks it hidden and of type %function when __ELF__ is defined, and places the fname label. The matching .endfunc is left to the code that invokes this macro. */
    1.44 +.macro pixman_asm_function fname
    1.45 +	.func fname	/* closed by the .endfunc the invoking code emits */
    1.46 +	.global fname	/* make the symbol visible to the linker */
    1.47 +#ifdef __ELF__
    1.48 +	.hidden fname	/* ELF only: hidden symbol visibility */
    1.49 +	.type fname, %function	/* ELF only: mark the symbol as a function */
    1.50 +#endif
    1.51 +fname:	/* entry point label */
    1.52 +.endm
    1.53 +
    1.54 +/*
    1.55 + * Note: This code uses only ARMv5TE instructions (nothing ARMv6-specific),
    1.56 + *       but is scheduled for the ARM Cortex-A8 pipeline. So it might need to
    1.57 + *       be split into a few variants, tuned for each microarchitecture.
    1.58 + *
    1.59 + * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
    1.60 + * have efficient write combining), it needs to be changed to use 16-byte
    1.61 + * aligned writes using the STM instruction.
    1.62 + *
    1.63 + * Nearest scanline scaler macro template uses the following arguments:
    1.64 + *  fname                     - name of the function to generate
    1.65 + *  bpp_shift                 - (1 << bpp_shift) is the size of a pixel in bytes
    1.66 + *  t                         - type suffix for LDR/STR instructions
    1.67 + *  prefetch_distance         - prefetch in the source image by that many
    1.68 + *                              pixels ahead
    1.69 + *  prefetch_braking_distance - stop prefetching when that many pixels are
    1.70 + *                              remaining before the end of scanline
    1.71 + */
    1.72 +
    1.73 +.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
    1.74 +                                      prefetch_distance,        \
    1.75 +                                      prefetch_braking_distance
    1.76 +/* Generated function: inputs are W (r0), DST (r1), SRC (r2) and VX (r3) in registers, plus UNIT_X and SRC_WIDTH_FIXED read from the first two stack words. It stores W pixels to DST, each fetched from SRC at the byte offset derived from VX, and steps VX by UNIT_X per pixel. */
    1.77 +pixman_asm_function fname
    1.78 +	W		.req	r0	/* pixels left to write (re-biased before each loop below) */
    1.79 +	DST		.req	r1	/* destination pointer, post-incremented by one pixel per store */
    1.80 +	SRC		.req	r2	/* source base address; pixels are loaded from SRC + byte offset */
    1.81 +	VX		.req	r3	/* source position; bits 16 and up select the pixel */
    1.82 +	UNIT_X		.req	ip	/* amount added to VX for every output pixel */
    1.83 +	TMP1		.req	r4	/* byte offset of the next pixel to load, then the loaded pixel */
    1.84 +	TMP2		.req	r5	/* same role as TMP1, for the other pixel of each pair */
    1.85 +	VXMASK		.req	r6	/* mask that clears the low bpp_shift bits of a byte offset */
    1.86 +	PF_OFFS		.req	r7	/* position, in VX units, used for the pld address */
    1.87 +	SRC_WIDTH_FIXED	.req	r8	/* subtracted from VX for as long as VX is non-negative */
    1.88 +
    1.89 +	ldr	UNIT_X, [sp]	/* first stack word, read before the push moves sp */
    1.90 +	push	{r4, r5, r6, r7, r8, r10}	/* 6 words = 24 bytes; r10 is not referenced below - NOTE(review): presumably saved only to keep sp 8-byte aligned, confirm */
    1.91 +	mvn	VXMASK, #((1 << bpp_shift) - 1)	/* VXMASK = ~(pixel size in bytes - 1) */
    1.92 +	ldr	SRC_WIDTH_FIXED, [sp, #28]	/* 24 bytes pushed + 4: the second stack word at entry */
    1.93 +
    1.94 +	/* define helper macro: stores two pixels; on entry TMP1 holds the byte offset of the first one, on exit it holds the offset of the pixel after the pair */
    1.95 +	.macro	scale_2_pixels
    1.96 +		ldr&t	TMP1, [SRC, TMP1]	/* fetch the first pixel */
    1.97 +		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)	/* byte offset of the second pixel */
    1.98 +		adds	VX, VX, UNIT_X	/* advance; the flags set here are consumed by subpls and bpl below */
    1.99 +		str&t	TMP1, [DST], #(1 << bpp_shift)	/* write the first pixel, DST += pixel size */
   1.100 +9:		subpls	VX, VX, SRC_WIDTH_FIXED	/* while VX >= 0: VX -= SRC_WIDTH_FIXED */
   1.101 +		bpl	9b
   1.102 +
   1.103 +		ldr&t	TMP2, [SRC, TMP2]	/* fetch the second pixel */
   1.104 +		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)	/* byte offset of the pixel after this pair */
   1.105 +		adds	VX, VX, UNIT_X
   1.106 +		str&t	TMP2, [DST], #(1 << bpp_shift)	/* write the second pixel, DST += pixel size */
   1.107 +9:		subpls	VX, VX, SRC_WIDTH_FIXED	/* same wrap loop as above */
   1.108 +		bpl	9b
   1.109 +	.endm
   1.110 +
   1.111 +	/* now do the scaling: first prime TMP1 with the offset of the first pixel and advance VX once */
   1.112 +	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
   1.113 +	adds	VX, VX, UNIT_X
   1.114 +9:	subpls	VX, VX, SRC_WIDTH_FIXED	/* keep VX negative */
   1.115 +	bpl	9b
   1.116 +	subs	W, W, #(8 + prefetch_braking_distance)	/* W = pixels left - (8 + braking distance) */
   1.117 +	blt	2f	/* too few pixels for the prefetching loop */
   1.118 +	/* calculate prefetch offset: PF_OFFS = VX + UNIT_X * prefetch_distance */
   1.119 +	mov	PF_OFFS, #prefetch_distance
   1.120 +	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
   1.121 +1:	/* main loop, process 8 pixels per iteration with prefetch */
   1.122 +	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]	/* NOTE(review): PF_OFFS is never reduced by SRC_WIDTH_FIXED the way VX is */
   1.123 +	add	PF_OFFS, UNIT_X, lsl #3	/* the next prefetch is 8 pixel steps further on */
   1.124 +	scale_2_pixels
   1.125 +	scale_2_pixels
   1.126 +	scale_2_pixels
   1.127 +	scale_2_pixels
   1.128 +	subs	W, W, #8
   1.129 +	bge	1b
   1.130 +2:
   1.131 +	subs	W, W, #(4 - 8 - prefetch_braking_distance)	/* re-bias: W = pixels left - 4 */
   1.132 +	blt	2f	/* fewer than 4 pixels left */
   1.133 +1:	/* process the remaining pixels, 4 per iteration and without prefetch */
   1.134 +	scale_2_pixels
   1.135 +	scale_2_pixels
   1.136 +	subs	W, W, #4
   1.137 +	bge	1b
   1.138 +2:
   1.139 +	tst	W, #2	/* W is pixels left - 4 here, so its low two bits equal those of the pixels left */
   1.140 +	beq	2f
   1.141 +	scale_2_pixels
   1.142 +2:
   1.143 +	tst	W, #1	/* one odd pixel left? */
   1.144 +	ldrne&t	TMP1, [SRC, TMP1]	/* TMP1 already holds its byte offset */
   1.145 +	strne&t	TMP1, [DST]	/* last store, so DST is not advanced */
   1.146 +	/* cleanup helper macro and register aliases so that this template can be expanded again */
   1.147 +	.purgem	scale_2_pixels
   1.148 +	.unreq	DST
   1.149 +	.unreq	SRC
   1.150 +	.unreq	W
   1.151 +	.unreq	VX
   1.152 +	.unreq	UNIT_X
   1.153 +	.unreq	TMP1
   1.154 +	.unreq	TMP2
   1.155 +	.unreq	VXMASK
   1.156 +	.unreq	PF_OFFS
   1.157 +	.unreq  SRC_WIDTH_FIXED
   1.158 +	/* return: restore the registers pushed at entry */
   1.159 +	pop	{r4, r5, r6, r7, r8, r10}
   1.160 +	bx	lr
   1.161 +.endfunc
   1.162 +.endm
   1.163 +/* 0565 variant: bpp_shift 1 (2-byte pixels), t = h (LDRH/STRH), prefetch distance 80, prefetch braking distance 32 */
   1.164 +generate_nearest_scanline_func \
   1.165 +    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
   1.166 +/* 8888 variant: bpp_shift 2 (4-byte pixels), empty t (plain LDR/STR), prefetch distance 48, prefetch braking distance 32 */
   1.167 +generate_nearest_scanline_func \
   1.168 +    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32

mercurial