gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2  * Copyright © 2008 Mozilla Corporation
     3  * Copyright © 2010 Nokia Corporation
     4  *
     5  * Permission to use, copy, modify, distribute, and sell this software and its
     6  * documentation for any purpose is hereby granted without fee, provided that
     7  * the above copyright notice appear in all copies and that both that
     8  * copyright notice and this permission notice appear in supporting
     9  * documentation, and that the name of Mozilla Corporation not be used in
    10  * advertising or publicity pertaining to distribution of the software without
    11  * specific, written prior permission.  Mozilla Corporation makes no
    12  * representations about the suitability of this software for any purpose.  It
    13  * is provided "as is" without express or implied warranty.
    14  *
    15  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
    16  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
    18  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
    20  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
    21  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
    22  * SOFTWARE.
    23  *
    24  * Author:  Jeff Muizelaar (jeff@infidigm.net)
    25  *
    26  */
    28 /* Prevent the stack from becoming executable */
       /*
        * On Linux ELF builds this is done by emitting an empty
        * .note.GNU-stack section; other targets skip it.
        */
    29 #if defined(__linux__) && defined(__ELF__)
    30 .section .note.GNU-stack,"",%progbits
    31 #endif
       /*
        * Assembler state for everything that follows in this file.  The
        * .text directive also switches back out of the note section above.
        */
    33 	.text
    34 	.arch armv6	/* accept instructions up to ARMv6 */
       /*
        * Record ARMv4 in the object attributes instead of the .arch value.
        * NOTE(review): presumably so the object links into builds for older
        * cores, with these routines selected at run time - confirm.
        */
    35 	.object_arch armv4
    36 	.arm		/* ARM (not Thumb) instruction encoding */
       /*
        * The macros below refer to their parameters by bare name (fname,
        * bpp_shift, t) and glue a parameter onto a mnemonic with an
        * ampersand, which relies on the alternate macro syntax.
        */
    37 	.altmacro
    38 	.p2align 2	/* align to 2^2 = 4 bytes */
    40 /*
        * pixman_asm_function fname
        *
        * Opens an assembly function called fname: the symbol is exported, a
        * .func debug scope is started and the entry label is emitted.  When
        * building for ELF the symbol is additionally typed as a function and
        * given hidden visibility.  The user of this macro closes the scope
        * with .endfunc after the function body.
        */
    41 .macro pixman_asm_function fname
    42 #ifdef __ELF__
    43 	.type fname, %function
    44 	.hidden fname
    45 #endif
    46 	.global fname
    47 	.func fname
    48 fname:
    49 .endm
    51 /*
    52  * Note: This code is only using armv5te instructions (not even armv6),
    53  *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
    54  *       be split into a few variants, tuned for each microarchitecture.
    55  *
    56  * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
    57  * have efficient write combining), it needs to be changed to use 16-byte
    58  * aligned writes using STM instruction.
    59  *
    60  * Nearest scanline scaler macro template uses the following arguments:
    61  *  fname                     - name of the function to generate
    62  *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
    63  *  t                         - type suffix for LDR/STR instructions
    64  *  prefetch_distance         - prefetch in the source image by that many
    65  *                              pixels ahead
    66  *  prefetch_braking_distance - stop prefetching when that many pixels are
    67  *                              remaining before the end of scanline
    68  */
    70 .macro generate_nearest_scanline_func fname, bpp_shift, t,      \
    71                                       prefetch_distance,        \
    72                                       prefetch_braking_distance
    74 pixman_asm_function fname
       /*
        * Generated function: writes W pixels to DST.  Each pixel is read from
        * SRC at the byte offset derived from the integer part of VX, and VX
        * is advanced by UNIT_X after every pixel.  VX is handled as a
        * fixed-point number with 16 fractional bits.
        *
        * Inputs as read by this code:
        *   r0        - W, number of pixels to write
        *   r1        - DST, destination pointer (post-incremented per pixel)
        *   r2        - SRC, source base pointer
        *   r3        - VX, starting position
        *   [sp]      - UNIT_X, read at entry before anything is pushed
        *   [sp, #4]  - SRC_WIDTH_FIXED, read as [sp, #28] after the push
        *
        * r4-r8 and r10 are saved on entry and restored before returning.
        *
        * After every step, VX is reduced by SRC_WIDTH_FIXED for as long as it
        * is non-negative, so every source offset after the first one is
        * computed from a negative VX.
        * NOTE(review): presumably the caller biases SRC and VX to match this,
        * passes W >= 0, and passes SRC_WIDTH_FIXED > 0 (otherwise the wrap
        * loops would not terminate for a non-negative VX) - confirm against
        * the C caller.
        */
    75 	W		.req	r0	/* pixels left, biased during the loops */
    76 	DST		.req	r1	/* destination write pointer */
    77 	SRC		.req	r2	/* source base pointer */
    78 	VX		.req	r3	/* current source position, 16.16 */
    79 	UNIT_X		.req	ip	/* position step per pixel */
    80 	TMP1		.req	r4	/* pixel value or next byte offset */
    81 	TMP2		.req	r5	/* pixel value or next byte offset */
    82 	VXMASK		.req	r6	/* clears the low bpp_shift bits */
    83 	PF_OFFS		.req	r7	/* position used for prefetching */
    84 	SRC_WIDTH_FIXED	.req	r8	/* amount VX is wrapped by */
       /* Fetch the first stack argument while sp still has its entry value. */
    86 	ldr	UNIT_X, [sp]
       /*
        * r10 is saved although nothing below uses it.
        * NOTE(review): presumably it pads the push to an even number of
        * registers - confirm.
        */
    87 	push	{r4, r5, r6, r7, r8, r10}
       /*
        * With the low bpp_shift bits cleared, VX shifted right by
        * (16 - bpp_shift) becomes (integer part of VX) * pixel size.
        */
    88 	mvn	VXMASK, #((1 << bpp_shift) - 1)
       /* Six registers (24 bytes) were pushed, so this is entry sp + 4. */
    89 	ldr	SRC_WIDTH_FIXED, [sp, #28]
    91 	/* define helper macro */
       /*
        * scale_2_pixels copies two pixels and is software-pipelined: on
        * entry TMP1 holds the byte offset of the next pixel.  Each half
        * loads a pixel through one temporary, computes the following offset
        * into the other, steps VX, stores the pixel with a post-increment of
        * DST by one pixel, then wraps VX.  On exit TMP1 again holds the
        * offset of the next pixel.  The str does not touch the flags, so the
        * subpls/bpl pair still sees the result of the adds.
        */
    92 	.macro	scale_2_pixels
    93 		ldr&t	TMP1, [SRC, TMP1]
    94 		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)
    95 		adds	VX, VX, UNIT_X
    96 		str&t	TMP1, [DST], #(1 << bpp_shift)
       /* while VX is non-negative, subtract SRC_WIDTH_FIXED */
    97 9:		subpls	VX, VX, SRC_WIDTH_FIXED
    98 		bpl	9b
   100 		ldr&t	TMP2, [SRC, TMP2]
   101 		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
   102 		adds	VX, VX, UNIT_X
   103 		str&t	TMP2, [DST], #(1 << bpp_shift)
       /* while VX is non-negative, subtract SRC_WIDTH_FIXED */
   104 9:		subpls	VX, VX, SRC_WIDTH_FIXED
   105 		bpl	9b
   106 	.endm
   108 	/* now do the scaling */
       /*
        * Prime the pipeline: byte offset of the first pixel goes into TMP1,
        * then VX is stepped and wrapped exactly as inside scale_2_pixels.
        */
   109 	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
   110 	adds	VX, VX, UNIT_X
   111 9:	subpls	VX, VX, SRC_WIDTH_FIXED
   112 	bpl	9b
       /*
        * Bias W so that W >= 0 means at least 8 pixels plus the braking
        * distance are still to be written; otherwise skip the prefetch loop.
        */
   113 	subs	W, W, #(8 + prefetch_braking_distance)
   114 	blt	2f
   115 	/* calculate prefetch offset */
       /*
        * PF_OFFS = UNIT_X * prefetch_distance + VX, that is the position
        * prefetch_distance pixels ahead of VX.  PF_OFFS is never wrapped by
        * SRC_WIDTH_FIXED and is only ever used as a pld address.
        */
   116 	mov	PF_OFFS, #prefetch_distance
   117 	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
   118 1:	/* main loop, process 8 pixels per iteration with prefetch */
   119 	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
       /* move the prefetch position on by 8 pixels (UNIT_X * 8) */
   120 	add	PF_OFFS, UNIT_X, lsl #3
   121 	scale_2_pixels
   122 	scale_2_pixels
   123 	scale_2_pixels
   124 	scale_2_pixels
   125 	subs	W, W, #8
   126 	bge	1b
   127 2:
       /*
        * Undo the earlier bias and apply a bias of 4 instead: the immediate
        * is negative, so this adds 4 + prefetch_braking_distance to W.
        * From here W holds (pixels left - 4).
        */
   128 	subs	W, W, #(4 - 8 - prefetch_braking_distance)
   129 	blt	2f
   130 1:	/* process the remaining pixels */
       /* four pixels per iteration, without prefetch */
   131 	scale_2_pixels
   132 	scale_2_pixels
   133 	subs	W, W, #4
   134 	bge	1b
   135 2:
       /*
        * Fewer than 4 pixels are left.  Subtracting 4 leaves bits 0 and 1
        * unchanged, so the low two bits of W still equal the low two bits
        * of the number of pixels left: bit 1 selects a pair, bit 0 a single
        * final pixel.
        */
   136 	tst	W, #2
   137 	beq	2f
   138 	scale_2_pixels
   139 2:
   140 	tst	W, #1
       /* TMP1 already holds the offset of the last pixel */
   141 	ldrne&t	TMP1, [SRC, TMP1]
   142 	strne&t	TMP1, [DST]
   143 	/* cleanup helper macro */
       /*
        * The helper macro and the register aliases are released here; this
        * template is expanded more than once in this file and each
        * expansion defines them again.
        */
   144 	.purgem	scale_2_pixels
   145 	.unreq	DST
   146 	.unreq	SRC
   147 	.unreq	W
   148 	.unreq	VX
   149 	.unreq	UNIT_X
   150 	.unreq	TMP1
   151 	.unreq	TMP2
   152 	.unreq	VXMASK
   153 	.unreq	PF_OFFS
   154 	.unreq  SRC_WIDTH_FIXED
   155 	/* return */
       /* restore the registers saved on entry, then return to the caller */
   156 	pop	{r4, r5, r6, r7, r8, r10}
   157 	bx	lr
   158 .endfunc
   159 .endm
   161 generate_nearest_scanline_func \
   162     pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
       /*
        * The instantiation above passes bpp_shift = 1 and t = h: pixels are
        * 2 bytes wide and are moved with ldrh/strh, prefetching 80 pixels
        * ahead.
        *
        * The instantiation below passes bpp_shift = 2 and an empty t: pixels
        * are 4 bytes wide and are moved with plain ldr/str, prefetching 48
        * pixels ahead.
        *
        * Both use a prefetch braking distance of 32 pixels.
        */
   164 generate_nearest_scanline_func \
   165     pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32

mercurial