gfx/cairo/libpixman/src/pixman-arm-simd-asm-scaled.S

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 * Copyright © 2008 Mozilla Corporation
michael@0 3 * Copyright © 2010 Nokia Corporation
michael@0 4 *
michael@0 5 * Permission to use, copy, modify, distribute, and sell this software and its
michael@0 6 * documentation for any purpose is hereby granted without fee, provided that
michael@0 7 * the above copyright notice appear in all copies and that both that
michael@0 8 * copyright notice and this permission notice appear in supporting
michael@0 9 * documentation, and that the name of Mozilla Corporation not be used in
michael@0 10 * advertising or publicity pertaining to distribution of the software without
michael@0 11 * specific, written prior permission. Mozilla Corporation makes no
michael@0 12 * representations about the suitability of this software for any purpose. It
michael@0 13 * is provided "as is" without express or implied warranty.
michael@0 14 *
michael@0 15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
michael@0 16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
michael@0 17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
michael@0 18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
michael@0 19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
michael@0 20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
michael@0 21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
michael@0 22 * SOFTWARE.
michael@0 23 *
michael@0 24 * Author: Jeff Muizelaar (jeff@infidigm.net)
michael@0 25 *
michael@0 26 */
michael@0 27
michael@0 28 /* Prevent the stack from becoming executable */
michael@0 29 #if defined(__linux__) && defined(__ELF__)
michael@0 30 .section .note.GNU-stack,"",%progbits /* empty marker section, emitted only for Linux ELF builds */
michael@0 31 #endif
michael@0 32
michael@0 33 .text /* everything below is assembled into the code section */
michael@0 34 .arch armv6 /* instruction set the assembler accepts for this file */
michael@0 35 .object_arch armv4 /* architecture recorded in the object file attributes, overriding the .arch value there */
michael@0 36 .arm /* ARM (32-bit) instruction encoding, not Thumb */
michael@0 37 .altmacro /* switch the assembler to alternate macro mode for the macros below */
michael@0 38 .p2align 2 /* align to 1 << 2 = 4 bytes */
michael@0 39
michael@0 40 /* Supplementary macro for setting function attributes: declares fname as a global function symbol and places its entry label; the function body follows the macro invocation */
michael@0 41 .macro pixman_asm_function fname
michael@0 42 .func fname /* opens a debug function record; the user of this macro closes it with .endfunc */
michael@0 43 .global fname /* make the symbol visible to the linker */
michael@0 44 #ifdef __ELF__
michael@0 45 .hidden fname /* ELF only: give the symbol hidden visibility */
michael@0 46 .type fname, %function /* ELF only: mark the symbol as a function */
michael@0 47 #endif
michael@0 48 fname: /* entry point label */
michael@0 49 .endm
michael@0 50
michael@0 51 /*
michael@0 52 * Note: This code is only using armv5te instructions (not even armv6),
michael@0 53 * but is scheduled for ARM Cortex-A8 pipeline. So it might need to
michael@0 54 * be split into a few variants, tuned for each microarchitecture.
michael@0 55 *
michael@0 56 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
michael@0 57 * have efficient write combining), it needs to be changed to use 16-byte
michael@0 58 * aligned writes using the STM instruction.
michael@0 59 *
michael@0 60 * Nearest scanline scaler macro template uses the following arguments:
michael@0 61 * fname - name of the function to generate
michael@0 62 * bpp_shift - (1 << bpp_shift) is the size of a pixel in bytes
michael@0 63 * t - type suffix for LDR/STR instructions
michael@0 64 * prefetch_distance - prefetch in the source image by that many
michael@0 65 * pixels ahead
michael@0 66 * prefetch_braking_distance - stop prefetching when that many pixels are
michael@0 67 * remaining before the end of scanline
michael@0 68 */
michael@0 69
michael@0 70 .macro generate_nearest_scanline_func fname, bpp_shift, t, \
michael@0 71 prefetch_distance, \
michael@0 72 prefetch_braking_distance
michael@0 73
michael@0 74 pixman_asm_function fname /* emit the symbol attributes and the entry label */
michael@0 75 W .req r0 /* signed pixel counter; biased by the loop constants below */
michael@0 76 DST .req r1 /* destination pointer, post-incremented by one pixel per store */
michael@0 77 SRC .req r2 /* source base address, indexed with byte offsets derived from VX */
michael@0 78 VX .req r3 /* source x position, fixed point with 16 fractional bits */
michael@0 79 UNIT_X .req ip /* amount added to VX for every destination pixel */
michael@0 80 TMP1 .req r4 /* alternates between a source byte offset and the pixel loaded from it */
michael@0 81 TMP2 .req r5 /* same role as TMP1, used for every second pixel */
michael@0 82 VXMASK .req r6 /* clears the low bpp_shift bits so that offsets are pixel aligned */
michael@0 83 PF_OFFS .req r7 /* prefetch position, same fixed-point format as VX */
michael@0 84 SRC_WIDTH_FIXED .req r8 /* amount subtracted from VX while VX is not negative */
michael@0 85
michael@0 86 ldr UNIT_X, [sp] /* first stack-passed argument, read before the push moves sp */
michael@0 87 push {r4, r5, r6, r7, r8, r10} /* 6 registers = 24 bytes; r10 is not used below, presumably saved only to keep sp 8-byte aligned (TODO confirm) */
michael@0 88 mvn VXMASK, #((1 << bpp_shift) - 1) /* VXMASK = ~(pixel size in bytes - 1) */
michael@0 89 ldr SRC_WIDTH_FIXED, [sp, #28] /* 24 bytes just pushed + 4: the second stack-passed argument */
michael@0 90
michael@0 91 /* define helper macro: copies 2 pixels; expects TMP1 = byte offset of the next source pixel and leaves TMP1 set up the same way for the pixel after these two */
michael@0 92 .macro scale_2_pixels
michael@0 93 ldr&t TMP1, [SRC, TMP1] /* load the pixel whose offset was computed earlier */
michael@0 94 and TMP2, VXMASK, VX, asr #(16 - bpp_shift) /* byte offset of the following pixel: integer part of VX times pixel size */
michael@0 95 adds VX, VX, UNIT_X /* advance the source position; flags feed the wrap loop below */
michael@0 96 str&t TMP1, [DST], #(1 << bpp_shift) /* store and step DST by one pixel; flags are untouched */
michael@0 97 9: subpls VX, VX, SRC_WIDTH_FIXED /* while VX is not negative, subtract SRC_WIDTH_FIXED */
michael@0 98 bpl 9b /* NOTE(review): VX is kept negative, so SRC presumably points past the pixels being read - confirm against the caller */
michael@0 99
michael@0 100 ldr&t TMP2, [SRC, TMP2] /* second pixel: same steps with TMP1 and TMP2 swapped */
michael@0 101 and TMP1, VXMASK, VX, asr #(16 - bpp_shift)
michael@0 102 adds VX, VX, UNIT_X
michael@0 103 str&t TMP2, [DST], #(1 << bpp_shift)
michael@0 104 9: subpls VX, VX, SRC_WIDTH_FIXED
michael@0 105 bpl 9b
michael@0 106 .endm
michael@0 107
michael@0 108 /* now do the scaling */
michael@0 109 and TMP1, VXMASK, VX, asr #(16 - bpp_shift) /* prime TMP1 with the byte offset of the first pixel */
michael@0 110 adds VX, VX, UNIT_X /* VX now refers to the second pixel */
michael@0 111 9: subpls VX, VX, SRC_WIDTH_FIXED /* same wrap loop as inside scale_2_pixels */
michael@0 112 bpl 9b
michael@0 113 subs W, W, #(8 + prefetch_braking_distance) /* bias W so that its sign tells whether the prefetching loop may run */
michael@0 114 blt 2f /* fewer than 8 + prefetch_braking_distance pixels: skip the prefetching loop */
michael@0 115 /* calculate prefetch offset */
michael@0 116 mov PF_OFFS, #prefetch_distance
michael@0 117 mla PF_OFFS, UNIT_X, PF_OFFS, VX /* PF_OFFS = UNIT_X * prefetch_distance + VX */
michael@0 118 1: /* main loop, process 8 pixels per iteration with prefetch */
michael@0 119 pld [SRC, PF_OFFS, asr #(16 - bpp_shift)] /* prefetch hint; the offset is not masked with VXMASK here */
michael@0 120 add PF_OFFS, UNIT_X, lsl #3 /* PF_OFFS += 8 * UNIT_X; unlike VX it is not wrapped by SRC_WIDTH_FIXED */
michael@0 121 scale_2_pixels
michael@0 122 scale_2_pixels
michael@0 123 scale_2_pixels
michael@0 124 scale_2_pixels
michael@0 125 subs W, W, #8
michael@0 126 bge 1b /* repeat while the biased counter has not gone negative */
michael@0 127 2:
michael@0 128 subs W, W, #(4 - 8 - prefetch_braking_distance) /* negative constant: adds back 8 + prefetch_braking_distance, then subtracts 4 */
michael@0 129 blt 2f /* fewer than 4 pixels left */
michael@0 130 1: /* process the remaining pixels */
michael@0 131 scale_2_pixels
michael@0 132 scale_2_pixels
michael@0 133 subs W, W, #4
michael@0 134 bge 1b
michael@0 135 2:
michael@0 136 tst W, #2 /* W is the remaining count minus 4 here, so its low two bits still match the remaining count */
michael@0 137 beq 2f
michael@0 138 scale_2_pixels
michael@0 139 2:
michael@0 140 tst W, #1 /* one odd pixel left? */
michael@0 141 ldrne&t TMP1, [SRC, TMP1] /* TMP1 still holds the offset prepared by the previous step */
michael@0 142 strne&t TMP1, [DST] /* last store, so DST is not advanced */
michael@0 143 /* cleanup helper macro */
michael@0 144 .purgem scale_2_pixels /* lets the next instantiation of this template define it again */
michael@0 145 .unreq DST /* release the register aliases; these are assembler directives and emit no code */
michael@0 146 .unreq SRC
michael@0 147 .unreq W
michael@0 148 .unreq VX
michael@0 149 .unreq UNIT_X
michael@0 150 .unreq TMP1
michael@0 151 .unreq TMP2
michael@0 152 .unreq VXMASK
michael@0 153 .unreq PF_OFFS
michael@0 154 .unreq SRC_WIDTH_FIXED
michael@0 155 /* return */
michael@0 156 pop {r4, r5, r6, r7, r8, r10} /* restore the registers saved at entry */
michael@0 157 bx lr
michael@0 158 .endfunc /* closes the .func opened by pixman_asm_function */
michael@0 159 .endm
michael@0 160
michael@0 161 generate_nearest_scanline_func \
michael@0 162 pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 /* 2-byte pixels (bpp_shift 1), halfword load/store suffix h, prefetch 80 pixels ahead, braking distance 32 */
michael@0 163
michael@0 164 generate_nearest_scanline_func \
michael@0 165 pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 /* 4-byte pixels (bpp_shift 2), empty suffix for plain word load/store, prefetch 48 pixels ahead, braking distance 32 */

mercurial