/*
 * Copyright © 2008 Mozilla Corporation
 * Copyright © 2010 Nokia Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Mozilla Corporation not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission. Mozilla Corporation makes no
 * representations about the suitability of this software for any purpose. It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author: Jeff Muizelaar (jeff@infidigm.net)
 *
 */

/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/*
 * Assembler setup: emit into .text, assemble as ARM (not Thumb) code with
 * the armv6 instruction set enabled while recording armv4 as the object
 * architecture, and align to 4 bytes. The macros in this file refer to
 * their parameters by bare name and splice them into mnemonics with an
 * ampersand, so the alternate macro syntax is switched on here.
 */
	.text
	.arch armv6
	.object_arch armv4
	.arm
	.altmacro
	.p2align 2

/*
 * Supplementary macro for setting function attributes.
 *
 *  fname - symbol to define
 *
 * Opens a .func debug scope, exports the symbol as global and, for ELF
 * targets, additionally marks it hidden and gives it the function type.
 * The macro ends by emitting the label itself, so the function body must
 * follow the macro invocation directly. The matching .endfunc is the
 * responsibility of the code that uses this macro.
 */
.macro pixman_asm_function fname
	.func fname
	.global fname
#ifdef __ELF__
	.hidden fname
	.type fname, %function
#endif
fname:
.endm

/*
 * Note: This code is only using armv5te instructions (not even armv6),
 *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
 *       be split into a few variants, tuned for each microarchitecture.
 *
 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
 * have efficient write combining), it needs to be changed to use 16-byte
 * aligned writes using STM instruction.
michael@0: * michael@0: * Nearest scanline scaler macro template uses the following arguments: michael@0: * fname - name of the function to generate michael@0: * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes michael@0: * t - type suffix for LDR/STR instructions michael@0: * prefetch_distance - prefetch in the source image by that many michael@0: * pixels ahead michael@0: * prefetch_braking_distance - stop prefetching when that many pixels are michael@0: * remaining before the end of scanline michael@0: */ michael@0: michael@0: .macro generate_nearest_scanline_func fname, bpp_shift, t, \ michael@0: prefetch_distance, \ michael@0: prefetch_braking_distance michael@0: michael@0: pixman_asm_function fname michael@0: W .req r0 michael@0: DST .req r1 michael@0: SRC .req r2 michael@0: VX .req r3 michael@0: UNIT_X .req ip michael@0: TMP1 .req r4 michael@0: TMP2 .req r5 michael@0: VXMASK .req r6 michael@0: PF_OFFS .req r7 michael@0: SRC_WIDTH_FIXED .req r8 michael@0: michael@0: ldr UNIT_X, [sp] michael@0: push {r4, r5, r6, r7, r8, r10} michael@0: mvn VXMASK, #((1 << bpp_shift) - 1) michael@0: ldr SRC_WIDTH_FIXED, [sp, #28] michael@0: michael@0: /* define helper macro */ michael@0: .macro scale_2_pixels michael@0: ldr&t TMP1, [SRC, TMP1] michael@0: and TMP2, VXMASK, VX, asr #(16 - bpp_shift) michael@0: adds VX, VX, UNIT_X michael@0: str&t TMP1, [DST], #(1 << bpp_shift) michael@0: 9: subpls VX, VX, SRC_WIDTH_FIXED michael@0: bpl 9b michael@0: michael@0: ldr&t TMP2, [SRC, TMP2] michael@0: and TMP1, VXMASK, VX, asr #(16 - bpp_shift) michael@0: adds VX, VX, UNIT_X michael@0: str&t TMP2, [DST], #(1 << bpp_shift) michael@0: 9: subpls VX, VX, SRC_WIDTH_FIXED michael@0: bpl 9b michael@0: .endm michael@0: michael@0: /* now do the scaling */ michael@0: and TMP1, VXMASK, VX, asr #(16 - bpp_shift) michael@0: adds VX, VX, UNIT_X michael@0: 9: subpls VX, VX, SRC_WIDTH_FIXED michael@0: bpl 9b michael@0: subs W, W, #(8 + prefetch_braking_distance) michael@0: blt 2f michael@0: 
/* calculate prefetch offset */ michael@0: mov PF_OFFS, #prefetch_distance michael@0: mla PF_OFFS, UNIT_X, PF_OFFS, VX michael@0: 1: /* main loop, process 8 pixels per iteration with prefetch */ michael@0: pld [SRC, PF_OFFS, asr #(16 - bpp_shift)] michael@0: add PF_OFFS, UNIT_X, lsl #3 michael@0: scale_2_pixels michael@0: scale_2_pixels michael@0: scale_2_pixels michael@0: scale_2_pixels michael@0: subs W, W, #8 michael@0: bge 1b michael@0: 2: michael@0: subs W, W, #(4 - 8 - prefetch_braking_distance) michael@0: blt 2f michael@0: 1: /* process the remaining pixels */ michael@0: scale_2_pixels michael@0: scale_2_pixels michael@0: subs W, W, #4 michael@0: bge 1b michael@0: 2: michael@0: tst W, #2 michael@0: beq 2f michael@0: scale_2_pixels michael@0: 2: michael@0: tst W, #1 michael@0: ldrne&t TMP1, [SRC, TMP1] michael@0: strne&t TMP1, [DST] michael@0: /* cleanup helper macro */ michael@0: .purgem scale_2_pixels michael@0: .unreq DST michael@0: .unreq SRC michael@0: .unreq W michael@0: .unreq VX michael@0: .unreq UNIT_X michael@0: .unreq TMP1 michael@0: .unreq TMP2 michael@0: .unreq VXMASK michael@0: .unreq PF_OFFS michael@0: .unreq SRC_WIDTH_FIXED michael@0: /* return */ michael@0: pop {r4, r5, r6, r7, r8, r10} michael@0: bx lr michael@0: .endfunc michael@0: .endm michael@0: michael@0: generate_nearest_scanline_func \ michael@0: pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 michael@0: michael@0: generate_nearest_scanline_func \ michael@0: pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32