|
1 /* |
|
2 * Copyright © 2008 Mozilla Corporation |
|
3 * Copyright © 2010 Nokia Corporation |
|
4 * |
|
5 * Permission to use, copy, modify, distribute, and sell this software and its |
|
6 * documentation for any purpose is hereby granted without fee, provided that |
|
7 * the above copyright notice appear in all copies and that both that |
|
8 * copyright notice and this permission notice appear in supporting |
|
9 * documentation, and that the name of Mozilla Corporation not be used in |
|
10 * advertising or publicity pertaining to distribution of the software without |
|
11 * specific, written prior permission. Mozilla Corporation makes no |
|
12 * representations about the suitability of this software for any purpose. It |
|
13 * is provided "as is" without express or implied warranty. |
|
14 * |
|
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS |
|
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
|
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY |
|
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
|
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN |
|
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING |
|
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
|
22 * SOFTWARE. |
|
23 * |
|
24 * Author: Jeff Muizelaar (jeff@infidigm.net) |
|
25 * |
|
26 */ |
|
27 |
|
/*
 * Prevent the stack from becoming executable: emitting an empty
 * .note.GNU-stack section marks this object as not requiring an
 * executable stack.
 */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

	.text
	.arch armv6		/* instruction set the assembler accepts */
	.object_arch armv4	/* architecture recorded in the object file */
	.arm			/* ARM (32-bit) instruction encoding */
	.altmacro		/* macros below use '&' pasting and bare argument names */
	.p2align 2		/* align to a 4-byte boundary */
|
39 |
|
/*
 * pixman_asm_function fname
 *
 * Supplementary macro that opens a function definition and sets its
 * symbol attributes:
 *   - exports fname as a global symbol;
 *   - on ELF targets, types it as a function and gives it hidden
 *     visibility;
 *   - opens a .func scope, which the code following the macro has to
 *     close with .endfunc;
 *   - defines the fname label at the current location.
 */
.macro pixman_asm_function fname
	.globl fname
#ifdef __ELF__
	.type fname, %function
	.hidden fname
#endif
	.func fname
fname:
.endm
|
50 |
|
/*
 * Note: This code is only using armv5te instructions (not even armv6),
 *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
 *       be split into a few variants, tuned for each microarchitecture.
 *
 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
 * have efficient write combining), it needs to be changed to use 16-byte
 * aligned writes using STM instruction.
 *
 * Nearest scanline scaler macro template uses the following arguments:
 *  fname                     - name of the function to generate
 *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
 *  t                         - type suffix for LDR/STR instructions
 *  prefetch_distance         - prefetch in the source image by that many
 *                              pixels ahead
 *  prefetch_braking_distance - stop prefetching when that many pixels are
 *                              remaining before the end of scanline
 *
 * Arguments of the generated function, as the code reads them:
 *  r0       - W:               number of pixels to write
 *  r1       - DST:             destination pointer, post-incremented by one
 *                              pixel per store
 *  r2       - SRC:             base address for the source loads
 *  r3       - VX:              source position with 16 fractional bits
 *  [sp]     - UNIT_X:          added to VX after every pixel
 *  [sp, #4] - SRC_WIDTH_FIXED: subtracted from VX for as long as VX is
 *                              non-negative after a step
 *
 * NOTE(review): the wrap-around loops keep VX negative, so SRC is presumably
 * expected to point just past the end of the source scanline, with VX being
 * a negative offset from it - confirm against the C caller.
 *
 * On ELF targets the generated symbol is given a .size covering the whole
 * function body, so that it does not end up as a zero-sized symbol.
 */

.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
                                      prefetch_distance,        \
                                      prefetch_braking_distance

pixman_asm_function fname
	W		.req	r0
	DST		.req	r1
	SRC		.req	r2
	VX		.req	r3
	UNIT_X		.req	ip
	TMP1		.req	r4
	TMP2		.req	r5
	VXMASK		.req	r6
	PF_OFFS		.req	r7
	SRC_WIDTH_FIXED	.req	r8

	/* first stack argument, fetched before the push moves sp */
	ldr	UNIT_X, [sp]
	/*
	 * Six registers = 24 bytes. r10 is not used by the code below;
	 * presumably it is saved only to keep the register count even
	 * (8-byte stack alignment) - TODO confirm.
	 */
	push	{r4, r5, r6, r7, r8, r10}
	/* mask that clears the low bpp_shift bits of a byte offset */
	mvn	VXMASK, #((1 << bpp_shift) - 1)
	/* second stack argument: entry [sp, #4] is [sp, #28] after the push */
	ldr	SRC_WIDTH_FIXED, [sp, #28]

	/*
	 * Helper macro: copy two pixels.
	 *
	 * Software pipelined: on entry TMP1 must already hold the byte
	 * offset of the next source pixel; on exit TMP1 holds the byte
	 * offset of the pixel after the two that were copied. Every load
	 * is followed by the offset calculation for the next pixel
	 * ((VX >> 16) << bpp_shift, done as a shift plus mask), the VX
	 * step, the store, and a loop that subtracts SRC_WIDTH_FIXED from
	 * VX until it becomes negative again. The store sits between ADDS
	 * and the conditional SUBPLS; it does not modify the flags.
	 */
	.macro	scale_2_pixels
		ldr&t	TMP1, [SRC, TMP1]
		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)
		adds	VX, VX, UNIT_X
		str&t	TMP1, [DST], #(1 << bpp_shift)
9:		subpls	VX, VX, SRC_WIDTH_FIXED
		bpl	9b

		ldr&t	TMP2, [SRC, TMP2]
		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
		adds	VX, VX, UNIT_X
		str&t	TMP2, [DST], #(1 << bpp_shift)
9:		subpls	VX, VX, SRC_WIDTH_FIXED
		bpl	9b
	.endm

	/* now do the scaling */

	/* prime the pipeline: byte offset of the first pixel, then step VX */
	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
	adds	VX, VX, UNIT_X
9:	subpls	VX, VX, SRC_WIDTH_FIXED
	bpl	9b
	/*
	 * Bias W so that it goes negative once fewer than
	 * (8 + prefetch_braking_distance) pixels remain.
	 */
	subs	W, W, #(8 + prefetch_braking_distance)
	blt	2f
	/* calculate prefetch offset */
	mov	PF_OFFS, #prefetch_distance
	mla	PF_OFFS, UNIT_X, PF_OFFS, VX	/* VX + prefetch_distance * UNIT_X */
1:	/* main loop, process 8 pixels per iteration with prefetch */
	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
	add	PF_OFFS, UNIT_X, lsl #3		/* advance by 8 pixels */
	scale_2_pixels
	scale_2_pixels
	scale_2_pixels
	scale_2_pixels
	subs	W, W, #8
	bge	1b
2:
	/*
	 * Undo the bias above and apply a bias of 4 instead (the immediate
	 * is negative, so this adds): W = remaining pixels - 4.
	 */
	subs	W, W, #(4 - 8 - prefetch_braking_distance)
	blt	2f
1:	/* process the remaining pixels */
	scale_2_pixels
	scale_2_pixels
	subs	W, W, #4
	bge	1b
2:
	/*
	 * W is now (remaining - 4) with 0..3 pixels remaining, so its two
	 * low bits are the same as those of the remaining pixel count.
	 */
	tst	W, #2
	beq	2f
	scale_2_pixels
2:
	tst	W, #1
	ldrne&t	TMP1, [SRC, TMP1]
	strne&t	TMP1, [DST]
	/* cleanup helper macro */
	.purgem	scale_2_pixels
	.unreq	DST
	.unreq	SRC
	.unreq	W
	.unreq	VX
	.unreq	UNIT_X
	.unreq	TMP1
	.unreq	TMP2
	.unreq	VXMASK
	.unreq	PF_OFFS
	.unreq	SRC_WIDTH_FIXED
	/* return */
	pop	{r4, r5, r6, r7, r8, r10}
	bx	lr
#ifdef __ELF__
	/* record the function size in the ELF symbol table */
	.size	fname, . - fname
#endif
.endfunc
.endm
|
160 |
|
/*
 * 2-byte pixels (bpp_shift = 1), accessed with the "h" (halfword) forms
 * of LDR/STR; prefetch 80 pixels ahead, stop prefetching 32 pixels before
 * the end of the scanline.
 */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32

/*
 * 4-byte pixels (bpp_shift = 2), accessed with plain word LDR/STR (empty
 * type suffix); prefetch 48 pixels ahead, stop prefetching 32 pixels
 * before the end of the scanline.
 */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32