gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S

author      Michael Schloh von Bennewitz <michael@schloh.com>
date        Thu, 22 Jan 2015 13:21:57 +0100
branch      TOR_BUG_9701
changeset   15:b8a032363ba2
permissions -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

/*
 * Copyright © 2011 SCore Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
 * Author: Taekyun Kim (tkq.kim@samsung.com)
 */

/*
 * This file contains scaled bilinear scanline functions implemented
 * using Siarhei's older bilinear macro templates.
 *
 * << General scanline function procedure >>
 * 1. bilinear interpolate source pixels
 * 2. load mask pixels
 * 3. load destination pixels
 * 4. duplicate mask to fill whole register
 * 5. interleave source & destination pixels
 * 6. apply mask to source pixels
 * 7. combine source & destination pixels
 * 8. deinterleave final result
 * 9. store destination pixels
 *
 * All registers with a single number (i.e. src0, tmp0) are 64-bit registers.
 * Registers with double numbers (src01, dst01) are 128-bit registers.
 * All temp registers can be used freely outside the code block.
 * The symbols (register .req) OUT and MASK are assumed to be defined by the
 * caller of these macro blocks.
 *
 * Remarks
 * There can be lots of pipeline stalls inside a code block and between code blocks.
 * Further optimizations will be done with new macro templates using the
 * head/tail_head/tail scheme.
 */
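
/*
 * For orientation, a rough C model of step 1 above, the bilinear fetch and
 * interpolation of a single 8-bit channel (a hedged sketch only, not part of
 * the build; it mirrors the truncating shift by 2 * BILINEAR_INTERPOLATION_BITS
 * used by the NEON code, with the vertical weights summing to
 * 1 << BILINEAR_INTERPOLATION_BITS and the horizontal weight in that range):
 *
 *     static inline unsigned
 *     bilinear_channel (unsigned tl, unsigned tr,   // top left/right, 0..255
 *                       unsigned bl, unsigned br,   // bottom left/right
 *                       unsigned wt, unsigned wb,   // vertical weights
 *                       unsigned wx)                // horizontal weight
 *     {
 *         unsigned scale = 1u << BILINEAR_INTERPOLATION_BITS;
 *         unsigned left  = tl * wt + bl * wb;       // vertical pass
 *         unsigned right = tr * wt + br * wb;
 *
 *         // horizontal pass, then drop both weight scales (truncating)
 *         return (left * (scale - wx) + right * wx)
 *                    >> (2 * BILINEAR_INTERPOLATION_BITS);
 *     }
 *
 * Steps 2-9 (mask, destination and the actual compositing operator) are
 * covered by the bilinear_load_mask / bilinear_apply_mask_to_src /
 * bilinear_combine / bilinear_store macro families further down.
 */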

/* Prevent the stack from becoming executable for no reason... */
#if defined(__linux__) && defined (__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

    .text
    .fpu neon
    .arch armv7a
    .object_arch armv4
    .eabi_attribute 10, 0
    .eabi_attribute 12, 0
    .arm
    .altmacro
    .p2align 2

#include "pixman-private.h"
#include "pixman-arm-neon-asm.h"

/*
 * Bilinear macros from pixman-arm-neon-asm.S
 */

/* Supplementary macro for setting function attributes */
.macro pixman_asm_function fname
    .func fname
    .global fname
#ifdef __ELF__
    .hidden fname
    .type fname, %function
#endif
fname:
.endm

/*
 * Bilinear scaling support code which tries to provide pixel fetching, color
 * format conversion, and interpolation as separate macros which can be used
 * as the basic building blocks for constructing bilinear scanline functions.
 */
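
/*
 * The load macros below step through the source with a 16.16 fixed-point
 * coordinate: X is the current position, UX the per-pixel increment, TOP
 * points at the upper scanline and STRIDE == BOTTOM - TOP (in bytes), so a
 * load via [TMP1], STRIDE followed by [TMP1] fetches the vertically adjacent
 * pair of texels.  A hedged C equivalent of bilinear_load_8888 (illustrative
 * only, not part of the build):
 *
 *     const uint32_t *top;          // TOP
 *     ptrdiff_t       stride;       // STRIDE, in bytes
 *     uint32_t        x, ux;        // X, UX (16.16 fixed point)
 *
 *     const uint8_t  *p  = (const uint8_t *) (top + (x >> 16));
 *     uint32_t        tl = *(const uint32_t *) p;            // top texel
 *     uint32_t        bl = *(const uint32_t *) (p + stride); // bottom texel
 *     x += ux;                                               // advance
 */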

.macro bilinear_load_8888 reg1, reg2, tmp
    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #2
    vld1.32   {reg1}, [TMP1], STRIDE
    vld1.32   {reg2}, [TMP1]
.endm

.macro bilinear_load_0565 reg1, reg2, tmp
    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #1
    vld1.32   {reg2[0]}, [TMP1], STRIDE
    vld1.32   {reg2[1]}, [TMP1]
    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
.endm

.macro bilinear_load_and_vertical_interpolate_two_8888 \
                acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2

    bilinear_load_8888 reg1, reg2, tmp1
    vmull.u8  acc1, reg1, d28
    vmlal.u8  acc1, reg2, d29
    bilinear_load_8888 reg3, reg4, tmp2
    vmull.u8  acc2, reg3, d28
    vmlal.u8  acc2, reg4, d29
.endm
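
/*
 * Note: throughout these macros d28 and d29 hold the vertical weights WT and
 * WB replicated across all lanes (set up by generate_bilinear_scanline_func
 * below), so each vmull.u8/vmlal.u8 pair computes the widened per-channel sum
 * top * wt + bottom * wb, i.e. the vertical half of the interpolation.
 */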

.macro bilinear_load_and_vertical_interpolate_four_8888 \
                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi

    bilinear_load_and_vertical_interpolate_two_8888 \
                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
    bilinear_load_and_vertical_interpolate_two_8888 \
                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
.endm

.macro bilinear_load_and_vertical_interpolate_two_0565 \
                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi

    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #1
    mov       TMP2, X, asr #16
    add       X, X, UX
    add       TMP2, TOP, TMP2, asl #1
    vld1.32   {acc2lo[0]}, [TMP1], STRIDE
    vld1.32   {acc2hi[0]}, [TMP2], STRIDE
    vld1.32   {acc2lo[1]}, [TMP1]
    vld1.32   {acc2hi[1]}, [TMP2]
    convert_0565_to_x888 acc2, reg3, reg2, reg1
    vzip.u8   reg1, reg3
    vzip.u8   reg2, reg4
    vzip.u8   reg3, reg4
    vzip.u8   reg1, reg2
    vmull.u8  acc1, reg1, d28
    vmlal.u8  acc1, reg2, d29
    vmull.u8  acc2, reg3, d28
    vmlal.u8  acc2, reg4, d29
.endm

.macro bilinear_load_and_vertical_interpolate_four_0565 \
                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi

    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #1
    mov       TMP2, X, asr #16
    add       X, X, UX
    add       TMP2, TOP, TMP2, asl #1
    vld1.32   {xacc2lo[0]}, [TMP1], STRIDE
    vld1.32   {xacc2hi[0]}, [TMP2], STRIDE
    vld1.32   {xacc2lo[1]}, [TMP1]
    vld1.32   {xacc2hi[1]}, [TMP2]
    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #1
    mov       TMP2, X, asr #16
    add       X, X, UX
    add       TMP2, TOP, TMP2, asl #1
    vld1.32   {yacc2lo[0]}, [TMP1], STRIDE
    vzip.u8   xreg1, xreg3
    vld1.32   {yacc2hi[0]}, [TMP2], STRIDE
    vzip.u8   xreg2, xreg4
    vld1.32   {yacc2lo[1]}, [TMP1]
    vzip.u8   xreg3, xreg4
    vld1.32   {yacc2hi[1]}, [TMP2]
    vzip.u8   xreg1, xreg2
    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
    vmull.u8  xacc1, xreg1, d28
    vzip.u8   yreg1, yreg3
    vmlal.u8  xacc1, xreg2, d29
    vzip.u8   yreg2, yreg4
    vmull.u8  xacc2, xreg3, d28
    vzip.u8   yreg3, yreg4
    vmlal.u8  xacc2, xreg4, d29
    vzip.u8   yreg1, yreg2
    vmull.u8  yacc1, yreg1, d28
    vmlal.u8  yacc1, yreg2, d29
    vmull.u8  yacc2, yreg3, d28
    vmlal.u8  yacc2, yreg4, d29
.endm

.macro bilinear_store_8888 numpix, tmp1, tmp2
.if numpix == 4
    vst1.32   {d0, d1}, [OUT]!
.elseif numpix == 2
    vst1.32   {d0}, [OUT]!
.elseif numpix == 1
    vst1.32   {d0[0]}, [OUT, :32]!
.else
    .error bilinear_store_8888 numpix is unsupported
.endif
.endm

.macro bilinear_store_0565 numpix, tmp1, tmp2
    vuzp.u8   d0, d1
    vuzp.u8   d2, d3
    vuzp.u8   d1, d3
    vuzp.u8   d0, d2
    convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
.if numpix == 4
    vst1.16   {d2}, [OUT]!
.elseif numpix == 2
    vst1.32   {d2[0]}, [OUT]!
.elseif numpix == 1
    vst1.16   {d2[0]}, [OUT]!
.else
    .error bilinear_store_0565 numpix is unsupported
.endif
.endm


/*
 * Macros for loading mask pixels into register 'mask'.
 * vdup must be done somewhere else.
 */
.macro bilinear_load_mask_x numpix, mask
.endm

.macro bilinear_load_mask_8 numpix, mask
.if numpix == 4
    vld1.32   {mask[0]}, [MASK]!
.elseif numpix == 2
    vld1.16   {mask[0]}, [MASK]!
.elseif numpix == 1
    vld1.8    {mask[0]}, [MASK]!
.else
    .error bilinear_load_mask_8 numpix is unsupported
.endif
    pld       [MASK, #prefetch_offset]
.endm

.macro bilinear_load_mask mask_fmt, numpix, mask
    bilinear_load_mask_&mask_fmt numpix, mask
.endm


/*
 * Macros for loading destination pixels into registers 'dst0' and 'dst1'.
 * Interleave should be done somewhere else.
 */
.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01
.endm

.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01
.endm

.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01
.if numpix == 4
    vld1.32   {dst0, dst1}, [OUT]
.elseif numpix == 2
    vld1.32   {dst0}, [OUT]
.elseif numpix == 1
    vld1.32   {dst0[0]}, [OUT]
.else
    .error bilinear_load_dst_8888 numpix is unsupported
.endif
    pld       [OUT, #(prefetch_offset * 4)]
.endm

.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01
    bilinear_load_dst_8888 numpix, dst0, dst1, dst01
.endm

.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01
    bilinear_load_dst_8888 numpix, dst0, dst1, dst01
.endm

.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01
    bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01
.endm

/*
 * Macros for duplicating a partially loaded mask to fill an entire register.
 * The mask is applied to interleaved source pixels, that is
 * (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3)
 * (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3)
 * so the loaded mask needs to be duplicated across the whole register.
 *
 * For the two-pixel case:
 * (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
 * (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
 * Some optimizations are possible here, including the last-pixel case.
 */
.macro bilinear_duplicate_mask_x numpix, mask
.endm

.macro bilinear_duplicate_mask_8 numpix, mask
.if numpix == 4
    vdup.32   mask, mask[0]
.elseif numpix == 2
    vdup.16   mask, mask[0]
.elseif numpix == 1
    vdup.8    mask, mask[0]
.else
    .error bilinear_duplicate_mask_8 is unsupported
.endif
.endm

.macro bilinear_duplicate_mask mask_fmt, numpix, mask
    bilinear_duplicate_mask_&mask_fmt numpix, mask
.endm

/*
 * Macros for interleaving src and dst pixels into rrrr gggg bbbb aaaa form.
 * Interleaving is needed when a mask is enabled or the operator is 'over'.
 */
.macro bilinear_interleave src0, src1, dst0, dst1
    vuzp.8    src0, src1
    vuzp.8    dst0, dst1
    vuzp.8    src0, src1
    vuzp.8    dst0, dst1
.endm

.macro bilinear_interleave_src_dst_x_src \
                numpix, src0, src1, src01, dst0, dst1, dst01
.endm

.macro bilinear_interleave_src_dst_x_over \
                numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave src0, src1, dst0, dst1
.endm

.macro bilinear_interleave_src_dst_x_add \
                numpix, src0, src1, src01, dst0, dst1, dst01
.endm

.macro bilinear_interleave_src_dst_8_src \
                numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave src0, src1, dst0, dst1
.endm

.macro bilinear_interleave_src_dst_8_over \
                numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave src0, src1, dst0, dst1
.endm

.macro bilinear_interleave_src_dst_8_add \
                numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave src0, src1, dst0, dst1
.endm

.macro bilinear_interleave_src_dst \
                mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave_src_dst_&mask_fmt&_&op \
                numpix, src0, src1, src01, dst0, dst1, dst01
.endm


/*
 * Macros for applying the mask to src pixels (see the combine_mask_u() function).
 * src and dst should be in interleaved form.
 * The mask register should be in the form (m0, m1, m2, m3).
 */
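/*
 * The vmull/vrshr/vraddhn sequence in bilinear_apply_mask_to_src_8 is the
 * usual pixman idiom for a rounded multiply by the mask without a real
 * division by 255.  A hedged C rendering of what one 8-bit lane computes
 * (illustrative only, not part of the build):
 *
 *     static inline uint8_t mul_mask_un (uint8_t s, uint8_t m)
 *     {
 *         unsigned t = (unsigned) s * m;          // vmull.u8
 *         unsigned u = (t + 128) >> 8;            // vrshr.u16 #8 (rounding)
 *         return (uint8_t) ((t + u + 128) >> 8);  // vraddhn.u16 (rounding narrow)
 *     }
 *
 * which is the customary close approximation of s * m / 255.
 */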
.macro bilinear_apply_mask_to_src_x \
                numpix, src0, src1, src01, mask, \
                tmp01, tmp23, tmp45, tmp67
.endm

.macro bilinear_apply_mask_to_src_8 \
                numpix, src0, src1, src01, mask, \
                tmp01, tmp23, tmp45, tmp67

    vmull.u8    tmp01, src0, mask
    vmull.u8    tmp23, src1, mask
    /* bubbles */
    vrshr.u16   tmp45, tmp01, #8
    vrshr.u16   tmp67, tmp23, #8
    /* bubbles */
    vraddhn.u16 src0, tmp45, tmp01
    vraddhn.u16 src1, tmp67, tmp23
.endm

.macro bilinear_apply_mask_to_src \
                mask_fmt, numpix, src0, src1, src01, mask, \
                tmp01, tmp23, tmp45, tmp67

    bilinear_apply_mask_to_src_&mask_fmt \
                numpix, src0, src1, src01, mask, \
                tmp01, tmp23, tmp45, tmp67
.endm


/*
 * Macros for combining src and destination pixels.
 * Whether interleaving is needed depends on the operator 'op'.
 */
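/*
 * In the interleaved rrrr gggg / bbbb aaaa layout, lane src1[1] holds the four
 * source alphas, so bilinear_combine_over below is the OVER operator
 *
 *     dst = src + dst * (255 - alpha(src)) / 255      (saturating add)
 *
 * using the same rounded divide-by-255 idiom as above.  A hedged per-channel C
 * rendering (illustrative only, not part of the build):
 *
 *     static inline uint8_t over_un (uint8_t s, uint8_t sa, uint8_t d)
 *     {
 *         unsigned t = (unsigned) d * (255 - sa);          // vmvn.8 + vmull.u8
 *         unsigned r = (t + ((t + 128) >> 8) + 128) >> 8;  // vrshr + vraddhn
 *         unsigned v = s + r;                              // vqadd.u8 saturates
 *         return v > 255 ? 255 : (uint8_t) v;
 *     }
 */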
.macro bilinear_combine_src \
                numpix, src0, src1, src01, dst0, dst1, dst01, \
                tmp01, tmp23, tmp45, tmp67, tmp8
.endm

.macro bilinear_combine_over \
                numpix, src0, src1, src01, dst0, dst1, dst01, \
                tmp01, tmp23, tmp45, tmp67, tmp8

    vdup.32     tmp8, src1[1]
    /* bubbles */
    vmvn.8      tmp8, tmp8
    /* bubbles */
    vmull.u8    tmp01, dst0, tmp8
    /* bubbles */
    vmull.u8    tmp23, dst1, tmp8
    /* bubbles */
    vrshr.u16   tmp45, tmp01, #8
    vrshr.u16   tmp67, tmp23, #8
    /* bubbles */
    vraddhn.u16 dst0, tmp45, tmp01
    vraddhn.u16 dst1, tmp67, tmp23
    /* bubbles */
    vqadd.u8    src01, dst01, src01
.endm

.macro bilinear_combine_add \
                numpix, src0, src1, src01, dst0, dst1, dst01, \
                tmp01, tmp23, tmp45, tmp67, tmp8

    vqadd.u8    src01, dst01, src01
.endm

.macro bilinear_combine \
                op, numpix, src0, src1, src01, dst0, dst1, dst01, \
                tmp01, tmp23, tmp45, tmp67, tmp8

    bilinear_combine_&op \
                numpix, src0, src1, src01, dst0, dst1, dst01, \
                tmp01, tmp23, tmp45, tmp67, tmp8
.endm

/*
 * Macros for final deinterleaving of destination pixels if needed.
 */
.macro bilinear_deinterleave numpix, dst0, dst1, dst01
    vuzp.8      dst0, dst1
    /* bubbles */
    vuzp.8      dst0, dst1
.endm

.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01
.endm

.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01
    bilinear_deinterleave numpix, dst0, dst1, dst01
.endm

.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01
.endm

.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01
    bilinear_deinterleave numpix, dst0, dst1, dst01
.endm

.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01
    bilinear_deinterleave numpix, dst0, dst1, dst01
.endm

.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01
    bilinear_deinterleave numpix, dst0, dst1, dst01
.endm

.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01
    bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01
.endm


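/*
 * The vshll/vmlsl/vmlal triplets in the interpolation macros below implement
 * the horizontal pass without materialising (weight_max - wx):
 *
 *     (acc << BITS) - acc * wx + acc_next * wx
 *         == acc * ((1 << BITS) - wx) + acc_next * wx
 *
 * with BITS == BILINEAR_INTERPOLATION_BITS and wx taken from q15
 * (q15 = X >> (16 - BITS)); the final vshrn by 2 * BITS then removes both the
 * vertical and the horizontal weight scales.
 */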
.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op
    bilinear_load_&src_fmt d0, d1, d2
    bilinear_load_mask mask_fmt, 1, d4
    bilinear_load_dst dst_fmt, op, 1, d18, d19, q9
    vmull.u8    q1, d0, d28
    vmlal.u8    q1, d1, d29
    /* 5 cycles bubble */
    vshll.u16   q0, d2, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d2, d30
    vmlal.u16   q0, d3, d30
    /* 5 cycles bubble */
    bilinear_duplicate_mask mask_fmt, 1, d4
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    /* 3 cycles bubble */
    vmovn.u16   d0, q0
    /* 1 cycle bubble */
    bilinear_interleave_src_dst \
                mask_fmt, op, 1, d0, d1, q0, d18, d19, q9
    bilinear_apply_mask_to_src \
                mask_fmt, 1, d0, d1, q0, d4, \
                q3, q8, q10, q11
    bilinear_combine \
                op, 1, d0, d1, q0, d18, d19, q9, \
                q3, q8, q10, q11, d5
    bilinear_deinterleave_dst mask_fmt, op, 1, d0, d1, q0
    bilinear_store_&dst_fmt 1, q2, q3
.endm

.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op
    bilinear_load_and_vertical_interpolate_two_&src_fmt \
                q1, q11, d0, d1, d20, d21, d22, d23
    bilinear_load_mask mask_fmt, 2, d4
    bilinear_load_dst dst_fmt, op, 2, d18, d19, q9
    vshll.u16   q0, d2, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d2, d30
    vmlal.u16   q0, d3, d30
    vshll.u16   q10, d22, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q10, d22, d31
    vmlal.u16   q10, d23, d31
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
    bilinear_duplicate_mask mask_fmt, 2, d4
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16    q12, q12, q13
    vmovn.u16   d0, q0
    bilinear_interleave_src_dst \
                mask_fmt, op, 2, d0, d1, q0, d18, d19, q9
    bilinear_apply_mask_to_src \
                mask_fmt, 2, d0, d1, q0, d4, \
                q3, q8, q10, q11
    bilinear_combine \
                op, 2, d0, d1, q0, d18, d19, q9, \
                q3, q8, q10, q11, d5
    bilinear_deinterleave_dst mask_fmt, op, 2, d0, d1, q0
    bilinear_store_&dst_fmt 2, q2, q3
.endm

.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op
    bilinear_load_and_vertical_interpolate_four_&src_fmt \
                q1, q11, d0, d1, d20, d21, d22, d23 \
                q3, q9,  d4, d5, d16, d17, d18, d19
    pld         [TMP1, PF_OFFS]
    sub         TMP1, TMP1, STRIDE
    vshll.u16   q0, d2, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d2, d30
    vmlal.u16   q0, d3, d30
    vshll.u16   q10, d22, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q10, d22, d31
    vmlal.u16   q10, d23, d31
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vshll.u16   q2, d6, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q2, d6, d30
    vmlal.u16   q2, d7, d30
    vshll.u16   q8, d18, #BILINEAR_INTERPOLATION_BITS
    bilinear_load_mask mask_fmt, 4, d22
    bilinear_load_dst dst_fmt, op, 4, d2, d3, q1
    pld         [TMP1, PF_OFFS]
    vmlsl.u16   q8, d18, d31
    vmlal.u16   q8, d19, d31
    vadd.u16    q12, q12, q13
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS)
    bilinear_duplicate_mask mask_fmt, 4, d22
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vmovn.u16   d0, q0
    vmovn.u16   d1, q2
    vadd.u16    q12, q12, q13
    bilinear_interleave_src_dst \
                mask_fmt, op, 4, d0, d1, q0, d2, d3, q1
    bilinear_apply_mask_to_src \
                mask_fmt, 4, d0, d1, q0, d22, \
                q3, q8, q9, q10
    bilinear_combine \
                op, 4, d0, d1, q0, d2, d3, q1, \
                q3, q8, q9, q10, d23
    bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0
    bilinear_store_&dst_fmt 4, q2, q3
.endm

.set BILINEAR_FLAG_USE_MASK,          1
.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2

/*
 * Main template macro for generating NEON-optimized bilinear scanline functions.
 *
 * The bilinear scanline generator macro takes the following arguments:
 *  fname - name of the function to generate
 *  src_fmt - source color format (8888 or 0565)
 *  dst_fmt - destination color format (8888 or 0565)
 *  src/dst_bpp_shift - (1 << bpp_shift) is the size of a src/dst pixel in bytes
 *  process_last_pixel - code block that interpolates one pixel and does not
 *                       update the horizontal weight
 *  process_two_pixels - code block that interpolates two pixels and updates
 *                       the horizontal weight
 *  process_four_pixels - code block that interpolates four pixels and updates
 *                        the horizontal weight
 *  process_pixblock_head - head part of the middle loop
 *  process_pixblock_tail - tail part of the middle loop
 *  process_pixblock_tail_head - tail_head part of the middle loop
 *  pixblock_size - number of pixels processed in a single middle loop iteration
 *  prefetch_distance - prefetch in the source image by that many pixels ahead
 */
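/*
 * Hedged outline, in C-like pseudocode, of the control flow this macro emits
 * around the process_* blocks (illustrative only; the names stand for the
 * macro arguments above):
 *
 *     if (width <= 0)
 *         return;
 *
 *     // destination alignment prologue: peel 1, 2 (and, for pixblock_size 8,
 *     // 4) pixels while OUT is not aligned for full pixblock stores
 *     if (width >= 1 && (out & (1 << dst_bpp_shift))) { process_last_pixel ();  width -= 1; }
 *     if (width >= 2 && (out & (2 << dst_bpp_shift))) { process_two_pixels ();  width -= 2; }
 *     if (pixblock_size == 8 &&
 *         width >= 4 && (out & (4 << dst_bpp_shift))) { process_four_pixels (); width -= 4; }
 *
 *     // software-pipelined middle loop, pixblock_size pixels per iteration
 *     if (width >= pixblock_size)
 *     {
 *         process_pixblock_head ();             // start the first block
 *         width -= pixblock_size;
 *         while (width >= pixblock_size)
 *         {
 *             process_pixblock_tail_head ();    // finish one block, start the next
 *             width -= pixblock_size;
 *         }
 *         process_pixblock_tail ();             // finish the last block
 *     }
 *
 *     // epilogue for the remaining 0 .. pixblock_size-1 pixels
 *     if (width & 4) process_four_pixels ();    // only when pixblock_size == 8
 *     if (width & 2) process_two_pixels ();
 *     if (width & 1) process_last_pixel ();
 */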

.macro generate_bilinear_scanline_func \
        fname, \
        src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \
        bilinear_process_last_pixel, \
        bilinear_process_two_pixels, \
        bilinear_process_four_pixels, \
        bilinear_process_pixblock_head, \
        bilinear_process_pixblock_tail, \
        bilinear_process_pixblock_tail_head, \
        pixblock_size, \
        prefetch_distance, \
        flags

pixman_asm_function fname
.if pixblock_size == 8
.elseif pixblock_size == 4
.else
    .error unsupported pixblock size
.endif

.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
    OUT       .req    r0
    TOP       .req    r1
    BOTTOM    .req    r2
    WT        .req    r3
    WB        .req    r4
    X         .req    r5
    UX        .req    r6
    WIDTH     .req    ip
    TMP1      .req    r3
    TMP2      .req    r4
    PF_OFFS   .req    r7
    TMP3      .req    r8
    TMP4      .req    r9
    STRIDE    .req    r2

    mov       ip, sp
    push      {r4, r5, r6, r7, r8, r9}
    mov       PF_OFFS, #prefetch_distance
    ldmia     ip, {WB, X, UX, WIDTH}
.else
    OUT       .req    r0
    MASK      .req    r1
    TOP       .req    r2
    BOTTOM    .req    r3
    WT        .req    r4
    WB        .req    r5
    X         .req    r6
    UX        .req    r7
    WIDTH     .req    ip
    TMP1      .req    r4
    TMP2      .req    r5
    PF_OFFS   .req    r8
    TMP3      .req    r9
    TMP4      .req    r10
    STRIDE    .req    r3

    .set prefetch_offset, prefetch_distance

    mov       ip, sp
    push      {r4, r5, r6, r7, r8, r9, r10, ip}
    mov       PF_OFFS, #prefetch_distance
    ldmia     ip, {WT, WB, X, UX, WIDTH}
.endif

    mul       PF_OFFS, PF_OFFS, UX

.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
    vpush     {d8-d15}
.endif

    sub       STRIDE, BOTTOM, TOP
    .unreq    BOTTOM

    cmp       WIDTH, #0
    ble       3f

    vdup.u16  q12, X
    vdup.u16  q13, UX
    vdup.u8   d28, WT
    vdup.u8   d29, WB
    vadd.u16  d25, d25, d26

    /* ensure good destination alignment  */
    cmp       WIDTH, #1
    blt       0f
    tst       OUT, #(1 << dst_bpp_shift)
    beq       0f
    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16  q12, q12, q13
    bilinear_process_last_pixel
    sub       WIDTH, WIDTH, #1
0:
    vadd.u16  q13, q13, q13
    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16  q12, q12, q13

    cmp       WIDTH, #2
    blt       0f
    tst       OUT, #(1 << (dst_bpp_shift + 1))
    beq       0f
    bilinear_process_two_pixels
    sub       WIDTH, WIDTH, #2
0:
.if pixblock_size == 8
    cmp       WIDTH, #4
    blt       0f
    tst       OUT, #(1 << (dst_bpp_shift + 2))
    beq       0f
    bilinear_process_four_pixels
    sub       WIDTH, WIDTH, #4
0:
.endif
    subs      WIDTH, WIDTH, #pixblock_size
    blt       1f
    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
    bilinear_process_pixblock_head
    subs      WIDTH, WIDTH, #pixblock_size
    blt       5f
0:
    bilinear_process_pixblock_tail_head
    subs      WIDTH, WIDTH, #pixblock_size
    bge       0b
5:
    bilinear_process_pixblock_tail
1:
.if pixblock_size == 8
    tst       WIDTH, #4
    beq       2f
    bilinear_process_four_pixels
2:
.endif
    /* handle the remaining trailing pixels */
    tst       WIDTH, #2
    beq       2f
    bilinear_process_two_pixels
2:
    tst       WIDTH, #1
    beq       3f
    bilinear_process_last_pixel
3:
.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
    vpop      {d8-d15}
.endif

.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
    pop       {r4, r5, r6, r7, r8, r9}
.else
    pop       {r4, r5, r6, r7, r8, r9, r10, ip}
.endif
    bx        lr

    .unreq    OUT
    .unreq    TOP
    .unreq    WT
    .unreq    WB
    .unreq    X
    .unreq    UX
    .unreq    WIDTH
    .unreq    TMP1
    .unreq    TMP2
    .unreq    PF_OFFS
    .unreq    TMP3
    .unreq    TMP4
    .unreq    STRIDE
.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0
    .unreq    MASK
.endif

.endfunc

.endm

/* src_8888_8_8888 */
.macro bilinear_src_8888_8_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, 8, 8888, src
.endm

.macro bilinear_src_8888_8_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, 8, 8888, src
.endm

.macro bilinear_src_8888_8_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, 8, 8888, src
.endm

.macro bilinear_src_8888_8_8888_process_pixblock_head
    bilinear_src_8888_8_8888_process_four_pixels
.endm

.macro bilinear_src_8888_8_8888_process_pixblock_tail
.endm

.macro bilinear_src_8888_8_8888_process_pixblock_tail_head
    bilinear_src_8888_8_8888_process_pixblock_tail
    bilinear_src_8888_8_8888_process_pixblock_head
.endm

/* src_8888_8_0565 */
.macro bilinear_src_8888_8_0565_process_last_pixel
    bilinear_interpolate_last_pixel 8888, 8, 0565, src
.endm

.macro bilinear_src_8888_8_0565_process_two_pixels
    bilinear_interpolate_two_pixels 8888, 8, 0565, src
.endm

.macro bilinear_src_8888_8_0565_process_four_pixels
    bilinear_interpolate_four_pixels 8888, 8, 0565, src
.endm

.macro bilinear_src_8888_8_0565_process_pixblock_head
    bilinear_src_8888_8_0565_process_four_pixels
.endm

.macro bilinear_src_8888_8_0565_process_pixblock_tail
.endm

.macro bilinear_src_8888_8_0565_process_pixblock_tail_head
    bilinear_src_8888_8_0565_process_pixblock_tail
    bilinear_src_8888_8_0565_process_pixblock_head
.endm

/* src_0565_8_x888 */
.macro bilinear_src_0565_8_x888_process_last_pixel
    bilinear_interpolate_last_pixel 0565, 8, 8888, src
.endm

.macro bilinear_src_0565_8_x888_process_two_pixels
    bilinear_interpolate_two_pixels 0565, 8, 8888, src
.endm

.macro bilinear_src_0565_8_x888_process_four_pixels
    bilinear_interpolate_four_pixels 0565, 8, 8888, src
.endm

.macro bilinear_src_0565_8_x888_process_pixblock_head
    bilinear_src_0565_8_x888_process_four_pixels
.endm

.macro bilinear_src_0565_8_x888_process_pixblock_tail
.endm

.macro bilinear_src_0565_8_x888_process_pixblock_tail_head
    bilinear_src_0565_8_x888_process_pixblock_tail
    bilinear_src_0565_8_x888_process_pixblock_head
.endm

/* src_0565_8_0565 */
.macro bilinear_src_0565_8_0565_process_last_pixel
    bilinear_interpolate_last_pixel 0565, 8, 0565, src
.endm

.macro bilinear_src_0565_8_0565_process_two_pixels
    bilinear_interpolate_two_pixels 0565, 8, 0565, src
.endm

.macro bilinear_src_0565_8_0565_process_four_pixels
    bilinear_interpolate_four_pixels 0565, 8, 0565, src
.endm

.macro bilinear_src_0565_8_0565_process_pixblock_head
    bilinear_src_0565_8_0565_process_four_pixels
.endm

.macro bilinear_src_0565_8_0565_process_pixblock_tail
.endm

.macro bilinear_src_0565_8_0565_process_pixblock_tail_head
    bilinear_src_0565_8_0565_process_pixblock_tail
    bilinear_src_0565_8_0565_process_pixblock_head
.endm

/* over_8888_8888 */
.macro bilinear_over_8888_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, x, 8888, over
.endm

.macro bilinear_over_8888_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, x, 8888, over
.endm

.macro bilinear_over_8888_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, x, 8888, over
.endm

.macro bilinear_over_8888_8888_process_pixblock_head
    mov         TMP1, X, asr #16
    add         X, X, UX
    add         TMP1, TOP, TMP1, asl #2
    mov         TMP2, X, asr #16
    add         X, X, UX
    add         TMP2, TOP, TMP2, asl #2

    vld1.32     {d22}, [TMP1], STRIDE
    vld1.32     {d23}, [TMP1]
    mov         TMP3, X, asr #16
    add         X, X, UX
    add         TMP3, TOP, TMP3, asl #2
    vmull.u8    q8, d22, d28
    vmlal.u8    q8, d23, d29

    vld1.32     {d22}, [TMP2], STRIDE
    vld1.32     {d23}, [TMP2]
    mov         TMP4, X, asr #16
    add         X, X, UX
    add         TMP4, TOP, TMP4, asl #2
    vmull.u8    q9, d22, d28
    vmlal.u8    q9, d23, d29

    vld1.32     {d22}, [TMP3], STRIDE
    vld1.32     {d23}, [TMP3]
    vmull.u8    q10, d22, d28
    vmlal.u8    q10, d23, d29

    vshll.u16   q0, d16, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d16, d30
    vmlal.u16   q0, d17, d30

    pld         [TMP4, PF_OFFS]
    vld1.32     {d16}, [TMP4], STRIDE
    vld1.32     {d17}, [TMP4]
    pld         [TMP4, PF_OFFS]
    vmull.u8    q11, d16, d28
    vmlal.u8    q11, d17, d29

    vshll.u16   q1, d18, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q1, d18, d31
    vmlal.u16   q1, d19, d31
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16    q12, q12, q13
.endm

.macro bilinear_over_8888_8888_process_pixblock_tail
    vshll.u16   q2, d20, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q2, d20, d30
    vmlal.u16   q2, d21, d30
    vshll.u16   q3, d22, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q3, d22, d31
    vmlal.u16   q3, d23, d31
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
    vld1.32     {d2, d3}, [OUT, :128]
    pld         [OUT, #(prefetch_offset * 4)]
    vshrn.u32   d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
    vmovn.u16   d6, q0
    vmovn.u16   d7, q2
    vuzp.8      d6, d7
    vuzp.8      d2, d3
    vuzp.8      d6, d7
    vuzp.8      d2, d3
    vdup.32     d4, d7[1]
    vmvn.8      d4, d4
    vmull.u8    q11, d2, d4
    vmull.u8    q2, d3, d4
    vrshr.u16   q1, q11, #8
    vrshr.u16   q10, q2, #8
    vraddhn.u16 d2, q1, q11
    vraddhn.u16 d3, q10, q2
    vqadd.u8    q3, q1, q3
    vuzp.8      d6, d7
    vuzp.8      d6, d7
    vadd.u16    q12, q12, q13
    vst1.32     {d6, d7}, [OUT, :128]!
.endm

.macro bilinear_over_8888_8888_process_pixblock_tail_head
    vshll.u16   q2, d20, #BILINEAR_INTERPOLATION_BITS
    mov         TMP1, X, asr #16
    add         X, X, UX
    add         TMP1, TOP, TMP1, asl #2
    vmlsl.u16   q2, d20, d30
    mov         TMP2, X, asr #16
    add         X, X, UX
    add         TMP2, TOP, TMP2, asl #2
    vmlal.u16   q2, d21, d30
    vshll.u16   q3, d22, #BILINEAR_INTERPOLATION_BITS
    vld1.32     {d20}, [TMP1], STRIDE
    vmlsl.u16   q3, d22, d31
    vmlal.u16   q3, d23, d31
    vld1.32     {d21}, [TMP1]
    vmull.u8    q8, d20, d28
    vmlal.u8    q8, d21, d29
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
    vld1.32     {d2, d3}, [OUT, :128]
    pld         [OUT, PF_OFFS]
    vshrn.u32   d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vld1.32     {d22}, [TMP2], STRIDE
    vshrn.u32   d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
    vmovn.u16   d6, q0
    vld1.32     {d23}, [TMP2]
    vmull.u8    q9, d22, d28
    mov         TMP3, X, asr #16
    add         X, X, UX
    add         TMP3, TOP, TMP3, asl #2
    mov         TMP4, X, asr #16
    add         X, X, UX
    add         TMP4, TOP, TMP4, asl #2
    vmlal.u8    q9, d23, d29
    vmovn.u16   d7, q2
    vld1.32     {d22}, [TMP3], STRIDE
    vuzp.8      d6, d7
    vuzp.8      d2, d3
    vuzp.8      d6, d7
    vuzp.8      d2, d3
    vdup.32     d4, d7[1]
    vld1.32     {d23}, [TMP3]
    vmvn.8      d4, d4
    vmull.u8    q10, d22, d28
    vmlal.u8    q10, d23, d29
    vmull.u8    q11, d2, d4
    vmull.u8    q2, d3, d4
    vshll.u16   q0, d16, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d16, d30
    vrshr.u16   q1, q11, #8
    vmlal.u16   q0, d17, d30
    vrshr.u16   q8, q2, #8
    vraddhn.u16 d2, q1, q11
    vraddhn.u16 d3, q8, q2
    pld         [TMP4, PF_OFFS]
    vld1.32     {d16}, [TMP4], STRIDE
    vqadd.u8    q3, q1, q3
    vld1.32     {d17}, [TMP4]
    pld         [TMP4, PF_OFFS]
    vmull.u8    q11, d16, d28
    vmlal.u8    q11, d17, d29
    vuzp.8      d6, d7
    vshll.u16   q1, d18, #BILINEAR_INTERPOLATION_BITS
    vuzp.8      d6, d7
    vmlsl.u16   q1, d18, d31
    vadd.u16    q12, q12, q13
    vmlal.u16   q1, d19, d31
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16    q12, q12, q13
    vst1.32     {d6, d7}, [OUT, :128]!
.endm

/* over_8888_8_8888 */
.macro bilinear_over_8888_8_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, 8, 8888, over
.endm

.macro bilinear_over_8888_8_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, 8, 8888, over
.endm

.macro bilinear_over_8888_8_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, 8, 8888, over
.endm

.macro bilinear_over_8888_8_8888_process_pixblock_head
    mov         TMP1, X, asr #16
    add         X, X, UX
    add         TMP1, TOP, TMP1, asl #2
    vld1.32     {d0}, [TMP1], STRIDE
    mov         TMP2, X, asr #16
    add         X, X, UX
    add         TMP2, TOP, TMP2, asl #2
    vld1.32     {d1}, [TMP1]
    mov         TMP3, X, asr #16
    add         X, X, UX
    add         TMP3, TOP, TMP3, asl #2
    vld1.32     {d2}, [TMP2], STRIDE
    mov         TMP4, X, asr #16
    add         X, X, UX
    add         TMP4, TOP, TMP4, asl #2
    vld1.32     {d3}, [TMP2]
    vmull.u8    q2, d0, d28
    vmull.u8    q3, d2, d28
    vmlal.u8    q2, d1, d29
    vmlal.u8    q3, d3, d29
    vshll.u16   q0, d4, #BILINEAR_INTERPOLATION_BITS
    vshll.u16   q1, d6, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d4, d30
    vmlsl.u16   q1, d6, d31
    vmlal.u16   q0, d5, d30
    vmlal.u16   q1, d7, d31
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
    vld1.32     {d2}, [TMP3], STRIDE
    vld1.32     {d3}, [TMP3]
    pld         [TMP4, PF_OFFS]
    vld1.32     {d4}, [TMP4], STRIDE
    vld1.32     {d5}, [TMP4]
    pld         [TMP4, PF_OFFS]
    vmull.u8    q3, d2, d28
    vmlal.u8    q3, d3, d29
    vmull.u8    q1, d4, d28
    vmlal.u8    q1, d5, d29
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vld1.32     {d22[0]}, [MASK]!
    pld         [MASK, #prefetch_offset]
    vadd.u16    q12, q12, q13
    vmovn.u16   d16, q0
.endm

.macro bilinear_over_8888_8_8888_process_pixblock_tail
    vshll.u16   q9, d6, #BILINEAR_INTERPOLATION_BITS
    vshll.u16   q10, d2, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q9, d6, d30
    vmlsl.u16   q10, d2, d31
    vmlal.u16   q9, d7, d30
    vmlal.u16   q10, d3, d31
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16    q12, q12, q13
    vdup.32     d22, d22[0]
    vshrn.u32   d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
    vmovn.u16   d17, q9
    vld1.32     {d18, d19}, [OUT, :128]
    pld         [OUT, PF_OFFS]
    vuzp.8      d16, d17
    vuzp.8      d18, d19
    vuzp.8      d16, d17
    vuzp.8      d18, d19
    vmull.u8    q10, d16, d22
    vmull.u8    q11, d17, d22
    vrsra.u16   q10, q10, #8
    vrsra.u16   q11, q11, #8
    vrshrn.u16  d16, q10, #8
    vrshrn.u16  d17, q11, #8
    vdup.32     d22, d17[1]
    vmvn.8      d22, d22
    vmull.u8    q10, d18, d22
    vmull.u8    q11, d19, d22
    vrshr.u16   q9, q10, #8
    vrshr.u16   q0, q11, #8
    vraddhn.u16 d18, q9, q10
    vraddhn.u16 d19, q0, q11
    vqadd.u8    q9, q8, q9
    vuzp.8      d18, d19
    vuzp.8      d18, d19
    vst1.32     {d18, d19}, [OUT, :128]!
.endm

.macro bilinear_over_8888_8_8888_process_pixblock_tail_head
    vshll.u16   q9, d6, #BILINEAR_INTERPOLATION_BITS
    mov         TMP1, X, asr #16
    add         X, X, UX
    add         TMP1, TOP, TMP1, asl #2
    vshll.u16   q10, d2, #BILINEAR_INTERPOLATION_BITS
    vld1.32     {d0}, [TMP1], STRIDE
    mov         TMP2, X, asr #16
    add         X, X, UX
    add         TMP2, TOP, TMP2, asl #2
    vmlsl.u16   q9, d6, d30
    vmlsl.u16   q10, d2, d31
    vld1.32     {d1}, [TMP1]
    mov         TMP3, X, asr #16
    add         X, X, UX
    add         TMP3, TOP, TMP3, asl #2
    vmlal.u16   q9, d7, d30
    vmlal.u16   q10, d3, d31
    vld1.32     {d2}, [TMP2], STRIDE
    mov         TMP4, X, asr #16
    add         X, X, UX
    add         TMP4, TOP, TMP4, asl #2
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16    q12, q12, q13
    vld1.32     {d3}, [TMP2]
    vdup.32     d22, d22[0]
    vshrn.u32   d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
    vmull.u8    q2, d0, d28
    vmull.u8    q3, d2, d28
    vmovn.u16   d17, q9
    vld1.32     {d18, d19}, [OUT, :128]
    pld         [OUT, #(prefetch_offset * 4)]
    vmlal.u8    q2, d1, d29
    vmlal.u8    q3, d3, d29
    vuzp.8      d16, d17
    vuzp.8      d18, d19
    vshll.u16   q0, d4, #BILINEAR_INTERPOLATION_BITS
    vshll.u16   q1, d6, #BILINEAR_INTERPOLATION_BITS
    vuzp.8      d16, d17
    vuzp.8      d18, d19
    vmlsl.u16   q0, d4, d30
    vmlsl.u16   q1, d6, d31
    vmull.u8    q10, d16, d22
    vmull.u8    q11, d17, d22
    vmlal.u16   q0, d5, d30
    vmlal.u16   q1, d7, d31
    vrsra.u16   q10, q10, #8
    vrsra.u16   q11, q11, #8
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
    vrshrn.u16  d16, q10, #8
    vrshrn.u16  d17, q11, #8
    vld1.32     {d2}, [TMP3], STRIDE
    vdup.32     d22, d17[1]
    vld1.32     {d3}, [TMP3]
    vmvn.8      d22, d22
    pld         [TMP4, PF_OFFS]
    vld1.32     {d4}, [TMP4], STRIDE
    vmull.u8    q10, d18, d22
    vmull.u8    q11, d19, d22
    vld1.32     {d5}, [TMP4]
    pld         [TMP4, PF_OFFS]
    vmull.u8    q3, d2, d28
    vrshr.u16   q9, q10, #8
    vrshr.u16   q15, q11, #8
    vmlal.u8    q3, d3, d29
    vmull.u8    q1, d4, d28
    vraddhn.u16 d18, q9, q10
    vraddhn.u16 d19, q15, q11
    vmlal.u8    q1, d5, d29
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vqadd.u8    q9, q8, q9
    vld1.32     {d22[0]}, [MASK]!
    vuzp.8      d18, d19
    vadd.u16    q12, q12, q13
    vuzp.8      d18, d19
    vmovn.u16   d16, q0
    vst1.32     {d18, d19}, [OUT, :128]!
.endm

/* add_8888_8888 */
.macro bilinear_add_8888_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, x, 8888, add
.endm

.macro bilinear_add_8888_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, x, 8888, add
.endm

.macro bilinear_add_8888_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, x, 8888, add
.endm

.macro bilinear_add_8888_8888_process_pixblock_head
    bilinear_add_8888_8888_process_four_pixels
.endm

.macro bilinear_add_8888_8888_process_pixblock_tail
.endm

.macro bilinear_add_8888_8888_process_pixblock_tail_head
    bilinear_add_8888_8888_process_pixblock_tail
    bilinear_add_8888_8888_process_pixblock_head
.endm

/* add_8888_8_8888 */
.macro bilinear_add_8888_8_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, 8, 8888, add
.endm

.macro bilinear_add_8888_8_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, 8, 8888, add
.endm

.macro bilinear_add_8888_8_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, 8, 8888, add
.endm

.macro bilinear_add_8888_8_8888_process_pixblock_head
    bilinear_add_8888_8_8888_process_four_pixels
.endm

.macro bilinear_add_8888_8_8888_process_pixblock_tail
.endm

.macro bilinear_add_8888_8_8888_process_pixblock_tail_head
    bilinear_add_8888_8_8888_process_pixblock_tail
    bilinear_add_8888_8_8888_process_pixblock_head
.endm


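/*
 * From the .req register assignments and ldmia in generate_bilinear_scanline_func,
 * the generated entry points correspond to C prototypes of roughly this shape
 * (a hedged reconstruction; the authoritative declarations live in the pixman
 * C sources, and the pointer types follow the src/dst formats):
 *
 *     void fname (uint32_t *out, const uint32_t *top, const uint32_t *bottom,
 *                 int wt, int wb, pixman_fixed_t x, pixman_fixed_t ux, int width);
 *
 * with an additional 'const uint8_t *mask' argument inserted between 'out'
 * and 'top' for the BILINEAR_FLAG_USE_MASK variants.
 */
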
/* Bilinear scanline functions */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_src_8888_8_8888_process_last_pixel, \
    bilinear_src_8888_8_8888_process_two_pixels, \
    bilinear_src_8888_8_8888_process_four_pixels, \
    bilinear_src_8888_8_8888_process_pixblock_head, \
    bilinear_src_8888_8_8888_process_pixblock_tail, \
    bilinear_src_8888_8_8888_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \
    8888, 0565, 2, 1, \
    bilinear_src_8888_8_0565_process_last_pixel, \
    bilinear_src_8888_8_0565_process_two_pixels, \
    bilinear_src_8888_8_0565_process_four_pixels, \
    bilinear_src_8888_8_0565_process_pixblock_head, \
    bilinear_src_8888_8_0565_process_pixblock_tail, \
    bilinear_src_8888_8_0565_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \
    0565, 8888, 1, 2, \
    bilinear_src_0565_8_x888_process_last_pixel, \
    bilinear_src_0565_8_x888_process_two_pixels, \
    bilinear_src_0565_8_x888_process_four_pixels, \
    bilinear_src_0565_8_x888_process_pixblock_head, \
    bilinear_src_0565_8_x888_process_pixblock_tail, \
    bilinear_src_0565_8_x888_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \
    0565, 0565, 1, 1, \
    bilinear_src_0565_8_0565_process_last_pixel, \
    bilinear_src_0565_8_0565_process_two_pixels, \
    bilinear_src_0565_8_0565_process_four_pixels, \
    bilinear_src_0565_8_0565_process_pixblock_head, \
    bilinear_src_0565_8_0565_process_pixblock_tail, \
    bilinear_src_0565_8_0565_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_over_8888_8888_process_last_pixel, \
    bilinear_over_8888_8888_process_two_pixels, \
    bilinear_over_8888_8888_process_four_pixels, \
    bilinear_over_8888_8888_process_pixblock_head, \
    bilinear_over_8888_8888_process_pixblock_tail, \
    bilinear_over_8888_8888_process_pixblock_tail_head, \
    4, 28, 0

generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_over_8888_8_8888_process_last_pixel, \
    bilinear_over_8888_8_8888_process_two_pixels, \
    bilinear_over_8888_8_8888_process_four_pixels, \
    bilinear_over_8888_8_8888_process_pixblock_head, \
    bilinear_over_8888_8_8888_process_pixblock_tail, \
    bilinear_over_8888_8_8888_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_add_8888_8888_process_last_pixel, \
    bilinear_add_8888_8888_process_two_pixels, \
    bilinear_add_8888_8888_process_four_pixels, \
    bilinear_add_8888_8888_process_pixblock_head, \
    bilinear_add_8888_8888_process_pixblock_tail, \
    bilinear_add_8888_8888_process_pixblock_tail_head, \
    4, 28, 0

generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_add_8888_8_8888_process_last_pixel, \
    bilinear_add_8888_8_8888_process_two_pixels, \
    bilinear_add_8888_8_8888_process_four_pixels, \
    bilinear_add_8888_8_8888_process_pixblock_head, \
    bilinear_add_8888_8_8888_process_pixblock_tail, \
    bilinear_add_8888_8_8888_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK
