;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;

    EXPORT  |vp9_convolve_copy_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp9_convolve_copy_neon
;
; Copies a rectangle of bytes, one row at a time, from a source buffer
; to a destination buffer. No filtering is performed.
;
; Syntax: ARM armasm, A32 instruction set, NEON.
;
; In (registers):
;   r0 = source pointer        (advanced by r1 after every row)
;   r1 = source stride in bytes
;   r2 = destination pointer   (advanced by r3 after every row)
;   r3 = destination stride in bytes
; In (stack, offsets relative to sp on entry):
;   [sp, #16] = width selector, loaded into r4
;   [sp, #20] = row count,      loaded into r5
;   The four stack words below offset 16 are never read here; they are
;   presumably the filter arguments of the shared convolve prototype
;   (confirm against the C declaration).
;
; Width selector -> bytes copied per row:
;   > 32        64 bytes
;   == 32       32 bytes
;   9 .. 31     16 bytes
;   == 8         8 bytes
;   < 8          4 bytes
; Comparisons are signed (bgt).
;
; Row count: every path copies at least one iteration before testing
; the counter. The 32/16/8-byte paths handle two rows per iteration, so
; an odd row count makes them copy one row more than requested; callers
; are presumably expected to pass an even count on those paths.
;
; Alignment: NEON stores carry an alignment qualifier on the destination
; (@128 for the 64/32/16-byte paths, @64 for the 8-byte path), so the
; destination pointer and stride must keep every row aligned to 16
; (resp. 8) bytes. Source loads carry no alignment qualifier.
;
; Clobbers: r0, r2, r3, r12, q0-q3, flags. r4, r5 and lr are saved on
; entry and restored on return (lr is reloaded straight into pc).
; Stack:    12 bytes; leaf routine, makes no calls.
;-----------------------------------------------------------------------
|vp9_convolve_copy_neon| PROC
    push            {r4-r5, lr}
    ; The push above moved sp down by 12 bytes, so the caller's stack
    ; offsets 16 and 20 are now found at sp+28 and sp+32.
    ldrd            r4, r5, [sp, #28]       ; r4 = width, r5 = rows

    ; Select the copier for this width.
    cmp             r4, #32
    bgt             copy_w64_setup
    beq             copy_w32_rows
    cmp             r4, #8
    bgt             copy_w16_rows
    beq             copy_w8_rows
    ; Anything narrower than 8 falls through to the 4-byte copier.

    ; ---- 4 bytes per row, one row per iteration (core registers) -----
copy_w4_rows
    ldr             r12, [r0], r1
    str             r12, [r2], r3
    subs            r5, r5, #1
    bgt             copy_w4_rows
    pop             {r4-r5, pc}

    ; ---- 64 bytes per row, one row per iteration ---------------------
    ; Each row is moved as two 32-byte halves. The first half advances
    ; the pointer by 32 through writeback, so the second half must add
    ; only (stride - 32) to land on the start of the next row.
copy_w64_setup
    sub             r3, r3, #32             ; r3 = dst stride - 32
    sub             lr, r1, #32             ; lr = src stride - 32
copy_w64_rows
    pld             [r0, r1, lsl #1]        ; prefetch two rows ahead
    vld1.8          {q0-q1}, [r0]!
    vld1.8          {q2-q3}, [r0], lr
    vst1.8          {q0-q1}, [r2@128]!
    vst1.8          {q2-q3}, [r2@128], r3
    subs            r5, r5, #1
    bgt             copy_w64_rows
    pop             {r4-r5, pc}

    ; ---- 32 bytes per row, two rows per iteration --------------------
copy_w32_rows
    pld             [r0, r1, lsl #1]
    vld1.8          {q0-q1}, [r0], r1
    pld             [r0, r1, lsl #1]
    vld1.8          {q2-q3}, [r0], r1
    vst1.8          {q0-q1}, [r2@128], r3
    vst1.8          {q2-q3}, [r2@128], r3
    subs            r5, r5, #2
    bgt             copy_w32_rows
    pop             {r4-r5, pc}

    ; ---- 16 bytes per row, two rows per iteration --------------------
copy_w16_rows
    pld             [r0, r1, lsl #1]
    vld1.8          {q0}, [r0], r1
    pld             [r0, r1, lsl #1]
    vld1.8          {q1}, [r0], r1
    vst1.8          {q0}, [r2@128], r3
    vst1.8          {q1}, [r2@128], r3
    subs            r5, r5, #2
    bgt             copy_w16_rows
    pop             {r4-r5, pc}

    ; ---- 8 bytes per row, two rows per iteration ---------------------
copy_w8_rows
    pld             [r0, r1, lsl #1]
    vld1.8          {d0}, [r0], r1
    pld             [r0, r1, lsl #1]
    vld1.8          {d2}, [r0], r1
    vst1.8          {d0}, [r2@64], r3
    vst1.8          {d2}, [r2@64], r3
    subs            r5, r5, #2
    bgt             copy_w8_rows
    pop             {r4-r5, pc}
    ENDP

    END