;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
;  File: media/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
;  Syntax: ARM armasm, ARM (A32) instruction set with NEON.
;

    EXPORT  |vp8_memcpy_partial_neon|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2
;=========================================
; void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
;                              int sz);
;
; Forward byte copy of sz bytes from src_ptr to dst_ptr.
;
; In:     r0 = dst_ptr
;         r1 = src_ptr
;         r2 = sz (signed; sz <= 0 copies nothing)
; Out:    none
; Clobb:  r0, r1, r3, r12, q0-q3, q8-q15, flags
;         (q4-q7 are callee-saved under AAPCS and are deliberately not used)
; Stack:  none (leaf function)
;
; The copy runs in three phases:
;   1. sz / 256 iterations of a 256-byte NEON block copy
;   2. whole 16-byte chunks of the remaining (sz & 0xff) bytes
;   3. the last 0..15 bytes one at a time
; Loads/stores carry no alignment qualifier. The buffers are expected not to
; overlap (loads are issued ahead of the matching stores).
|vp8_memcpy_partial_neon| PROC
    ;pld        [r1]                        ;preload pred data
    ;pld        [r1, #128]
    ;pld        [r1, #256]
    ;pld        [r1, #384]

    cmp         r2, #0                      ;nothing to do for sz <= 0
    ble         done_copy_neon_loop

    movs        r12, r2, lsr #8             ;r12 = number of 256-byte blocks
    beq         extra_copy_neon             ;sz < 256: skip the block loop

    ;Invariant: r12 > 0 on entry to every iteration. NEON loads/stores leave
    ;the flags alone, so the result of subs is still valid at the bne.
memcpy_neon_loop
    vld1.8      {q0, q1}, [r1]!             ;load src data
    subs        r12, r12, #1
    vld1.8      {q2, q3}, [r1]!
    vst1.8      {q0, q1}, [r0]!             ;copy to dst_ptr
    vld1.8      {q8, q9}, [r1]!
    vst1.8      {q2, q3}, [r0]!
    vld1.8      {q10, q11}, [r1]!
    vst1.8      {q8, q9}, [r0]!
    vld1.8      {q12, q13}, [r1]!
    vst1.8      {q10, q11}, [r0]!
    vld1.8      {q14, q15}, [r1]!
    vst1.8      {q12, q13}, [r0]!
    vld1.8      {q0, q1}, [r1]!             ;q0-q3 already stored: reuse them
    vst1.8      {q14, q15}, [r0]!           ;instead of callee-saved q4-q7
    vld1.8      {q2, q3}, [r1]!
    vst1.8      {q0, q1}, [r0]!
    vst1.8      {q2, q3}, [r0]!

    ;pld        [r1]                        ;preload pred data -- need to adjust for real device
    ;pld        [r1, #128]
    ;pld        [r1, #256]
    ;pld        [r1, #384]

    bne         memcpy_neon_loop

extra_copy_neon
    ands        r3, r2, #0xff               ;r3 = bytes left after the blocks
    beq         done_copy_neon_loop

    subs        r3, r3, #16                 ;bias by one chunk so that the
    blt         extra_copy_bytes            ;loop below can test with bge

    ;Invariant: r3 + 16 = bytes remaining, and at least 16 remain here.
extra_copy_neon_loop
    vld1.8      {q0}, [r1]!                 ;load src data
    subs        r3, r3, #16
    vst1.8      {q0}, [r0]!
    bge         extra_copy_neon_loop

extra_copy_bytes
    adds        r3, r3, #16                 ;undo the bias: r3 = 0..15 left
    beq         done_copy_neon_loop

extra_copy_byte_loop
    ldrb        r12, [r1], #1
    subs        r3, r3, #1
    strb        r12, [r0], #1
    bne         extra_copy_byte_loop

done_copy_neon_loop
    bx          lr
    ENDP

    END