;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_memcpy_partial_neon|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2
;=========================================
; void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
;                              int sz);
;
; Copies sz bytes from src_ptr to dst_ptr with NEON loads/stores.
; Not a drop-in memcpy: it returns nothing and makes no attempt to handle
; overlapping buffers.
;
; ABI:    AAPCS (ARM state), leaf function, no stack usage
; In:     r0 = dst_ptr
;         r1 = src_ptr
;         r2 = sz (byte count; sz <= 0 copies nothing)
; Out:    none
; Clobb:  r0, r1, r3, r12, q0-q3, q8-q15, flags
;         q4-q7 (d8-d15) are callee-saved under AAPCS and are deliberately
;         left untouched.
;
; Structure:
;   1. sz >> 8 blocks of 256 bytes
;   2. remaining 16-byte chunks of (sz & 0xff)
;   3. remaining single bytes (0..15)
;=========================================
|vp8_memcpy_partial_neon| PROC
    ;pld        [r1]                        ;preload pred data
    ;pld        [r1, #128]
    ;pld        [r1, #256]
    ;pld        [r1, #384]

    cmp         r2, #0
    ble         done_copy_neon_loop         ;nothing to copy for sz <= 0

    movs        r12, r2, lsr #8             ;r12 = number of 256-byte blocks
    beq         extra_copy_neon             ;sz < 256: the block loop must not
                                            ;run at all (its count is tested
                                            ;only after a full 256-byte copy)

    ; Invariant: r12 = 256-byte blocks still to copy, always > 0 on entry.
    ; Each store trails its load by one register pair so loads and stores
    ; overlap. NEON loads/stores leave the flags alone, so the result of the
    ; early subs is still valid at the bne.
memcpy_neon_loop
    vld1.8      {q0, q1}, [r1]!             ;bytes   0.. 31
    subs        r12, r12, #1
    vld1.8      {q2, q3}, [r1]!             ;bytes  32.. 63
    vst1.8      {q0, q1}, [r0]!
    vld1.8      {q8, q9}, [r1]!             ;bytes  64.. 95
    vst1.8      {q2, q3}, [r0]!
    vld1.8      {q10, q11}, [r1]!           ;bytes  96..127
    vst1.8      {q8, q9}, [r0]!
    vld1.8      {q12, q13}, [r1]!           ;bytes 128..159
    vst1.8      {q10, q11}, [r0]!
    vld1.8      {q14, q15}, [r1]!           ;bytes 160..191
    vst1.8      {q12, q13}, [r0]!
    vld1.8      {q0, q1}, [r1]!             ;bytes 192..223 (q0/q1 already stored)
    vst1.8      {q14, q15}, [r0]!
    vld1.8      {q2, q3}, [r1]!             ;bytes 224..255 (q2/q3 already stored)
    vst1.8      {q0, q1}, [r0]!
    vst1.8      {q2, q3}, [r0]!

    ;pld        [r1]                        ;preload pred data -- need to adjust for real device
    ;pld        [r1, #128]
    ;pld        [r1, #256]
    ;pld        [r1, #384]

    bne         memcpy_neon_loop

extra_copy_neon
    and         r3, r2, #0xff               ;r3 = bytes left after the blocks
    subs        r3, r3, #16                 ;bias by -16: r3 >= 0 means a full
    blt         tail_copy_neon              ;16-byte chunk is still available

    ; Invariant: r3 = (bytes remaining) - 16, always >= 0 on entry.
extra_copy_neon_loop
    vld1.8      {q0}, [r1]!                 ;load src data
    subs        r3, r3, #16
    vst1.8      {q0}, [r0]!
    bge         extra_copy_neon_loop

tail_copy_neon
    adds        r3, r3, #16                 ;undo the bias: r3 = 0..15 bytes left
    beq         done_copy_neon_loop

    ; Invariant: r3 = bytes remaining, always > 0 on entry.
tail_copy_neon_loop
    ldrb        r12, [r1], #1
    subs        r3, r3, #1
    strb        r12, [r0], #1
    bne         tail_copy_neon_loop

done_copy_neon_loop
    bx          lr

    ENDP

    END