|
1 ; |
|
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
|
3 ; |
|
4 ; Use of this source code is governed by a BSD-style license |
|
5 ; that can be found in the LICENSE file in the root of the source |
|
6 ; tree. An additional intellectual property rights grant can be found |
|
7 ; in the file PATENTS. All contributing project authors may |
|
8 ; be found in the AUTHORS file in the root of the source tree. |
|
9 ; |
|
10 |
|
11 |
|
12 EXPORT |vp8_memcpy_partial_neon| |
|
13 |
|
14 ARM |
|
15 REQUIRE8 |
|
16 PRESERVE8 |
|
17 |
|
18 AREA ||.text||, CODE, READONLY, ALIGN=2 |
|
;=========================================
; void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
;                              int sz);
;
; Copies sz bytes from src_ptr to dst_ptr with NEON loads/stores.
;
; This is not a full memcpy function: it returns nothing (r0 is left
; pointing just past the last byte written, not at dst_ptr), and the data
; is moved front-to-back in chunks, so overlapping buffers are not supported.
;
; ABI:   AAPCS (ARM state)
; In:    r0 = dst_ptr
;        r1 = src_ptr
;        r2 = sz, used as an unsigned byte count (a negative int would be
;             interpreted as a huge length)
; Out:   none
; Clobb: r0, r1, r3, r12, q0-q3, q8-q15, flags
; Stack: not used
;
; q4-q7 (d8-d15) are deliberately NOT used: AAPCS requires a callee to
; preserve them, and reusing the caller-saved q0-q3/q8-q15 avoids having to
; spill them to the stack.
;=========================================
|vp8_memcpy_partial_neon| PROC
    ; NOTE: pld preloads of [r1], [r1, #128], [r1, #256], [r1, #384] were
    ; left disabled here and at the bottom of the bulk loop; they need
    ; tuning on a real device before being enabled.

    movs            r12, r2, lsr #8         ;r12 = number of whole 256-byte chunks
    beq             copy_tail_neon          ;sz < 256: no bulk pass (entering the
                                            ;loop with r12 == 0 would wrap the counter)

    ; Bulk loop: 256 bytes per iteration. Loads and stores are interleaved;
    ; every register pair is stored before it is loaded again.
    ; The flags set by "subs" survive to the "bne" because NEON load/store
    ; instructions do not modify the condition flags.
memcpy_neon_loop
    vld1.8          {q0, q1}, [r1]!         ;load src data
    subs            r12, r12, #1            ;one chunk fewer to go
    vld1.8          {q2, q3}, [r1]!
    vst1.8          {q0, q1}, [r0]!         ;copy to dst_ptr
    vld1.8          {q8, q9}, [r1]!
    vst1.8          {q2, q3}, [r0]!
    vld1.8          {q10, q11}, [r1]!
    vst1.8          {q8, q9}, [r0]!
    vld1.8          {q12, q13}, [r1]!
    vst1.8          {q10, q11}, [r0]!
    vld1.8          {q14, q15}, [r1]!
    vst1.8          {q12, q13}, [r0]!
    vld1.8          {q0, q1}, [r1]!         ;q0-q3 were stored above, safe to reuse
    vst1.8          {q14, q15}, [r0]!
    vld1.8          {q2, q3}, [r1]!
    vst1.8          {q0, q1}, [r0]!
    vst1.8          {q2, q3}, [r0]!

    bne             memcpy_neon_loop

copy_tail_neon
    and             r3, r2, #0xff           ;r3 = bytes left after the bulk pass (0..255)
    subs            r3, r3, #16             ;bias by one block so the carry flag
    blo             extra_copy_bytes        ;tells whether a full 16 bytes remain

    ; Invariant at loop entry: at least 16 bytes remain, r3 = remaining - 16.
extra_copy_neon_loop
    vld1.8          {q0}, [r1]!             ;load src data
    subs            r3, r3, #16
    vst1.8          {q0}, [r0]!
    bhs             extra_copy_neon_loop    ;no borrow: another full block remains

extra_copy_bytes
    adds            r3, r3, #16             ;undo the bias: r3 = remaining bytes (0..15)
    beq             done_copy_neon_loop

    ; Byte-wise copy of the last 1..15 bytes so nothing is read or written
    ; beyond sz. r12 is free to use as scratch once the bulk loop is done.
extra_copy_byte_loop
    ldrb            r12, [r1], #1
    subs            r3, r3, #1
    strb            r12, [r0], #1
    bne             extra_copy_byte_loop

done_copy_neon_loop
    bx              lr
    ENDP
|
69 |
|
70 END |