Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | ; |
michael@0 | 2 | ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
michael@0 | 3 | ; |
michael@0 | 4 | ; Use of this source code is governed by a BSD-style license |
michael@0 | 5 | ; that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | ; tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | ; in the file PATENTS. All contributing project authors may |
michael@0 | 8 | ; be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | ; |
michael@0 | 10 | |
michael@0 | 11 | |
; Export the routine's symbol so that code in other object files can call it.
michael@0 | 12 | EXPORT |vp8_memcpy_partial_neon| |
michael@0 | 13 | |
; Assemble what follows as ARM (A32) instructions rather than Thumb.
michael@0 | 14 | ARM |
; Stack-alignment build attributes: this code requires, and preserves,
; 8-byte alignment of the stack.
michael@0 | 15 | REQUIRE8 |
michael@0 | 16 | PRESERVE8 |
michael@0 | 17 | |
; Read-only code section named .text; ALIGN=2 requests 2^2 = 4-byte alignment.
michael@0 | 18 | AREA ||.text||, CODE, READONLY, ALIGN=2 |
;=========================================
; void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
;                              int sz);
;
; Forward copy of sz bytes from src_ptr to dst_ptr using NEON loads/stores.
;
; Historically this was "not a full memcpy function": it was only safe for
; sz >= 256 with sz a multiple of 16. Those sizes are copied exactly as
; before; smaller and odd sizes are now handled instead of running away:
;   * sz <= 0          -> returns immediately
;   * sz < 256         -> the 256-byte block loop is skipped
;   * sz % 16 != 0     -> the last 1..15 bytes are copied one at a time
;
;   In:       r0 = dst_ptr, r1 = src_ptr, r2 = sz
;   Out:      none (void)
;   Clobbers: r0, r1, r3, r12, q0-q3, q8-q15, flags
;   Stack:    not used (leaf routine)
;
; q4-q7 (d8-d15) are callee-saved under the AAPCS and are deliberately not
; used, so nothing has to be pushed/popped; q0-q3 are reused for the last
; 64 bytes of every 256-byte block instead.
; The accesses carry no alignment qualifier, and the copy runs from low to
; high addresses.
|vp8_memcpy_partial_neon| PROC
    cmp             r2, #0
    bxle            lr                      ; sz <= 0: nothing to copy

    ;pld            [r1]                    ;preload pred data
    ;pld            [r1, #128]
    ;pld            [r1, #256]
    ;pld            [r1, #384]

    movs            r12, r2, lsr #8         ; r12 = number of whole 256-byte blocks
    beq             extra_copy_neon         ; none: a zero count would wrap in the
                                            ; subs/bne loop below

    ; One iteration moves 256 bytes. Each load is issued one step ahead of
    ; the store of the previously loaded pair. Invariant: r12 > 0 on entry.
memcpy_neon_loop
    vld1.8          {q0, q1}, [r1]!         ; load src data
    subs            r12, r12, #1            ; NEON ld/st leave the flags alone,
                                            ; so the bne at the bottom sees this
    vld1.8          {q2, q3}, [r1]!
    vst1.8          {q0, q1}, [r0]!         ; copy to dst_ptr
    vld1.8          {q8, q9}, [r1]!
    vst1.8          {q2, q3}, [r0]!
    vld1.8          {q10, q11}, [r1]!
    vst1.8          {q8, q9}, [r0]!
    vld1.8          {q12, q13}, [r1]!
    vst1.8          {q10, q11}, [r0]!
    vld1.8          {q14, q15}, [r1]!
    vst1.8          {q12, q13}, [r0]!
    vld1.8          {q0, q1}, [r1]!         ; q0-q3 already stored: safe to reuse
    vst1.8          {q14, q15}, [r0]!
    vld1.8          {q2, q3}, [r1]!
    vst1.8          {q0, q1}, [r0]!
    vst1.8          {q2, q3}, [r0]!

    ;pld            [r1]                    ;preload pred data -- need to adjust for real device
    ;pld            [r1, #128]
    ;pld            [r1, #256]
    ;pld            [r1, #384]

    bne             memcpy_neon_loop

extra_copy_neon
    ands            r3, r2, #0xff           ; r3 = bytes left after the blocks (0..255)
    beq             done_copy_neon_loop

    subs            r3, r3, #16             ; r3 = remaining - 16
    blt             extra_copy_bytes        ; fewer than 16 bytes left

    ; Invariant: r3 = remaining - 16 >= 0, i.e. at least 16 bytes remain.
extra_copy_neon_loop
    vld1.8          {q0}, [r1]!             ; load src data
    subs            r3, r3, #16
    vst1.8          {q0}, [r0]!
    bge             extra_copy_neon_loop

extra_copy_bytes
    adds            r3, r3, #16             ; undo the bias: r3 = remaining (0..15)
    beq             done_copy_neon_loop

    ; Byte tail; r12 is free again now that the block loop has finished.
extra_copy_byte_loop
    ldrb            r12, [r1], #1
    subs            r3, r3, #1
    strb            r12, [r0], #1
    bne             extra_copy_byte_loop

done_copy_neon_loop
    bx              lr
    ENDP
michael@0 | 69 | |
michael@0 | 70 | END |