media/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,70 @@
     1.4 +;
     1.5 +;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     1.6 +;
     1.7 +;  Use of this source code is governed by a BSD-style license
     1.8 +;  that can be found in the LICENSE file in the root of the source
     1.9 +;  tree. An additional intellectual property rights grant can be found
    1.10 +;  in the file PATENTS.  All contributing project authors may
    1.11 +;  be found in the AUTHORS file in the root of the source tree.
    1.12 +;
    1.13 +
    1.14 +
    1.15 +    EXPORT |vp8_memcpy_partial_neon|
    1.16 +    ;A32 (non-Thumb) encoding; the stack must be, and is kept, 8-byte aligned
    1.17 +    ARM
    1.18 +    REQUIRE8
    1.19 +    PRESERVE8
    1.20 +    ;read-only code section aligned to 2^2 = 4 bytes
    1.21 +    AREA ||.text||, CODE, READONLY, ALIGN=2
    1.22 +;=========================================
    1.23 +;this is not a full memcpy function!!!
    1.24 +;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
    1.25 +;                             int sz);
    1.26 +;Copies sz bytes: 256-byte blocks first, then the rest 16 bytes at a time, so
    1.27 +;sz should be a multiple of 16 (a ragged tail is rounded up to 16 bytes).
    1.28 +;r0 = dst_ptr, r1 = src_ptr, r2 = sz. Changes r0, r1, r3, r12, flags, q0-q3 and
    1.29 +;q8-q15; q4-q7 (d8-d15, callee-saved under the AAPCS) are left untouched.
    1.30 +|vp8_memcpy_partial_neon| PROC
    1.31 +    movs            r12, r2, lsr #8                 ;r12 = count of 256-byte blocks
    1.32 +    beq             memcpy_neon_tail                ;sz < 256: skip the block loop
    1.33 +
    1.34 +memcpy_neon_loop
    1.35 +    vld1.8          {q0, q1}, [r1]!                 ;load src data
    1.36 +    subs            r12, r12, #1                    ;flags are consumed by the bne below
    1.37 +    vld1.8          {q2, q3}, [r1]!
    1.38 +    vst1.8          {q0, q1}, [r0]!                 ;copy to dst_ptr
    1.39 +    vld1.8          {q8, q9}, [r1]!
    1.40 +    vst1.8          {q2, q3}, [r0]!
    1.41 +    vld1.8          {q10, q11}, [r1]!
    1.42 +    vst1.8          {q8, q9}, [r0]!
    1.43 +    vld1.8          {q12, q13}, [r1]!
    1.44 +    vst1.8          {q10, q11}, [r0]!
    1.45 +    vld1.8          {q14, q15}, [r1]!
    1.46 +    vst1.8          {q12, q13}, [r0]!
    1.47 +    vld1.8          {q0, q1}, [r1]!                 ;q0-q3 were stored above, reuse them
    1.48 +    vst1.8          {q14, q15}, [r0]!
    1.49 +    vld1.8          {q2, q3}, [r1]!
    1.50 +    vst1.8          {q0, q1}, [r0]!
    1.51 +    vst1.8          {q2, q3}, [r0]!
    1.52 +
    1.53 +    ;pld                [r1]                        ;preload pred data -- need to adjust for real device
    1.54 +    ;pld                [r1, #128]
    1.55 +    ;pld                [r1, #256]
    1.56 +    ;pld                [r1, #384]
    1.57 +    bne             memcpy_neon_loop
    1.58 +
    1.59 +memcpy_neon_tail
    1.60 +    ands            r3, r2, #0xff                   ;extra copy: r3 = sz & 0xff
    1.61 +    beq             done_copy_neon_loop
    1.62 +
    1.63 +extra_copy_neon_loop
    1.64 +    vld1.8          {q0}, [r1]!                     ;load src data
    1.65 +    subs            r3, r3, #16
    1.66 +    vst1.8          {q0}, [r0]!
    1.67 +    bgt             extra_copy_neon_loop            ;bgt, not bne: also stops on a ragged tail
    1.68 +
    1.69 +done_copy_neon_loop
    1.70 +    bx              lr
    1.71 +    ENDP
    1.72 +
    1.73 +    END

mercurial