media/libvpx/vp9/common/arm/neon/vp9_copy_neon.asm

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libvpx/vp9/common/arm/neon/vp9_copy_neon.asm	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,84 @@
     1.4 +;
     1.5 +;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
     1.6 +;
     1.7 +;  Use of this source code is governed by a BSD-style license
     1.8 +;  that can be found in the LICENSE file in the root of the source
     1.9 +;  tree. An additional intellectual property rights grant can be found
    1.10 +;  in the file PATENTS.  All contributing project authors may
    1.11 +;  be found in the AUTHORS file in the root of the source tree.
    1.12 +;
    1.13 +
    1.14 +    EXPORT  |vp9_convolve_copy_neon|          ; make the routine's symbol visible to the linker
    1.15 +    ARM                                       ; assemble what follows as ARM (A32) instructions
    1.16 +    REQUIRE8                                  ; declare that this code requires 8-byte stack alignment
    1.17 +    PRESERVE8                                 ; declare that this code preserves 8-byte stack alignment
    1.18 +
    1.19 +    AREA ||.text||, CODE, READONLY, ALIGN=2   ; read-only code section, aligned to 2^2 = 4 bytes
    1.20 +
    1.21 +|vp9_convolve_copy_neon| PROC                 ; row-by-row block copy: r0 = src, r1 = src stride, r2 = dst, r3 = dst stride (bytes)
    1.22 +    push                {r4-r5, lr}           ; save the two work registers and the return address (12 bytes)
    1.23 +    ldrd                r4, r5, [sp, #28]     ; r4/r5 = stack words at entry-sp +16/+20; presumably the w and h arguments - confirm against the C prototype
    1.24 +
    1.25 +    cmp                 r4, #32               ; r4 selects the per-row copy width
    1.26 +    bgt                 copy64                ; r4 > 32  -> 64 bytes per row
    1.27 +    beq                 copy32                ; r4 == 32 -> 32 bytes per row
    1.28 +    cmp                 r4, #8
    1.29 +    bgt                 copy16                ; 8 < r4 < 32 -> 16 bytes per row
    1.30 +    beq                 copy8                 ; r4 == 8  -> 8 bytes per row
    1.31 +    b                   copy4                 ; anything smaller -> 4 bytes per row
    1.32 +
    1.33 +copy64                                        ; 64-byte rows: each row is moved as two 32-byte halves
    1.34 +    sub                 lr, r1, #32           ; lr = src stride - 32: the first load already advances r0 by 32
    1.35 +    sub                 r3, r3, #32           ; same compensation for dst; r3 is not needed unmodified on this path
    1.36 +copy64_h                                      ; loop: one row per iteration, r5 = rows remaining
    1.37 +    pld                 [r0, r1, lsl #1]      ; preload hint for r0 + 2*r1 (two source rows ahead)
    1.38 +    vld1.8              {q0-q1}, [r0]!        ; load bytes 0..31 of the row, r0 += 32
    1.39 +    vld1.8              {q2-q3}, [r0], lr     ; load bytes 32..63, then step r0 to the start of the next row
    1.40 +    vst1.8              {q0-q1}, [r2@128]!    ; store first half, r2 += 32; @128 asserts a 16-byte aligned dst address
    1.41 +    vst1.8              {q2-q3}, [r2@128], r3 ; store second half, then step r2 to the start of the next row
    1.42 +    subs                r5, r5, #1            ; one row done
    1.43 +    bgt                 copy64_h              ; repeat while rows remain (body always runs at least once)
    1.44 +    pop                 {r4-r5, pc}           ; restore work registers and return
    1.45 +
    1.46 +copy32                                        ; 32-byte rows, two rows per iteration
    1.47 +    pld                 [r0, r1, lsl #1]      ; preload hint two source rows ahead of r0
    1.48 +    vld1.8              {q0-q1}, [r0], r1     ; load row n, advance r0 by the src stride
    1.49 +    pld                 [r0, r1, lsl #1]
    1.50 +    vld1.8              {q2-q3}, [r0], r1     ; load row n+1
    1.51 +    vst1.8              {q0-q1}, [r2@128], r3 ; store row n (dst address asserted 16-byte aligned), advance by dst stride
    1.52 +    vst1.8              {q2-q3}, [r2@128], r3 ; store row n+1
    1.53 +    subs                r5, r5, #2            ; two rows done; an odd count would copy one extra row - NOTE(review): presumably callers pass an even count
    1.54 +    bgt                 copy32                ; repeat while rows remain
    1.55 +    pop                 {r4-r5, pc}           ; restore work registers and return
    1.56 +
    1.57 +copy16                                        ; 16-byte rows, two rows per iteration
    1.58 +    pld                 [r0, r1, lsl #1]      ; preload hint two source rows ahead of r0
    1.59 +    vld1.8              {q0}, [r0], r1        ; load row n, advance r0 by the src stride
    1.60 +    pld                 [r0, r1, lsl #1]
    1.61 +    vld1.8              {q1}, [r0], r1        ; load row n+1
    1.62 +    vst1.8              {q0}, [r2@128], r3    ; store row n (dst address asserted 16-byte aligned), advance by dst stride
    1.63 +    vst1.8              {q1}, [r2@128], r3    ; store row n+1
    1.64 +    subs                r5, r5, #2            ; two rows done (same odd-count caveat as the 32-byte path)
    1.65 +    bgt                 copy16                ; repeat while rows remain
    1.66 +    pop                 {r4-r5, pc}           ; restore work registers and return
    1.67 +
    1.68 +copy8                                         ; 8-byte rows, two rows per iteration
    1.69 +    pld                 [r0, r1, lsl #1]      ; preload hint two source rows ahead of r0
    1.70 +    vld1.8              {d0}, [r0], r1        ; load row n, advance r0 by the src stride
    1.71 +    pld                 [r0, r1, lsl #1]
    1.72 +    vld1.8              {d2}, [r0], r1        ; load row n+1
    1.73 +    vst1.8              {d0}, [r2@64], r3     ; store row n; @64 asserts an 8-byte aligned dst address
    1.74 +    vst1.8              {d2}, [r2@64], r3     ; store row n+1
    1.75 +    subs                r5, r5, #2            ; two rows done (same odd-count caveat as the 32-byte path)
    1.76 +    bgt                 copy8                 ; repeat while rows remain
    1.77 +    pop                 {r4-r5, pc}           ; restore work registers and return
    1.78 +
    1.79 +copy4                                         ; 4-byte rows, one row per iteration, no NEON registers used
    1.80 +    ldr                 r12, [r0], r1         ; r12 (scratch) = 4 source bytes, advance r0 by the src stride
    1.81 +    str                 r12, [r2], r3         ; write them to dst, advance r2 by the dst stride
    1.82 +    subs                r5, r5, #1            ; one row done
    1.83 +    bgt                 copy4                 ; repeat while rows remain
    1.84 +    pop                 {r4-r5, pc}           ; restore work registers and return
    1.85 +    ENDP
    1.86 +
    1.87 +    END

mercurial