media/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12     EXPORT |vp8_memcpy_partial_neon|
    14     ARM
    15     REQUIRE8
    16     PRESERVE8
    18     AREA ||.text||, CODE, READONLY, ALIGN=2
    19 ;=========================================
    20 ;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
    21 ;                             int sz);
    22 ;
    23 ;Copies sz bytes from src_ptr to dst_ptr, front to back, with NEON.
    24 ;  In:    r0 = dst_ptr, r1 = src_ptr, r2 = sz (byte count)
    25 ;  Out:   nothing (r0 and r1 are left pointing just past the copied data)
    26 ;  Clobb: r0, r1, r3, r12, q0-q3, q8-q15, flags
    27 ;  Stack: 64 bytes while the 256-byte loop runs (d8-d15 saved and restored)
    28 ;
    29 ;Still not a drop-in memcpy: nothing is returned, overlapping buffers are
    30 ;not handled specially, and sz is used as an unsigned count (lsr/and), so
    31 ;a negative sz is not rejected.
    32 ;=========================================
    34 |vp8_memcpy_partial_neon| PROC
    35     ;pld                [r1]                        ;preload pred data
    36     ;pld                [r1, #128]
    37     ;pld                [r1, #256]
    38     ;pld                [r1, #384]
    40     movs            r12, r2, lsr #8                 ;r12 = number of 256-byte blocks
    41     beq             memcpy_neon_tail                ;sz < 256: no full block to copy
    42     vpush           {d8-d15}                        ;q4-q7 are callee-saved (AAPCS)
    44 memcpy_neon_loop
    45     vld1.8          {q0, q1}, [r1]!                 ;load src data
    46     subs            r12, r12, #1                    ;flags consumed by bne below
    47     vld1.8          {q2, q3}, [r1]!
    48     vst1.8          {q0, q1}, [r0]!                 ;copy to dst_ptr
    49     vld1.8          {q4, q5}, [r1]!
    50     vst1.8          {q2, q3}, [r0]!
    51     vld1.8          {q6, q7}, [r1]!
    52     vst1.8          {q4, q5}, [r0]!
    53     vld1.8          {q8, q9}, [r1]!
    54     vst1.8          {q6, q7}, [r0]!
    55     vld1.8          {q10, q11}, [r1]!
    56     vst1.8          {q8, q9}, [r0]!
    57     vld1.8          {q12, q13}, [r1]!
    58     vst1.8          {q10, q11}, [r0]!
    59     vld1.8          {q14, q15}, [r1]!
    60     vst1.8          {q12, q13}, [r0]!
    61     vst1.8          {q14, q15}, [r0]!
    63     ;pld                [r1]                        ;preload pred data -- need to adjust for real device
    64     ;pld                [r1, #128]
    65     ;pld                [r1, #256]
    66     ;pld                [r1, #384]
    68     bne             memcpy_neon_loop
    69     vpop            {d8-d15}                        ;restore callee-saved q4-q7
    71 memcpy_neon_tail
    72     ands            r3, r2, #0xff                   ;r3 = bytes left over (0..255)
    73     beq             done_copy_neon_loop
    74     subs            r3, r3, #16                     ;r3 = left - 16; negative if < 16 left
    75     blt             extra_copy_bytes
    77 extra_copy_neon_loop                                ;here at least 16 bytes remain
    78     vld1.8          {q0}, [r1]!                     ;load src data
    79     subs            r3, r3, #16
    80     vst1.8          {q0}, [r0]!
    81     bge             extra_copy_neon_loop
    83 extra_copy_bytes
    84     adds            r3, r3, #16                     ;undo bias: r3 = trailing bytes (0..15)
    85     beq             done_copy_neon_loop
    87 extra_copy_byte_loop
    88     ldrb            r12, [r1], #1                   ;r12 is free again: block count is done
    89     subs            r3, r3, #1
    90     strb            r12, [r0], #1
    91     bne             extra_copy_byte_loop
    93 done_copy_neon_loop
    94     bx              lr
    95     ENDP
    97     END

mercurial