media/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm

author      Michael Schloh von Bennewitz <michael@schloh.com>
date        Thu, 22 Jan 2015 13:21:57 +0100
branch      TOR_BUG_9701
changeset   15:b8a032363ba2
permissions -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_mse16x16_neon|
    EXPORT  |vp8_get4x4sse_cs_neon|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2
;============================
; r0    unsigned char *src_ptr
; r1    int source_stride
; r2    unsigned char *ref_ptr
; r3    int recon_stride
; stack unsigned int *sse
; Note: in this function the sum is never used, so that part of the
; calculation has been removed relative to vp8_variance().
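;
; For reference, a minimal scalar sketch (plain C, kept in comments so the
; listing stays assemblable) of what this routine computes. The function
; name below is illustrative, and the prototype is assumed to match the
; argument list documented above:
;
;   unsigned int mse16x16_scalar_sketch(const unsigned char *src_ptr,
;                                       int source_stride,
;                                       const unsigned char *ref_ptr,
;                                       int recon_stride,
;                                       unsigned int *sse)
;   {
;       unsigned int total = 0;
;       int r, c;
;       for (r = 0; r < 16; r++) {
;           for (c = 0; c < 16; c++) {
;               int diff = src_ptr[c] - ref_ptr[c];   /* widen, then square */
;               total += (unsigned int)(diff * diff);
;           }
;           src_ptr += source_stride;
;           ref_ptr += recon_stride;
;       }
;       *sse = total;   /* written through the stack argument */
;       return total;   /* also returned in r0 */
;   }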

|vp8_mse16x16_neon| PROC
    vmov.i8     q7, #0              ;q7, q8, q9, q10 - sse
    vmov.i8     q8, #0
    vmov.i8     q9, #0
    vmov.i8     q10, #0

    mov         r12, #8

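; Each loop iteration below processes two 16-byte rows, so eight iterations
; cover the full 16x16 block. Differences are widened to 16 bits with
; vsubl.u8 and their squares accumulated into four 32-bit accumulators
; (q7-q10); the largest possible total (16*16*255*255 = 16,646,400) fits
; easily in 32 bits, so the accumulators cannot overflow.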
mse16x16_neon_loop
    vld1.8      {q0}, [r0], r1      ;Load up source and reference
    vld1.8      {q2}, [r2], r3
    vld1.8      {q1}, [r0], r1
    vld1.8      {q3}, [r2], r3

    vsubl.u8    q11, d0, d4
    vsubl.u8    q12, d1, d5
    vsubl.u8    q13, d2, d6
    vsubl.u8    q14, d3, d7

    vmlal.s16   q7, d22, d22
    vmlal.s16   q8, d23, d23

    subs        r12, r12, #1

    vmlal.s16   q9, d24, d24
    vmlal.s16   q10, d25, d25
    vmlal.s16   q7, d26, d26
    vmlal.s16   q8, d27, d27
    vmlal.s16   q9, d28, d28
    vmlal.s16   q10, d29, d29

    bne         mse16x16_neon_loop

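; Reduce the four 32-bit accumulators to a single scalar: pair them up,
; fold the result with a pairwise add into 64-bit lanes, then add those
; lanes so the final sum lands in d0[0].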
    vadd.u32    q7, q7, q8
    vadd.u32    q9, q9, q10

    ldr         r12, [sp]           ;load *sse from stack

    vadd.u32    q10, q7, q9
    vpaddl.u32  q1, q10
    vadd.u64    d0, d2, d3

    vst1.32     {d0[0]}, [r12]
    vmov.32     r0, d0[0]

    bx          lr

    ENDP


;=============================
; r0    unsigned char *src_ptr,
; r1    int source_stride,
; r2    unsigned char *ref_ptr,
; r3    int recon_stride
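;
; A scalar sketch (plain C, comments only) of the computation; the name is
; illustrative and the prototype is assumed from the arguments above. Note
; there is no *sse output here, only the return value:
;
;   unsigned int get4x4sse_cs_scalar_sketch(const unsigned char *src_ptr,
;                                           int source_stride,
;                                           const unsigned char *ref_ptr,
;                                           int recon_stride)
;   {
;       unsigned int total = 0;
;       int r, c;
;       for (r = 0; r < 4; r++) {
;           for (c = 0; c < 4; c++) {
;               int diff = src_ptr[c] - ref_ptr[c];
;               total += (unsigned int)(diff * diff);
;           }
;           src_ptr += source_stride;
;           ref_ptr += recon_stride;
;       }
;       return total;   /* returned in r0 */
;   }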
|vp8_get4x4sse_cs_neon| PROC
    vld1.8      {d0}, [r0], r1      ;Load up source and reference
    vld1.8      {d4}, [r2], r3
    vld1.8      {d1}, [r0], r1
    vld1.8      {d5}, [r2], r3
    vld1.8      {d2}, [r0], r1
    vld1.8      {d6}, [r2], r3
    vld1.8      {d3}, [r0], r1
    vld1.8      {d7}, [r2], r3

    vsubl.u8    q11, d0, d4
    vsubl.u8    q12, d1, d5
    vsubl.u8    q13, d2, d6
    vsubl.u8    q14, d3, d7

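; Only the low halves of the widened differences (d22, d24, d26, d28,
; i.e. the first four pixels of each row) are squared below, so the result
; covers a 4x4 block even though eight bytes per row were loaded.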
    vmull.s16   q7, d22, d22
    vmull.s16   q8, d24, d24
    vmull.s16   q9, d26, d26
    vmull.s16   q10, d28, d28

    vadd.u32    q7, q7, q8
    vadd.u32    q9, q9, q10
    vadd.u32    q9, q7, q9

    vpaddl.u32  q1, q9
    vadd.u64    d0, d2, d3

    vmov.32     r0, d0[0]
    bx          lr

    ENDP

    END
