media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 ;
michael@0 2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 EXPORT |vp8_mse16x16_armv6| ; make the routine's symbol visible to other object files
michael@0 13
michael@0 14 ARM ; assemble the following code as ARM (not Thumb) instructions
michael@0 15
michael@0 16 AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code section, aligned to 2^2 = 4 bytes
michael@0 17
michael@0 18 ; r0 unsigned char *src_ptr (source block, 16 bytes are read per row)
michael@0 19 ; r1 int source_stride (added to src_ptr after each row)
michael@0 20 ; r2 unsigned char *ref_ptr (reference block, 16 bytes are read per row)
michael@0 21 ; r3 int recon_stride (added to ref_ptr after each row)
michael@0 22 ; stack unsigned int *sse (receives the result; the same value is returned in r0)
michael@0 23 ;
michael@0 24 ;note: Computes the sum of squared byte differences over 16 rows of 16 pixels.
michael@0 25 ; Based on vp8_variance16x16_armv6; the pixel sum is not needed here, so only the squared differences are accumulated.
michael@0 26
michael@0 27 |vp8_mse16x16_armv6| PROC
michael@0 28
michael@0 29 push {r4-r9, lr} ; 7 registers = 28 bytes, so the stacked sse argument ends up at [sp, #28]
michael@0 30
michael@0 31 pld [r0, r1, lsl #0] ; preload src_ptr + source_stride (next source row)
michael@0 32 pld [r2, r3, lsl #0] ; preload ref_ptr + recon_stride (next reference row)
michael@0 33
michael@0 34 mov r12, #16 ; set loop counter to 16 (=block height)
michael@0 35 mov r4, #0 ; initialize sse = 0
michael@0 36
michael@0 37 loop
michael@0 38 ; 1st 4 pixels
michael@0 39 ldr r5, [r0, #0x0] ; load 4 src pixels
michael@0 40 ldr r6, [r2, #0x0] ; load 4 ref pixels
michael@0 41
michael@0 42 mov lr, #0 ; constant zero, used as the "else" operand of every sel below
michael@0 43
michael@0 44 usub8 r8, r5, r6 ; per-byte src - ref; sets one GE flag per byte where src >= ref
michael@0 45 pld [r0, r1, lsl #1] ; preload the source row two strides ahead
michael@0 46 sel r7, r8, lr ; keep src - ref where src >= ref, zero elsewhere
michael@0 47 usub8 r9, r6, r5 ; per-byte ref - src; GE flags now mark ref >= src
michael@0 48 pld [r2, r3, lsl #1] ; preload the reference row two strides ahead
michael@0 49 sel r8, r9, lr ; keep ref - src where ref >= src, zero elsewhere
michael@0 50
michael@0 51 ; at most one of r7/r8 is non-zero in each byte lane, so OR-ing them gives |src - ref|
michael@0 54 orr r8, r8, r7 ; absolute differences of all 4 pixels
michael@0 55
michael@0 56 ldr r5, [r0, #0x4] ; load 4 src pixels (for the 2nd group, issued early)
michael@0 57
michael@0 58 ; calculate sse
michael@0 59 uxtb16 r6, r8 ; bytes 0 and 2 zero-extended to halfwords
michael@0 60 uxtb16 r7, r8, ror #8 ; bytes 1 and 3 zero-extended to halfwords
michael@0 61 smlad r4, r6, r6, r4 ; sse += d0*d0 + d2*d2
michael@0 62
michael@0 63 ; 2nd 4 pixels
michael@0 64 ldr r6, [r2, #0x4] ; load 4 ref pixels
michael@0 65 smlad r4, r7, r7, r4 ; sse += d1*d1 + d3*d3 (finishes the 1st group)
michael@0 66
michael@0 67 usub8 r8, r5, r6 ; per-byte src - ref, GE flags mark src >= ref
michael@0 68 sel r7, r8, lr ; keep src - ref where src >= ref, zero elsewhere
michael@0 69 usub8 r9, r6, r5 ; per-byte ref - src, GE flags mark ref >= src
michael@0 70 sel r8, r9, lr ; keep ref - src where ref >= src, zero elsewhere
michael@0 71
michael@0 72 ; merge the two one-sided differences into |src - ref|
michael@0 75 orr r8, r8, r7 ; absolute differences of all 4 pixels
michael@0 76 ldr r5, [r0, #0x8] ; load 4 src pixels (for the 3rd group, issued early)
michael@0 77 ; calculate sse
michael@0 78 uxtb16 r6, r8 ; bytes 0 and 2 zero-extended to halfwords
michael@0 79 uxtb16 r7, r8, ror #8 ; bytes 1 and 3 zero-extended to halfwords
michael@0 80 smlad r4, r6, r6, r4 ; sse += d0*d0 + d2*d2
michael@0 81
michael@0 82 ; 3rd 4 pixels
michael@0 83 ldr r6, [r2, #0x8] ; load 4 ref pixels
michael@0 84 smlad r4, r7, r7, r4 ; sse += d1*d1 + d3*d3 (finishes the 2nd group)
michael@0 85
michael@0 86 usub8 r8, r5, r6 ; per-byte src - ref, GE flags mark src >= ref
michael@0 87 sel r7, r8, lr ; keep src - ref where src >= ref, zero elsewhere
michael@0 88 usub8 r9, r6, r5 ; per-byte ref - src, GE flags mark ref >= src
michael@0 89 sel r8, r9, lr ; keep ref - src where ref >= src, zero elsewhere
michael@0 90
michael@0 91 ; merge the two one-sided differences into |src - ref|
michael@0 94 orr r8, r8, r7 ; absolute differences of all 4 pixels
michael@0 95
michael@0 96 ldr r5, [r0, #0xc] ; load 4 src pixels (for the 4th group, issued early)
michael@0 97
michael@0 98 ; calculate sse
michael@0 99 uxtb16 r6, r8 ; bytes 0 and 2 zero-extended to halfwords
michael@0 100 uxtb16 r7, r8, ror #8 ; bytes 1 and 3 zero-extended to halfwords
michael@0 101 smlad r4, r6, r6, r4 ; sse += d0*d0 + d2*d2
michael@0 102
michael@0 103 ; 4th 4 pixels
michael@0 104 ldr r6, [r2, #0xc] ; load 4 ref pixels
michael@0 105 smlad r4, r7, r7, r4 ; sse += d1*d1 + d3*d3 (finishes the 3rd group)
michael@0 106
michael@0 107 usub8 r8, r5, r6 ; per-byte src - ref, GE flags mark src >= ref
michael@0 108 add r0, r0, r1 ; advance src_ptr to the next row (plain add, GE flags untouched)
michael@0 109 sel r7, r8, lr ; keep src - ref where src >= ref, zero elsewhere
michael@0 110 usub8 r9, r6, r5 ; per-byte ref - src, GE flags mark ref >= src
michael@0 111 add r2, r2, r3 ; advance ref_ptr to the next row (plain add, GE flags untouched)
michael@0 112 sel r8, r9, lr ; keep ref - src where ref >= src, zero elsewhere
michael@0 113
michael@0 114 ; merge the two one-sided differences into |src - ref|
michael@0 117 orr r8, r8, r7 ; absolute differences of all 4 pixels
michael@0 118
michael@0 119 subs r12, r12, #1 ; next row; the Z flag set here is consumed by the bne below
michael@0 120
michael@0 121 ; calculate sse (uxtb16/smlad leave the Z flag from subs intact)
michael@0 122 uxtb16 r6, r8 ; bytes 0 and 2 zero-extended to halfwords
michael@0 123 uxtb16 r7, r8, ror #8 ; bytes 1 and 3 zero-extended to halfwords
michael@0 124 smlad r4, r6, r6, r4 ; sse += d0*d0 + d2*d2
michael@0 125 smlad r4, r7, r7, r4 ; sse += d1*d1 + d3*d3
michael@0 126
michael@0 127 bne loop ; repeat until all 16 rows are processed
michael@0 128
michael@0 129 ; return stuff
michael@0 130 ldr r1, [sp, #28] ; get address of sse (first stacked argument, above the 7 pushed registers)
michael@0 131 mov r0, r4 ; return sse
michael@0 132 str r4, [r1] ; store sse through the caller's pointer
michael@0 133
michael@0 134 pop {r4-r9, pc} ; restore saved registers and return
michael@0 135
michael@0 136 ENDP
michael@0 137
michael@0 138 END ; end of the assembly source

mercurial