;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

;-----------------------------------------------------------------------
; File:   media/libvpx/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm
; Syntax: armasm-style (EXPORT / AREA / PROC / ENDP), ARM state.
; Target: ARMv6 (uses UXTB16, USUB16, PKHBT, PKHTB).
;
; Three residual routines: each computes diff = src - pred on 8-bit
; pixels and stores the result as 16-bit values.
;
; Common inner step (4 pixels per 32-bit load):
;   uxtb16 rX, w          -> [b2 | b0]  bytes 0 and 2 widened to halfwords
;   uxtb16 rX, w, ror #8  -> [b3 | b1]  bytes 1 and 3 widened to halfwords
;   usub16                -> two independent 16-bit subtractions
;   pkhbt / pkhtb         -> re-interleave into [d1 | d0] and [d3 | d2]
;
; NOTE(review): the s0..s3 / d0..d3 ordering in the comments assumes a
; little-endian data layout - confirm for the build target.
; NOTE(review): pixels are fetched with word LDRs; this assumes the
; src/pred row pointers are word-aligned or that unaligned word access
; is enabled - confirm against callers.
;-----------------------------------------------------------------------


    EXPORT  |vp8_subtract_mby_armv6|
    EXPORT  |vp8_subtract_mbuv_armv6|
    EXPORT  |vp8_subtract_b_armv6|

    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_subtract_b_armv6
;
; Subtracts a 4x4 predictor block from a 4x4 source block.
;
; In:    r0 = BLOCK  *be
;        r1 = BLOCKD *bd
;        r2 = int pitch   (byte step between predictor rows; the diff
;                          pointer advances by pitch * 2 bytes per row)
; Reads: be->base_src, be->src, be->src_diff, be->src_stride and
;        bd->predictor via the vp8_block_* / vp8_blockd_* offsets
;        supplied by vp8_asm_enc_offsets.asm.
; Out:   4 rows of 4 halfwords written through be->src_diff.
; Clobb: r0, r1, r3, flags (NZCV from SUBS, GE from USUB16).
;        r4-r9 are saved and restored.
;-----------------------------------------------------------------------
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]   ; r4 = be->base_src
    ldr     r5, [r0, #vp8_block_src]        ; r5 = be->src (offset)
    ldr     r6, [r0, #vp8_block_src_diff]   ; r6 = diff output pointer

    ldr     r3, [r4]                        ; r3 = *base_src
    ldr     r7, [r0, #vp8_block_src_stride] ; r7 = source row step
    add     r3, r3, r5                      ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor] ; r8 = predictor pointer

    mov     r9, #4                          ; loop count: 4 rows

loop_block

    ldr     r0, [r3], r7                    ; 4 src pixels, then src += stride
    ldr     r1, [r8], r2                    ; 4 pred pixels, then pred += pitch

    uxtb16  r4, r0                          ; [s2 | s0]
    uxtb16  r5, r1                          ; [p2 | p0]
    uxtb16  r0, r0, ror #8                  ; [s3 | s1]
    uxtb16  r1, r1, ror #8                  ; [p3 | p1]

    usub16  r4, r4, r5                      ; [d2 | d0]
    usub16  r5, r0, r1                      ; [d3 | d1]

    ; NZCV set here is consumed by the BNE below; nothing in between
    ; uses an S-suffixed (flag-setting) form.
    subs    r9, r9, #1                      ; decrement loop counter

    pkhbt   r0, r4, r5, lsl #16             ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16             ; [d3 | d2]

    str     r0, [r6, #0]                    ; diff[0..1]
    str     r1, [r6, #4]                    ; diff[2..3]

    add     r6, r6, r2, lsl #1              ; diff += pitch (in halfwords)
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                              ; was "mov pc, lr": BX matches the
                                            ; other routines in this file and
                                            ; also returns correctly to a
                                            ; Thumb-state caller

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mbuv_armv6
;
; Subtracts the 8x8 U and 8x8 V predictors from the 8x8 U and V sources.
;
; In:    r0       = short *diff
;        r1       = unsigned char *usrc
;        r2       = unsigned char *vsrc
;        r3       = int src_stride
;        [sp, #0] = unsigned char *upred    (offsets are at entry)
;        [sp, #4] = unsigned char *vpred
;        [sp, #8] = int pred_stride
; Out:   U differences are written contiguously starting at
;        (char *)diff + 512, immediately followed by the V differences
;        (8 rows x 8 halfwords each).
; Clobb: r0, r1, r2, r12, flags (NZCV from SUBS, GE from USUB16).
;        r4-r11 are saved and restored.
;-----------------------------------------------------------------------
|vp8_subtract_mbuv_armv6| PROC

    stmfd   sp!, {r4-r11}                   ; 32 bytes pushed: stack args
                                            ; are now at [sp, #32] and up

    add     r0, r0, #512                    ; diff += 512 bytes (256 shorts):
                                            ; U (Cb) output starts here;
                                            ; presumably this skips the
                                            ; luma residual - verify
    mov     r4, #8                          ; loop count: 8 rows
    ldr     r5, [sp, #32]                   ; upred
    ldr     r12, [sp, #40]                  ; pred_stride

    ; Subtract U block. Each row is handled as two 4-pixel groups,
    ; (A) and (B); the loads for (B) are issued while (A) is still
    ; being packed and stored.
loop_u
    ldr     r6, [r1]                        ; usrc      (A)
    ldr     r7, [r5]                        ; upred     (A)

    uxtb16  r8, r6                          ; [s2 | s0] (A)
    uxtb16  r9, r7                          ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8                 ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8                 ; [p3 | p1] (A)

    usub16  r6, r8, r9                      ; [d2 | d0] (A)
    usub16  r7, r10, r11                    ; [d3 | d1] (A)

    ldr     r10, [r1, #4]                   ; usrc      (B)
    ldr     r11, [r5, #4]                   ; upred     (B)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (A)

    str     r8, [r0], #4                    ; diff      (A)
    uxtb16  r8, r10                         ; [s2 | s0] (B)
    str     r9, [r0], #4                    ; diff      (A)

    uxtb16  r9, r11                         ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (B)

    usub16  r6, r8, r9                      ; [d2 | d0] (B)
    usub16  r7, r10, r11                    ; [d3 | d1] (B)

    add     r1, r1, r3                      ; usrc  += src_stride
    add     r5, r5, r12                     ; upred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (B)

    str     r8, [r0], #4                    ; diff      (B)
    subs    r4, r4, #1                      ; update loop counter
    str     r9, [r0], #4                    ; diff      (B)

    bne     loop_u

    ldr     r5, [sp, #36]                   ; vpred
    mov     r4, #8                          ; loop count: 8 rows

    ; Subtract V block. Same schedule as the U loop; r0 simply carries
    ; on from where the U loop stopped writing.
loop_v
    ldr     r6, [r2]                        ; vsrc      (A)
    ldr     r7, [r5]                        ; vpred     (A)

    uxtb16  r8, r6                          ; [s2 | s0] (A)
    uxtb16  r9, r7                          ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8                 ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8                 ; [p3 | p1] (A)

    usub16  r6, r8, r9                      ; [d2 | d0] (A)
    usub16  r7, r10, r11                    ; [d3 | d1] (A)

    ldr     r10, [r2, #4]                   ; vsrc      (B)
    ldr     r11, [r5, #4]                   ; vpred     (B)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (A)

    str     r8, [r0], #4                    ; diff      (A)
    uxtb16  r8, r10                         ; [s2 | s0] (B)
    str     r9, [r0], #4                    ; diff      (A)

    uxtb16  r9, r11                         ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (B)

    usub16  r6, r8, r9                      ; [d2 | d0] (B)
    usub16  r7, r10, r11                    ; [d3 | d1] (B)

    add     r2, r2, r3                      ; vsrc  += src_stride
    add     r5, r5, r12                     ; vpred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (B)

    str     r8, [r0], #4                    ; diff      (B)
    subs    r4, r4, #1                      ; update loop counter
    str     r9, [r0], #4                    ; diff      (B)

    bne     loop_v

    ldmfd   sp!, {r4-r11}
    bx      lr

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mby_armv6
;
; Subtracts a 16x16 predictor from a 16x16 source block.
;
; In:    r0       = short *diff
;        r1       = unsigned char *src
;        r2       = int src_stride
;        r3       = unsigned char *pred
;        [sp, #0] = int pred_stride         (offset is at entry)
; Out:   16 rows x 16 halfwords written contiguously starting at diff.
; Clobb: r0, r1, r3, r12, flags (NZCV from SUBS, GE from USUB16).
;        r4-r11 are saved and restored.
;-----------------------------------------------------------------------
|vp8_subtract_mby_armv6| PROC

    stmfd   sp!, {r4-r11}                   ; 32 bytes pushed
    ldr     r12, [sp, #32]                  ; pred_stride
    mov     r4, #16                         ; loop count: 16 rows

    ; Each row is handled as four 4-pixel groups, (A)..(D); the loads
    ; for the next group are issued while the current one is still
    ; being packed and stored.
loop
    ldr     r6, [r1]                        ; src       (A)
    ldr     r7, [r3]                        ; pred      (A)

    uxtb16  r8, r6                          ; [s2 | s0] (A)
    uxtb16  r9, r7                          ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8                 ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8                 ; [p3 | p1] (A)

    usub16  r6, r8, r9                      ; [d2 | d0] (A)
    usub16  r7, r10, r11                    ; [d3 | d1] (A)

    ldr     r10, [r1, #4]                   ; src       (B)
    ldr     r11, [r3, #4]                   ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (A)

    str     r8, [r0], #4                    ; diff      (A)
    uxtb16  r8, r10                         ; [s2 | s0] (B)
    str     r9, [r0], #4                    ; diff      (A)

    uxtb16  r9, r11                         ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (B)

    usub16  r6, r8, r9                      ; [d2 | d0] (B)
    usub16  r7, r10, r11                    ; [d3 | d1] (B)

    ldr     r10, [r1, #8]                   ; src       (C)
    ldr     r11, [r3, #8]                   ; pred      (C)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (B)

    str     r8, [r0], #4                    ; diff      (B)
    uxtb16  r8, r10                         ; [s2 | s0] (C)
    str     r9, [r0], #4                    ; diff      (B)

    uxtb16  r9, r11                         ; [p2 | p0] (C)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (C)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (C)

    usub16  r6, r8, r9                      ; [d2 | d0] (C)
    usub16  r7, r10, r11                    ; [d3 | d1] (C)

    ldr     r10, [r1, #12]                  ; src       (D)
    ldr     r11, [r3, #12]                  ; pred      (D)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (C)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (C)

    str     r8, [r0], #4                    ; diff      (C)
    uxtb16  r8, r10                         ; [s2 | s0] (D)
    str     r9, [r0], #4                    ; diff      (C)

    uxtb16  r9, r11                         ; [p2 | p0] (D)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (D)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (D)

    usub16  r6, r8, r9                      ; [d2 | d0] (D)
    usub16  r7, r10, r11                    ; [d3 | d1] (D)

    add     r1, r1, r2                      ; src  += src_stride
    add     r3, r3, r12                     ; pred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (D)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (D)

    str     r8, [r0], #4                    ; diff      (D)
    subs    r4, r4, #1                      ; update loop counter
    str     r9, [r0], #4                    ; diff      (D)

    bne     loop

    ldmfd   sp!, {r4-r11}
    bx      lr

    ENDP

    END