;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; Target: ARM state, ARMv6 media instructions (uxtb16 / usub16 / pkhbt / pkhtb),
; armasm syntax. All three routines are leaf functions; every callee-saved
; register they touch is pushed on entry and popped before returning.
;
; Shared technique (4 pixels per step):
;   uxtb16 rd, rs          -> bytes 0 and 2 zero-extended into two halfwords
;   uxtb16 rd, rs, ror #8  -> bytes 1 and 3 zero-extended into two halfwords
;   usub16                 -> per-halfword src - pred (wraps modulo 2^16, which
;                             is the two's-complement difference of two bytes)
;   pkhbt / pkhtb          -> re-interleave so the results land in memory as
;                             d0, d1, d2, d3
; usub16 only updates the GE flags, so the N/Z/C/V result of the loop-counter
; 'subs' is still valid at the closing 'bne' of each loop.

    EXPORT  |vp8_subtract_mby_armv6|
    EXPORT  |vp8_subtract_mbuv_armv6|
    EXPORT  |vp8_subtract_b_armv6|

    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_subtract_b_armv6
;   Writes a 4x4 block of 16-bit differences (source - predictor).
;
; In:    r0 = BLOCK  *be   (fields read: base_src, src, src_diff, src_stride)
;        r1 = BLOCKD *bd   (field read: predictor)
;        r2 = int pitch    (predictor row step in bytes; diff row step in
;                           16-bit elements, hence the 'lsl #1' below)
; Out:   none (results stored through be->src_diff)
; Clobb: r0, r1, r3, flags (r4-r9 are saved and restored)
;-----------------------------------------------------------------------
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]
    ldr     r5, [r0, #vp8_block_src]
    ldr     r6, [r0, #vp8_block_src_diff]   ; r6 = diff write pointer

    ldr     r3, [r4]
    ldr     r7, [r0, #vp8_block_src_stride] ; r7 = source row step
    add     r3, r3, r5                      ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor] ; r8 = predictor read pointer

    mov     r9, #4                          ; 4 rows

loop_block

    ldr     r0, [r3], r7                    ; 4 src bytes, then src += stride
    ldr     r1, [r8], r2                    ; 4 pred bytes, then pred += pitch

    uxtb16  r4, r0                          ; [s2 | s0]
    uxtb16  r5, r1                          ; [p2 | p0]
    uxtb16  r0, r0, ror #8                  ; [s3 | s1]
    uxtb16  r1, r1, ror #8                  ; [p3 | p1]

    usub16  r4, r4, r5                      ; [d2 | d0]
    usub16  r5, r0, r1                      ; [d3 | d1]

    subs    r9, r9, #1                      ; rows left; Z consumed by bne below

    pkhbt   r0, r4, r5, lsl #16             ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16             ; [d3 | d2]

    str     r0, [r6, #0]                    ; diff[0..1]
    str     r1, [r6, #4]                    ; diff[2..3]

    add     r6, r6, r2, lsl #1              ; diff += pitch (16-bit elements)
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                              ; interworking-safe return, same as
                                            ; the other routines in this file

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mbuv_armv6
;   Writes two 8x8 blocks of 16-bit differences (U, then V) contiguously,
;   starting 512 bytes past 'diff'.
;
; In:    r0       = short *diff
;        r1       = unsigned char *usrc
;        r2       = unsigned char *vsrc
;        r3       = int src_stride
;        [sp, #0] = unsigned char *upred   (offsets are at function entry;
;        [sp, #4] = unsigned char *vpred    +32 after the register push)
;        [sp, #8] = int pred_stride
; Out:   none (results stored through diff)
; Clobb: r0, r1, r2, r12, flags (r4-r11 are saved and restored)
;-----------------------------------------------------------------------
|vp8_subtract_mbuv_armv6| PROC

    stmfd   sp!, {r4-r11}                   ; 8 registers = 32 bytes

    add     r0, r0, #512                    ; skip 512 bytes (256 shorts, the
                                            ; amount vp8_subtract_mby_armv6
                                            ; writes): diff -> U output
    mov     r4, #8                          ; 8 rows
    ldr     r5, [sp, #32]                   ; upred
    ldr     r12, [sp, #40]                  ; pred_stride

    ; Subtract U block: 8 pixels per row as two 4-pixel groups (A, B)
loop_u
    ldr     r6, [r1]                        ; usrc (A)
    ldr     r7, [r5]                        ; upred (A)

    uxtb16  r8, r6                          ; [s2 | s0] (A)
    uxtb16  r9, r7                          ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8                 ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8                 ; [p3 | p1] (A)

    usub16  r6, r8, r9                      ; [d2 | d0] (A)
    usub16  r7, r10, r11                    ; [d3 | d1] (A)

    ldr     r10, [r1, #4]                   ; usrc (B)
    ldr     r11, [r5, #4]                   ; upred (B)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (A)

    str     r8, [r0], #4                    ; diff (A)
    uxtb16  r8, r10                         ; [s2 | s0] (B)
    str     r9, [r0], #4                    ; diff (A)

    uxtb16  r9, r11                         ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (B)

    usub16  r6, r8, r9                      ; [d2 | d0] (B)
    usub16  r7, r10, r11                    ; [d3 | d1] (B)

    add     r1, r1, r3                      ; usrc  += src_stride
    add     r5, r5, r12                     ; upred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (B)

    str     r8, [r0], #4                    ; diff (B)
    subs    r4, r4, #1                      ; rows left; Z consumed by bne below
    str     r9, [r0], #4                    ; diff (B)

    bne     loop_u

    ; r0 has advanced 8 rows * 16 bytes, so V output follows U directly
    ldr     r5, [sp, #36]                   ; vpred
    mov     r4, #8                          ; 8 rows

    ; Subtract V block: same scheme as the U block
loop_v
    ldr     r6, [r2]                        ; vsrc (A)
    ldr     r7, [r5]                        ; vpred (A)

    uxtb16  r8, r6                          ; [s2 | s0] (A)
    uxtb16  r9, r7                          ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8                 ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8                 ; [p3 | p1] (A)

    usub16  r6, r8, r9                      ; [d2 | d0] (A)
    usub16  r7, r10, r11                    ; [d3 | d1] (A)

    ldr     r10, [r2, #4]                   ; vsrc (B)
    ldr     r11, [r5, #4]                   ; vpred (B)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (A)

    str     r8, [r0], #4                    ; diff (A)
    uxtb16  r8, r10                         ; [s2 | s0] (B)
    str     r9, [r0], #4                    ; diff (A)

    uxtb16  r9, r11                         ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (B)

    usub16  r6, r8, r9                      ; [d2 | d0] (B)
    usub16  r7, r10, r11                    ; [d3 | d1] (B)

    add     r2, r2, r3                      ; vsrc  += src_stride
    add     r5, r5, r12                     ; vpred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (B)

    str     r8, [r0], #4                    ; diff (B)
    subs    r4, r4, #1                      ; rows left; Z consumed by bne below
    str     r9, [r0], #4                    ; diff (B)

    bne     loop_v

    ldmfd   sp!, {r4-r11}
    bx      lr

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mby_armv6
;   Writes a 16x16 block of 16-bit differences (source - predictor)
;   contiguously from 'diff' (32 bytes per row, 512 bytes in total).
;
; In:    r0       = short *diff
;        r1       = unsigned char *src
;        r2       = int src_stride
;        r3       = unsigned char *pred
;        [sp, #0] = int pred_stride   (offset at function entry;
;                                      +32 after the register push)
; Out:   none (results stored through diff)
; Clobb: r0, r1, r3, r12, flags (r4-r11 are saved and restored)
;-----------------------------------------------------------------------
|vp8_subtract_mby_armv6| PROC

    stmfd   sp!, {r4-r11}                   ; 8 registers = 32 bytes
    ldr     r12, [sp, #32]                  ; pred_stride
    mov     r4, #16                         ; 16 rows

    ; 16 pixels per row as four 4-pixel groups (A, B, C, D); the loads for
    ; the next group are issued before the current group is stored
loop_y
    ldr     r6, [r1]                        ; src (A)
    ldr     r7, [r3]                        ; pred (A)

    uxtb16  r8, r6                          ; [s2 | s0] (A)
    uxtb16  r9, r7                          ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8                 ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8                 ; [p3 | p1] (A)

    usub16  r6, r8, r9                      ; [d2 | d0] (A)
    usub16  r7, r10, r11                    ; [d3 | d1] (A)

    ldr     r10, [r1, #4]                   ; src (B)
    ldr     r11, [r3, #4]                   ; pred (B)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (A)

    str     r8, [r0], #4                    ; diff (A)
    uxtb16  r8, r10                         ; [s2 | s0] (B)
    str     r9, [r0], #4                    ; diff (A)

    uxtb16  r9, r11                         ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (B)

    usub16  r6, r8, r9                      ; [d2 | d0] (B)
    usub16  r7, r10, r11                    ; [d3 | d1] (B)

    ldr     r10, [r1, #8]                   ; src (C)
    ldr     r11, [r3, #8]                   ; pred (C)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (B)

    str     r8, [r0], #4                    ; diff (B)
    uxtb16  r8, r10                         ; [s2 | s0] (C)
    str     r9, [r0], #4                    ; diff (B)

    uxtb16  r9, r11                         ; [p2 | p0] (C)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (C)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (C)

    usub16  r6, r8, r9                      ; [d2 | d0] (C)
    usub16  r7, r10, r11                    ; [d3 | d1] (C)

    ldr     r10, [r1, #12]                  ; src (D)
    ldr     r11, [r3, #12]                  ; pred (D)

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (C)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (C)

    str     r8, [r0], #4                    ; diff (C)
    uxtb16  r8, r10                         ; [s2 | s0] (D)
    str     r9, [r0], #4                    ; diff (C)

    uxtb16  r9, r11                         ; [p2 | p0] (D)
    uxtb16  r10, r10, ror #8                ; [s3 | s1] (D)
    uxtb16  r11, r11, ror #8                ; [p3 | p1] (D)

    usub16  r6, r8, r9                      ; [d2 | d0] (D)
    usub16  r7, r10, r11                    ; [d3 | d1] (D)

    add     r1, r1, r2                      ; src  += src_stride
    add     r3, r3, r12                     ; pred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; [d1 | d0] (D)
    pkhtb   r9, r7, r6, asr #16             ; [d3 | d2] (D)

    str     r8, [r0], #4                    ; diff (D)
    subs    r4, r4, #1                      ; rows left; Z consumed by bne below
    str     r9, [r0], #4                    ; diff (D)

    bne     loop_y

    ldmfd   sp!, {r4-r11}
    bx      lr

    ENDP

    END