ref: f642ee891b1db8087f8d09847ec464c9be89113e
dir: /vp8/common/arm/armv6/dequant_idct_v6.asm/
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;

    EXPORT  |vp8_dequant_idct_add_v6|

    AREA    |.text|, CODE, READONLY

;-----------------------------------------------------------------------
; void vp8_dequant_idct_add_v6(short *input, short *dq,
;                              unsigned char *dest, int stride)
;
; Dequantizes 16 coefficients in place, runs a two-pass 4x4 inverse
; transform on them, adds the rounded result ((x + 4) >> 3) to the 4x4
; block of bytes at dest with clamping to 0..255, and finally zeroes
; the 32-byte coefficient buffer.
;
; In:
;   r0 = q       (the `input` argument; read, overwritten, then zeroed)
;   r1 = dq      (16 dequantization factors, read only)
;   r2 = dst     (4 rows of 4 bytes, read and written)
;   r3 = stride  (byte offset between consecutive dst rows)
;
; Stack: 9 saved registers plus one word holding stride.
; On exit r4-r11 are restored; r0-r3, r12 and the condition flags are
; modified.
;
; NOTE(review): input, dq and every dst row are accessed with 32-bit
; ldr/str - presumably callers guarantee suitable alignment; confirm.
;
; Lane notation used below: {hi:lo} are the upper/lower halfwords of a
; register; c = cospi8sqrt2minus1, s = sinpi8sqrt2.
;-----------------------------------------------------------------------
|vp8_dequant_idct_add_v6| PROC
    stmdb   sp!, {r4-r11, lr}       ; save r4-r11 and the return address

    ldr     r4, [r0]                ; r4 = {input[1]:input[0]}
    ldr     r5, [r1], #4            ; r5 = {dq[1]:dq[0]}; r1 += 4

    sub     sp, sp, #4
    str     r3, [sp]                ; spill stride; r3 is reused for a constant

    mov     r12, #4                 ; 4 iterations x 4 coefficients = 16

; Dequantize in place: input[i] = low 16 bits of input[i] * dq[i].
; The loads for the next pair are issued before the current pair is
; stored, so each load offset is relative to the not-yet-advanced r0.
vp8_dequant_add_loop
    smulbb  r6, r4, r5              ; lower halfword x lower halfword
    smultt  r7, r4, r5              ; upper halfword x upper halfword

    ldr     r4, [r0, #4]            ; next two input coefficients
    ldr     r5, [r1], #4            ; next two dq factors

    strh    r6, [r0], #2            ; write the products back over the input
    strh    r7, [r0], #2

    smulbb  r6, r4, r5              ; second pair of this iteration
    smultt  r7, r4, r5

    subs    r12, r12, #1            ; Z is set on the last iteration

    ldrne   r4, [r0, #4]            ; preload for the next iteration; skipped
    ldrne   r5, [r1], #4            ; on the last one (would be past entry 15)

    strh    r6, [r0], #2
    strh    r7, [r0], #2

    bne     vp8_dequant_add_loop

    sub     r0, r0, #32             ; rewind to the first coefficient
    mov     r1, r0                  ; first pass writes its results in place

; short_idct4x4llm_v6_dual
; First pass. Rows of the block are 8 bytes apart and each 32-bit word
; holds two adjacent columns, so the 16-bit parallel add/sub handles two
; columns per iteration. smulwb/smulwt yield
; (32-bit constant * one halfword) >> 16.
    ldr     r3, cospi8sqrt2minus1   ; r3 = c
    ldr     r4, sinpi8sqrt2         ; r4 = s
    ldr     r6, [r0, #8]            ; row 1 of the first column pair
    mov     r5, #2                  ; two column pairs
vp8_dequant_idct_loop1_v6
    ldr     r12, [r0, #24]          ; row 3
    ldr     r14, [r0, #16]          ; row 2
    smulwt  r9, r3, r6
    smulwb  r7, r3, r6
    smulwt  r10, r4, r6
    smulwb  r8, r4, r6
    pkhbt   r7, r7, r9, lsl #16     ; r7  = (row1 * c) >> 16, both lanes
    smulwt  r11, r3, r12
    pkhbt   r8, r8, r10, lsl #16    ; r8  = (row1 * s) >> 16
    uadd16  r6, r6, r7              ; r6  = row1 + ((row1 * c) >> 16)
    smulwt  r7, r4, r12
    smulwb  r9, r3, r12
    smulwb  r10, r4, r12
    subs    r5, r5, #1              ; flags are consumed by ldrne/bne below
    pkhbt   r9, r9, r11, lsl #16    ; r9  = (row3 * c) >> 16
    ldr     r11, [r0], #4           ; row 0; advance to the next column pair
    pkhbt   r10, r10, r7, lsl #16   ; r10 = (row3 * s) >> 16
    uadd16  r7, r12, r9             ; r7  = row3 + ((row3 * c) >> 16)
    usub16  r7, r8, r7              ; c1  = ((row1 * s) >> 16) - r7
    uadd16  r6, r6, r10             ; d1  = r6 + ((row3 * s) >> 16)
    uadd16  r10, r11, r14           ; a1  = row0 + row2
    usub16  r8, r11, r14            ; b1  = row0 - row2
    uadd16  r9, r10, r6             ; a1 + d1
    usub16  r10, r10, r6            ; a1 - d1
    uadd16  r6, r8, r7              ; b1 + c1
    usub16  r7, r8, r7              ; b1 - c1
    str     r6, [r1, #8]            ; row 1 = b1 + c1
    ldrne   r6, [r0, #8]            ; preload row 1 of the next column pair
    str     r7, [r1, #16]           ; row 2 = b1 - c1
    str     r10, [r1, #24]          ; row 3 = a1 - d1
    str     r9, [r1], #4            ; row 0 = a1 + d1; advance the output
    bne     vp8_dequant_idct_loop1_v6

; Second pass. Each iteration reads two whole rows (A = first 8 bytes,
; B = next 8 bytes) and repacks them so that a register holds one column
; of both rows as {A:B}. The results are rounded, shifted, added to the
; dst pixels and clamped to 8 bits.
    mov     r5, #2                  ; two row pairs
    sub     r0, r1, #8              ; r1 advanced 8 bytes above; back to row 0
vp8_dequant_idct_loop2_v6
    ldr     r6, [r0], #4            ; row A, {col1:col0}
    ldr     r7, [r0], #4            ; row A, {col3:col2}
    ldr     r8, [r0], #4            ; row B, {col1:col0}
    ldr     r9, [r0], #4            ; row B, {col3:col2}
    smulwt  r1, r3, r6              ; (A.col1 * c) >> 16
    smulwt  r12, r4, r6             ; (A.col1 * s) >> 16
    smulwt  lr, r3, r8              ; (B.col1 * c) >> 16
    smulwt  r10, r4, r8             ; (B.col1 * s) >> 16
    pkhbt   r11, r8, r6, lsl #16    ; r11 = col0 as {A:B}
    pkhbt   r1, lr, r1, lsl #16     ; r1  = (col1 * c) >> 16
    pkhbt   r12, r10, r12, lsl #16  ; r12 = (col1 * s) >> 16
    pkhtb   r6, r6, r8, asr #16     ; r6  = col1 as {A:B}
    uadd16  r6, r1, r6              ; r6  = col1 + ((col1 * c) >> 16)
    pkhbt   lr, r9, r7, lsl #16     ; lr  = col2 as {A:B}
    uadd16  r10, r11, lr            ; a1  = col0 + col2
    usub16  lr, r11, lr             ; b1  = col0 - col2 (kept in lr/r14)
    pkhtb   r8, r7, r9, asr #16     ; r8  = col3 as {A:B}
    subs    r5, r5, #1              ; flags are consumed by bne at loop end
    smulwt  r1, r3, r8
    smulwb  r7, r3, r8
    smulwt  r11, r4, r8
    smulwb  r9, r4, r8
    pkhbt   r1, r7, r1, lsl #16     ; r1  = (col3 * c) >> 16
    uadd16  r8, r1, r8              ; r8  = col3 + ((col3 * c) >> 16)
    pkhbt   r11, r9, r11, lsl #16   ; r11 = (col3 * s) >> 16
    usub16  r1, r12, r8             ; c1  = ((col1 * s) >> 16) - r8
    uadd16  r8, r11, r6             ; d1  = ((col3 * s) >> 16) + r6
    ldr     r9, c0x00040004         ; 4 in each lane: rounding for the >> 3
    ldr     r12, [sp]               ; reload stride from the stack slot
    uadd16  r6, r10, r8             ; out col0 = a1 + d1
    usub16  r7, r10, r8             ; out col3 = a1 - d1
    uadd16  r7, r7, r9              ; add rounding to col3
    uadd16  r6, r6, r9              ; add rounding to col0
    uadd16  r10, r14, r1            ; out col1 = b1 + c1 (r14 is lr)
    usub16  r1, r14, r1             ; out col2 = b1 - c1
    uadd16  r10, r10, r9            ; add rounding to col1
    uadd16  r1, r1, r9              ; add rounding to col2

    ; Row A lives in the upper halfwords.
    ldr     r11, [r2]               ; four dst bytes of row A
    mov     r8, r7, asr #3          ; upper halfword = A.col3 >> 3
    pkhtb   r9, r8, r10, asr #19    ; r9 = {A.col3 >> 3 : A.col1 >> 3}
    mov     r8, r1, asr #3          ; upper halfword = A.col2 >> 3
    pkhtb   r8, r8, r6, asr #19     ; r8 = {A.col2 >> 3 : A.col0 >> 3}
    uxtb16  lr, r11, ror #8         ; lr = dst bytes {3:1}, zero-extended
    qadd16  r9, r9, lr              ; residual + pixel for columns 3 and 1
    uxtb16  lr, r11                 ; lr = dst bytes {2:0}, zero-extended
    qadd16  r8, r8, lr              ; residual + pixel for columns 2 and 0
    usat16  r9, #8, r9              ; clamp each lane to 0..255
    usat16  r8, #8, r8
    orr     r9, r8, r9, lsl #8      ; interleave back into bytes 3,2,1,0

    ; Row B lives in the lower halfwords; move it up, then repeat.
    ldr     r11, [r2, r12]          ; four dst bytes of row B (dst + stride)
    mov     r7, r7, lsl #16
    mov     r1, r1, lsl #16
    mov     r10, r10, lsl #16
    mov     r6, r6, lsl #16
    mov     r7, r7, asr #3          ; upper halfword = B.col3 >> 3
    pkhtb   r7, r7, r10, asr #19    ; r7 = {B.col3 >> 3 : B.col1 >> 3}
    mov     r1, r1, asr #3          ; upper halfword = B.col2 >> 3
    pkhtb   r1, r1, r6, asr #19     ; r1 = {B.col2 >> 3 : B.col0 >> 3}
    uxtb16  r8, r11, ror #8         ; dst bytes {3:1}
    qadd16  r7, r7, r8
    uxtb16  r8, r11                 ; dst bytes {2:0}
    qadd16  r1, r1, r8
    usat16  r7, #8, r7              ; clamp each lane to 0..255
    usat16  r1, #8, r1
    orr     r1, r1, r7, lsl #8      ; interleave back into bytes 3,2,1,0
    str     r9, [r2], r12           ; store row A; dst += stride
    str     r1, [r2], r12           ; store row B; dst += stride
    bne     vp8_dequant_idct_loop2_v6

; memset
; Clear the 16 coefficients (8 words) at the original input pointer.
    sub     r0, r0, #32             ; the second pass advanced r0 by 32 bytes
    add     sp, sp, #4              ; release the stride slot

    mov     r12, #0
    str     r12, [r0]
    str     r12, [r0, #4]
    str     r12, [r0, #8]
    str     r12, [r0, #12]
    str     r12, [r0, #16]
    str     r12, [r0, #20]
    str     r12, [r0, #24]
    str     r12, [r0, #28]

    ldmia   sp!, {r4 - r11, pc}     ; restore registers; loading pc returns
    ENDP    ; |vp8_dequant_idct_add_v6|

; Constant Pool
; Multipliers applied with smulwb/smulwt, i.e. scaled by 2^16.
cospi8sqrt2minus1 DCD 0x00004E7B    ; 20091; per the label, ~(sqrt(2)*cos(pi/8) - 1) * 65536
sinpi8sqrt2       DCD 0x00008A8C    ; 35468; per the label, ~sqrt(2)*sin(pi/8) * 65536
c0x00040004       DCD 0x00040004    ; 4 in both 16-bit lanes

    END