ref: 29a0c77acf4b33a41f4ba0909cc535ee3ca67ecc
parent: 8e57c926a36ded5ca064a2ae500922f9e9837f41
author: Martin Storsjö <[email protected]>
date: Mon Mar 17 13:04:54 EDT 2014
Don't clobber q4-q7 in WelsIntra16x16Combined3Satd_neon. This is similar to what is done in the other NEON functions. This function was missed because it isn't covered by the current set of unit tests.
--- a/codec/encoder/core/arm/intra_pred_sad_3_opt_neon.S
+++ b/codec/encoder/core/arm/intra_pred_sad_3_opt_neon.S
@@ -154,6 +154,7 @@
WELS_ASM_FUNC_BEGIN WelsIntra16x16Combined3Satd_neon
stmdb sp!, {r4-r7, lr}
+ vpush {q4-q7}
//Get the top line data to 'q15'(16 bytes)
sub r7, r0, r1
@@ -258,8 +259,8 @@
HDM_TRANSFORM_4X4_L0 d9, d13, d23, d19, d27, d15, d16, d17, d14
//Get the data from stack
- ldr r5, [sp, #20] //the addr of Best_mode
- ldr r6, [sp, #24] //the value of i_lambda
+ ldr r5, [sp, #84] //the addr of Best_mode
+ ldr r6, [sp, #88] //the value of i_lambda
//vadd.u16 d24, d25
vrshr.u16 d15, #1
@@ -291,6 +292,7 @@
str r4, [r5]
+ vpop {q4-q7}
ldmia sp!, {r4-r7, lr}
WELS_ASM_FUNC_END