ref: 525dbe7093af25cfe993f0e16ace2ff3685e1aad
parent: d34e209266a3f7bc95d2808e06767329bc3cf96c
author: Karina <[email protected]>
date: Thu Apr 14 06:06:57 EDT 2016
add 32-bit parameter sign-extensions for block_add_aarch64_neon.S
--- a/codec/decoder/core/arm64/block_add_aarch64_neon.S
+++ b/codec/decoder/core/arm64/block_add_aarch64_neon.S
@@ -68,7 +68,7 @@
// uint8_t *pred, const int32_t stride, int16_t *rs
WELS_ASM_AARCH64_FUNC_BEGIN IdctResAddPred_AArch64_neon
-
+ SIGN_EXTENSION x1,w1
ld4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x2] // cost 3 cycles!
ROW_TRANSFORM_1_STEP v0, v1, v2, v3, v16, v17, v18, v19, v4, v5
TRANSFORM_4BYTES v0, v1, v2, v3, v16, v17, v18, v19
@@ -113,6 +113,7 @@
WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero16x16_AArch64_neon
eor v0.16b, v0.16b, v0.16b
eor v1.16b, v1.16b, v1.16b
+ SIGN_EXTENSION x1,w1
lsl x1, x1, 1
.rept 16
st1 {v0.16b, v1.16b}, [x0], x1