shithub: openh264

Download patch

ref: 50daa8f737fe174d723a10f97fa7ec24f54a6178
parent: eb9f56584fae81eab9be6ab999040ed5e4a7cfcd
parent: 79437648692aea32c893ad1b097a76f2b64c9c28
author: HaiboZhu <[email protected]>
date: Tue Apr 12 12:48:54 EDT 2016

Merge pull request #2439 from ruil2/deblocking_fix

Add missing sign extension (w1 to x1) for arm64 in deblocking_aarch64_neon.S

--- a/codec/common/arm64/arm_arch64_common_macro.S
+++ b/codec/common/arm64/arm_arch64_common_macro.S
@@ -62,3 +62,7 @@
 .endm
 
 #endif
+
+.macro SIGN_EXTENSION arg0, arg1
+  sxtw \arg0, \arg1
+.endm
--- a/codec/common/arm64/deblocking_aarch64_neon.S
+++ b/codec/common/arm64/deblocking_aarch64_neon.S
@@ -305,6 +305,7 @@
 WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaLt4V_AArch64_neon //uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc
     dup v16.16b, w2 //alpha
     dup v17.16b, w3 //beta
+    SIGN_EXTENSION x1,w1
     add x2, x1, x1, lsl #1
     sub x2, x0, x2
     movi v23.16b, #128
@@ -363,8 +364,8 @@
 WELS_ASM_AARCH64_FUNC_BEGIN DeblockLumaEq4V_AArch64_neon
     dup     v16.16b, w2 //alpha
     dup     v17.16b, w3 //beta
+    SIGN_EXTENSION x1,w1
     sub     x3, x0, x1, lsl #2
-
     ld1     {v0.16b}, [x3], x1
     ld1     {v4.16b}, [x0], x1
     ld1     {v1.16b}, [x3], x1
@@ -431,7 +432,7 @@
     dup v17.16b, w3 //beta
     sub x2, x0, #3
     movi v23.16b, #128
-
+    SIGN_EXTENSION x1,w1
     LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 0
     LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 1
     LOAD_LUMA_DATA_3      v0, v1, v2, v3, v4, v5, 2
@@ -515,7 +516,7 @@
     dup     v16.16b, w2 //alpha
     dup     v17.16b, w3 //beta
     sub     x3, x0, #4
-
+    SIGN_EXTENSION x1,w1
     LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 0
     LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 1
     LOAD_LUMA_DATA_4      v0, v1, v2, v3, v4, v5, v6, v7, 2