ref: 3b68840d5f437f46c16d3f4abeeaa9970adb77dd
parent: c9433ee73bcc4a0e42fec17a34692833e75a26cf
parent: cc407b4b217392232f98d005b5ba66aed69cf5a8
author: HaiboZhu <[email protected]>
date: Wed Apr 20 06:03:01 EDT 2016
Merge pull request #2444 from GuangweiWang/fix-assembly-arm64: Fix assembly arm64. Code review at: https://rbcommons.com/s/OpenH264/r/1594/
--- a/codec/decoder/core/arm64/block_add_aarch64_neon.S
+++ b/codec/decoder/core/arm64/block_add_aarch64_neon.S
@@ -122,6 +122,7 @@
WELS_ASM_AARCH64_FUNC_BEGIN WelsBlockZero8x8_AArch64_neon
eor v0.16b, v0.16b, v0.16b
+ SIGN_EXTENSION x1, w1
lsl x1, x1, 1
.rept 8
st1 {v0.16b}, [x0], x1
--- a/codec/encoder/core/arm64/reconstruct_aarch64_neon.S
+++ b/codec/encoder/core/arm64/reconstruct_aarch64_neon.S
@@ -469,7 +469,10 @@
st1 {v0.16b, v1.16b}, [x0]
WELS_ASM_AARCH64_FUNC_END
+//void WelsDctT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
WELS_ASM_AARCH64_FUNC_BEGIN WelsDctT4_AArch64_neon
+ SIGN_EXTENSION x2, w2
+ SIGN_EXTENSION x4, w4
LOAD_4x4_DATA_FOR_DCT v0, v1, x1, x2, x3, x4
usubl v2.8h, v0.8b, v1.8b
usubl2 v4.8h, v0.16b, v1.16b