ref: 249b8a0aa6b9e8fd7d0a1327d9b6dcd35af0900c
parent: c1a24d52f5e4ca96a0775dfe0f83cc22e43f3c8c
author: dongzhang <[email protected]>
date: Tue Jul 8 10:17:38 EDT 2014
add arm64 MB COPY code and UT
--- a/codec/build/iOS/common/common.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/common/common.xcodeproj/project.pbxproj
@@ -24,6 +24,7 @@
F556A8251906673900E156A8 /* expand_picture_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */; };
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */; };
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5B8D82C190757290037849A /* mc_aarch64_neon.S */; };
+ F5BB0BB8196BB5960072D50D /* copy_mb_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */; };
FAABAA1818E9354A00D4186F /* sad_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = FAABAA1718E9354A00D4186F /* sad_common.cpp */; };
/* End PBXBuildFile section */
@@ -72,6 +73,7 @@
F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = expand_picture_aarch64_neon.S; path = arm64/expand_picture_aarch64_neon.S; sourceTree = "<group>"; };
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = deblocking_aarch64_neon.S; path = arm64/deblocking_aarch64_neon.S; sourceTree = "<group>"; };
F5B8D82C190757290037849A /* mc_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = mc_aarch64_neon.S; path = arm64/mc_aarch64_neon.S; sourceTree = "<group>"; };
+ F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = copy_mb_aarch64_neon.S; path = arm64/copy_mb_aarch64_neon.S; sourceTree = "<group>"; };
FAABAA1618E9353F00D4186F /* sad_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sad_common.h; sourceTree = "<group>"; };
FAABAA1718E9354A00D4186F /* sad_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sad_common.cpp; sourceTree = "<group>"; };
/* End PBXFileReference section */
@@ -177,6 +179,7 @@
F556A81D1906669F00E156A8 /* arm64 */ = {
isa = PBXGroup;
children = (
+ F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */,
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */,
F5B8D82C190757290037849A /* mc_aarch64_neon.S */,
F556A8221906673900E156A8 /* arm_arch64_common_macro.S */,
@@ -245,6 +248,7 @@
5BA8F2C019603F5F00011CE4 /* common_tables.cpp in Sources */,
4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */,
4C3406CC18D96EA600DFA14A /* mc_neon.S in Sources */,
+ F5BB0BB8196BB5960072D50D /* copy_mb_aarch64_neon.S in Sources */,
4C3406CB18D96EA600DFA14A /* expand_picture_neon.S in Sources */,
4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */,
53C1C9BC193F0FB000404D8F /* expand_pic.cpp in Sources */,
--- /dev/null
+++ b/codec/common/arm64/copy_mb_aarch64_neon.S
@@ -1,0 +1,274 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON_AARCH64
+.text
+#include "arm_arch64_common_macro.S"
+
+#ifdef __APPLE__
+.macro LOAD_ALIGNED_DATA_WITH_STRIDE
+// { // Loads four 8-byte rows, one into 64-bit lane d[0] of each of vector registers $0~$3. $4 = src pointer register, post-incremented by $5 (src stride register) after each row. No alignment qualifier is encoded in the loads.
+ ld1 {$0.d}[0], [$4], $5
+ ld1 {$1.d}[0], [$4], $5
+ ld1 {$2.d}[0], [$4], $5
+ ld1 {$3.d}[0], [$4], $5
+// } // on exit $4 has advanced by 4 * $5
+.endm
+
+.macro STORE_ALIGNED_DATA_WITH_STRIDE
+// { // Stores 64-bit lane d[0] of each of vector registers $0~$3 as four 8-byte rows. $4 = dst pointer register, post-incremented by $5 (dst stride register) after each row. No alignment qualifier is encoded in the stores.
+ st1 {$0.d}[0], [$4], $5
+ st1 {$1.d}[0], [$4], $5
+ st1 {$2.d}[0], [$4], $5
+ st1 {$3.d}[0], [$4], $5
+// } // on exit $4 has advanced by 4 * $5
+.endm
+
+.macro LOAD_UNALIGNED_DATA_WITH_STRIDE
+// { // Loads four 8-byte rows, one into each of vector registers $0~$3 as eight byte elements (.8b). $4 = src pointer register, post-incremented by $5 (src stride register) after each row.
+ ld1 {$0.8b}, [$4], $5
+ ld1 {$1.8b}, [$4], $5
+ ld1 {$2.8b}, [$4], $5
+ ld1 {$3.8b}, [$4], $5
+// } // on exit $4 has advanced by 4 * $5
+.endm
+
+.macro STORE_UNALIGNED_DATA_WITH_STRIDE
+// { // Stores the eight byte elements (.8b) of each of vector registers $0~$3 as four 8-byte rows. $4 = dst pointer register, post-incremented by $5 (dst stride register) after each row.
+ st1 {$0.8b}, [$4], $5
+ st1 {$1.8b}, [$4], $5
+ st1 {$2.8b}, [$4], $5
+ st1 {$3.8b}, [$4], $5
+// } // on exit $4 has advanced by 4 * $5
+.endm
+
+.macro LOAD16_ALIGNED_DATA_WITH_STRIDE
+// { // Loads four 16-byte rows, one into each of vector registers $0~$3 as two 64-bit elements (.2d). $4 = src pointer register, post-incremented by $5 (src stride register) after each row. No alignment qualifier is encoded in the loads.
+ ld1 {$0.2d}, [$4], $5
+ ld1 {$1.2d}, [$4], $5
+ ld1 {$2.2d}, [$4], $5
+ ld1 {$3.2d}, [$4], $5
+// } // on exit $4 has advanced by 4 * $5
+.endm
+
+.macro STORE16_ALIGNED_DATA_WITH_STRIDE
+// { // Stores each of vector registers $0~$3 as a 16-byte row of two 64-bit elements (.2d). $4 = dst pointer register, post-incremented by $5 (dst stride register) after each row. No alignment qualifier is encoded in the stores.
+ st1 {$0.2d}, [$4], $5
+ st1 {$1.2d}, [$4], $5
+ st1 {$2.2d}, [$4], $5
+ st1 {$3.2d}, [$4], $5
+// } // on exit $4 has advanced by 4 * $5
+.endm
+
+.macro LOAD16_UNALIGNED_DATA_WITH_STRIDE
+// { // Loads four 16-byte rows, one into each of vector registers $0~$3 as sixteen byte elements (.16b). $4 = src pointer register, post-incremented by $5 (src stride register) after each row.
+ ld1 {$0.16b}, [$4], $5
+ ld1 {$1.16b}, [$4], $5
+ ld1 {$2.16b}, [$4], $5
+ ld1 {$3.16b}, [$4], $5
+// } // on exit $4 has advanced by 4 * $5
+.endm
+
+.macro STORE16_UNALIGNED_DATA_WITH_STRIDE
+// { // Stores each of vector registers $0~$3 as a 16-byte row of sixteen byte elements (.16b). $4 = dst pointer register, post-incremented by $5 (dst stride register) after each row.
+ st1 {$0.16b}, [$4], $5
+ st1 {$1.16b}, [$4], $5
+ st1 {$2.16b}, [$4], $5
+ st1 {$3.16b}, [$4], $5
+// } // on exit $4 has advanced by 4 * $5
+.endm
+
+#else
+.macro LOAD_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // Loads four 8-byte rows, one into 64-bit lane d[0] of each of vector registers arg0~arg3. arg4 = src pointer register, post-incremented by arg5 (src stride register) after each row. No alignment qualifier is encoded in the loads.
+ ld1 {\arg0\().d}[0], [\arg4], \arg5
+ ld1 {\arg1\().d}[0], [\arg4], \arg5
+ ld1 {\arg2\().d}[0], [\arg4], \arg5
+ ld1 {\arg3\().d}[0], [\arg4], \arg5
+// } // on exit arg4 has advanced by 4 * arg5
+.endm
+
+.macro STORE_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // Stores 64-bit lane d[0] of each of vector registers arg0~arg3 as four 8-byte rows. arg4 = dst pointer register, post-incremented by arg5 (dst stride register) after each row. No alignment qualifier is encoded in the stores.
+ st1 {\arg0\().d}[0], [\arg4], \arg5
+ st1 {\arg1\().d}[0], [\arg4], \arg5
+ st1 {\arg2\().d}[0], [\arg4], \arg5
+ st1 {\arg3\().d}[0], [\arg4], \arg5
+// } // on exit arg4 has advanced by 4 * arg5
+.endm
+
+.macro LOAD_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // Loads four 8-byte rows, one into each of vector registers arg0~arg3 as eight byte elements (.8b). arg4 = src pointer register, post-incremented by arg5 (src stride register) after each row.
+ ld1 {\arg0\().8b}, [\arg4], \arg5
+ ld1 {\arg1\().8b}, [\arg4], \arg5
+ ld1 {\arg2\().8b}, [\arg4], \arg5
+ ld1 {\arg3\().8b}, [\arg4], \arg5
+// } // on exit arg4 has advanced by 4 * arg5
+.endm
+
+.macro STORE_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // Stores the eight byte elements (.8b) of each of vector registers arg0~arg3 as four 8-byte rows. arg4 = dst pointer register, post-incremented by arg5 (dst stride register) after each row.
+ st1 {\arg0\().8b}, [\arg4], \arg5
+ st1 {\arg1\().8b}, [\arg4], \arg5
+ st1 {\arg2\().8b}, [\arg4], \arg5
+ st1 {\arg3\().8b}, [\arg4], \arg5
+// } // on exit arg4 has advanced by 4 * arg5
+.endm
+
+.macro LOAD16_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // Loads four 16-byte rows, one into each of vector registers arg0~arg3 as two 64-bit elements (.2d). arg4 = src pointer register, post-incremented by arg5 (src stride register) after each row. No alignment qualifier is encoded in the loads.
+ ld1 {\arg0\().2d}, [\arg4], \arg5
+ ld1 {\arg1\().2d}, [\arg4], \arg5
+ ld1 {\arg2\().2d}, [\arg4], \arg5
+ ld1 {\arg3\().2d}, [\arg4], \arg5
+// } // on exit arg4 has advanced by 4 * arg5
+.endm
+
+.macro STORE16_ALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // Stores each of vector registers arg0~arg3 as a 16-byte row of two 64-bit elements (.2d). arg4 = dst pointer register, post-incremented by arg5 (dst stride register) after each row. No alignment qualifier is encoded in the stores.
+ st1 {\arg0\().2d}, [\arg4], \arg5
+ st1 {\arg1\().2d}, [\arg4], \arg5
+ st1 {\arg2\().2d}, [\arg4], \arg5
+ st1 {\arg3\().2d}, [\arg4], \arg5
+// } // on exit arg4 has advanced by 4 * arg5
+.endm
+
+.macro LOAD16_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // Loads four 16-byte rows, one into each of vector registers arg0~arg3 as sixteen byte elements (.16b). arg4 = src pointer register, post-incremented by arg5 (src stride register) after each row.
+ ld1 {\arg0\().16b}, [\arg4], \arg5
+ ld1 {\arg1\().16b}, [\arg4], \arg5
+ ld1 {\arg2\().16b}, [\arg4], \arg5
+ ld1 {\arg3\().16b}, [\arg4], \arg5
+// } // on exit arg4 has advanced by 4 * arg5
+.endm
+
+.macro STORE16_UNALIGNED_DATA_WITH_STRIDE arg0, arg1, arg2, arg3, arg4, arg5
+// { // Stores each of vector registers arg0~arg3 as a 16-byte row of sixteen byte elements (.16b). arg4 = dst pointer register, post-incremented by arg5 (dst stride register) after each row.
+ st1 {\arg0\().16b}, [\arg4], \arg5
+ st1 {\arg1\().16b}, [\arg4], \arg5
+ st1 {\arg2\().16b}, [\arg4], \arg5
+ st1 {\arg3\().16b}, [\arg4], \arg5
+// } // on exit arg4 has advanced by 4 * arg5
+.endm
+
+#endif
+
+
+// void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS)
+// Copies 8 rows of 8 bytes. x0 = pDst, w1 = iStrideD, x2 = pSrc, w3 = iStrideS. Uses v0-v7.
+WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x8_AArch64_neon
+ // Strides are 32-bit arguments: bits 63:32 of x1 and x3 are unspecified on entry, so widen before post-indexing.
+ sxtw x1, w1
+ sxtw x3, w3
+ LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
+ STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
+ LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3
+ STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1
+WELS_ASM_AARCH64_FUNC_END
+
+
+// void WelsCopy16x16_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS)
+// Copies 16 rows of 16 bytes. x0 = pDst, w1 = iStrideD, x2 = pSrc, w3 = iStrideS. Uses v0-v3 and v16-v19.
+WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x16_AArch64_neon
+ // Strides are 32-bit arguments: bits 63:32 of x1 and x3 are unspecified on entry, so widen before post-indexing.
+ sxtw x1, w1
+ sxtw x3, w3
+
+ LOAD16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
+ STORE16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
+
+ LOAD16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
+ STORE16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
+
+ LOAD16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
+ STORE16_ALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
+
+ LOAD16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
+ STORE16_ALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
+WELS_ASM_AARCH64_FUNC_END
+
+
+// void WelsCopy16x16NotAligned_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS)
+// Copies 16 rows of 16 bytes with byte-element accesses. x0 = pDst, w1 = iStrideD, x2 = pSrc, w3 = iStrideS. Uses v0-v3 and v16-v19.
+WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x16NotAligned_AArch64_neon
+ // Strides are 32-bit arguments: bits 63:32 of x1 and x3 are unspecified on entry, so widen before post-indexing.
+ sxtw x1, w1
+ sxtw x3, w3
+
+ LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
+ STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
+
+ LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
+ STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
+
+ LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
+ STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
+
+ LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
+ STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
+WELS_ASM_AARCH64_FUNC_END
+
+
+// void WelsCopy16x8NotAligned_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS)
+// Copies 8 rows of 16 bytes with byte-element accesses. x0 = pDst, w1 = iStrideD, x2 = pSrc, w3 = iStrideS. Uses v0-v3 and v16-v19.
+WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy16x8NotAligned_AArch64_neon
+ // Strides are 32-bit arguments: bits 63:32 of x1 and x3 are unspecified on entry, so widen before post-indexing.
+ sxtw x1, w1
+ sxtw x3, w3
+ LOAD16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
+ STORE16_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
+ LOAD16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x2, x3
+ STORE16_UNALIGNED_DATA_WITH_STRIDE v16, v17, v18, v19, x0, x1
+WELS_ASM_AARCH64_FUNC_END
+
+
+// void WelsCopy8x16_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS)
+// Copies 16 rows of 8 bytes. x0 = pDst, w1 = iStrideD, x2 = pSrc, w3 = iStrideS. Uses v0-v7.
+WELS_ASM_AARCH64_FUNC_BEGIN WelsCopy8x16_AArch64_neon
+ // Strides are 32-bit arguments: bits 63:32 of x1 and x3 are unspecified on entry, so widen before post-indexing.
+ sxtw x1, w1
+ sxtw x3, w3
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
+ STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3
+ STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x2, x3
+ STORE_UNALIGNED_DATA_WITH_STRIDE v0, v1, v2, v3, x0, x1
+
+ LOAD_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x2, x3
+ STORE_UNALIGNED_DATA_WITH_STRIDE v4, v5, v6, v7, x0, x1
+WELS_ASM_AARCH64_FUNC_END
+
+#endif
--- a/codec/common/inc/copy_mb.h
+++ b/codec/common/inc/copy_mb.h
@@ -65,6 +65,14 @@
void WelsCopy8x16_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
#endif
+#if defined (HAVE_NEON_AARCH64)   // AArch64 NEON copy routines implemented in arm64/copy_mb_aarch64_neon.S; strides are added to the row address after each row
+void WelsCopy8x8_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);   // copies 8 rows of 8 bytes
+void WelsCopy16x16_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);   // copies 16 rows of 16 bytes
+void WelsCopy16x16NotAligned_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);   // copies 16 rows of 16 bytes using byte-element loads and stores
+void WelsCopy16x8NotAligned_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);   // copies 8 rows of 16 bytes using byte-element loads and stores
+void WelsCopy8x16_AArch64_neon (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);   // copies 16 rows of 8 bytes
+#endif   // HAVE_NEON_AARCH64
+
#if defined(__cplusplus)
}
#endif//__cplusplus
--- a/codec/common/targets.mk
+++ b/codec/common/targets.mk
@@ -39,6 +39,7 @@
ifeq ($(ASM_ARCH), arm64)
COMMON_ASM_ARM64_SRCS=\
+ $(COMMON_SRCDIR)/arm64/copy_mb_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/deblocking_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/expand_picture_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/mc_aarch64_neon.S\
--- a/codec/encoder/core/src/encode_mb_aux.cpp
+++ b/codec/encoder/core/src/encode_mb_aux.cpp
@@ -553,8 +553,8 @@
pFuncList->pfQuantizationHadamard2x2 = WelsHadamardQuant2x2_AArch64_neon;
pFuncList->pfQuantizationHadamard2x2Skip = WelsHadamardQuant2x2Skip_AArch64_neon;
pFuncList->pfDctT4 = WelsDctT4_AArch64_neon;
- //pFuncList->pfCopy8x8Aligned = WelsCopy8x8_AArch64_neon; // will enable in next update
- //pFuncList->pfCopy8x16Aligned = WelsCopy8x16_AArch64_neon; // will enable in next update
+ pFuncList->pfCopy8x8Aligned = WelsCopy8x8_AArch64_neon;
+ pFuncList->pfCopy8x16Aligned = WelsCopy8x16_AArch64_neon;
pFuncList->pfGetNoneZeroCount = WelsGetNoneZeroCount_AArch64_neon;
pFuncList->pfTransformHadamard4x4Dc = WelsHadamardT4Dc_AArch64_neon;
@@ -564,9 +564,9 @@
pFuncList->pfQuantizationFour4x4 = WelsQuantFour4x4_AArch64_neon;
pFuncList->pfQuantizationFour4x4Max = WelsQuantFour4x4Max_AArch64_neon;
- //pFuncList->pfCopy16x16Aligned = WelsCopy16x16_AArch64_neon; // will enable in next update
- //pFuncList->pfCopy16x16NotAligned = WelsCopy16x16NotAligned_AArch64_neon; // will enable in next update
- //pFuncList->pfCopy16x8NotAligned = WelsCopy16x8NotAligned_AArch64_neon; // will enable in next update
+ pFuncList->pfCopy16x16Aligned = WelsCopy16x16_AArch64_neon;
+ pFuncList->pfCopy16x16NotAligned = WelsCopy16x16NotAligned_AArch64_neon;
+ pFuncList->pfCopy16x8NotAligned = WelsCopy16x8NotAligned_AArch64_neon;
pFuncList->pfDctFourT4 = WelsDctFourT4_AArch64_neon;
}
#endif
--- /dev/null
+++ b/test/encoder/EncUT_MBCopy.cpp
@@ -1,0 +1,140 @@
+#include <gtest/gtest.h>
+#include <math.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "cpu_core.h"
+#include "cpu.h"
+#include "macros.h"
+#include "encode_mb_aux.h"
+#include "wels_func_ptr_def.h"
+#include "copy_mb.h"
+
+using namespace WelsSVCEnc;
+#define MBCOPYTEST_NUM 1000   // number of rounds, each with freshly randomized source data, run by every test in this file
+// Overwrites p[0] .. p[Len - 1] with successive rand() results reduced modulo 256; nothing is written when Len <= 0.
+static void FillWithRandomData (uint8_t* p, int32_t Len) {
+  int32_t iIdx = 0;
+  while (iIdx < Len) p[iIdx++] = rand() % 256;
+}
+
+
+// Byte-for-byte comparison of the dispatched pfCopy8x8Aligned against WelsCopy8x8_c on random source data.
+TEST (MBCopyFunTest, pfCopy8x8Aligned) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pSrcAlign, 16 * 64 + 1, 16)
+  ENFORCE_STACK_ALIGN_2D (uint8_t, pDstAlign, 2, 16 * 32 + 16, 16)
+  const int32_t kiSrcBytes = 16 * 64 + 1;   // source bytes re-randomized every round
+  const int32_t kiDstBytes = 16 * 32 + 1;   // bytes of each destination that are cleared and then compared
+
+  // Populate the function table from the CPU features detected at run time.
+  SWelsFuncPtrList sFuncs;
+  int32_t iNumCores = 0;
+  WelsInitEncodingFuncs (&sFuncs, WelsCPUFeatureDetect (&iNumCores));
+
+  for (int32_t iRoundsLeft = MBCOPYTEST_NUM; iRoundsLeft > 0; --iRoundsLeft) {
+    memset (pDstAlign[0], 0, kiDstBytes);
+    memset (pDstAlign[1], 0, kiDstBytes);
+    FillWithRandomData ((uint8_t*)pSrcAlign, kiSrcBytes);
+    WelsCopy8x8_c (pDstAlign[0], 32, pSrcAlign, 64);
+    sFuncs.pfCopy8x8Aligned (pDstAlign[1], 32, pSrcAlign, 64);
+    for (int32_t i = 0; i != kiDstBytes; ++i) {
+      ASSERT_EQ (pDstAlign[0][i], pDstAlign[1][i]);
+    }
+  }
+}
+
+// Byte-for-byte comparison of the dispatched pfCopy8x16Aligned against WelsCopy8x16_c on random source data.
+TEST (MBCopyFunTest, pfCopy8x16Aligned) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pSrcAlign, 16 * 64 + 1, 16)
+  ENFORCE_STACK_ALIGN_2D (uint8_t, pDstAlign, 2, 16 * 32 + 16, 16)
+  const int32_t kiSrcBytes = 16 * 64 + 1;   // source bytes re-randomized every round
+  const int32_t kiDstBytes = 16 * 32 + 1;   // bytes of each destination that are cleared and then compared
+
+  // Populate the function table from the CPU features detected at run time.
+  SWelsFuncPtrList sFuncs;
+  int32_t iNumCores = 0;
+  WelsInitEncodingFuncs (&sFuncs, WelsCPUFeatureDetect (&iNumCores));
+
+  for (int32_t iRoundsLeft = MBCOPYTEST_NUM; iRoundsLeft > 0; --iRoundsLeft) {
+    memset (pDstAlign[0], 0, kiDstBytes);
+    memset (pDstAlign[1], 0, kiDstBytes);
+    FillWithRandomData ((uint8_t*)pSrcAlign, kiSrcBytes);
+    WelsCopy8x16_c (pDstAlign[0], 32, pSrcAlign, 64);
+    sFuncs.pfCopy8x16Aligned (pDstAlign[1], 32, pSrcAlign, 64);
+    for (int32_t i = 0; i != kiDstBytes; ++i) {
+      ASSERT_EQ (pDstAlign[0][i], pDstAlign[1][i]);
+    }
+  }
+}
+
+// Byte-for-byte comparison of the dispatched pfCopy16x16Aligned against WelsCopy16x16_c on random source data.
+TEST (MBCopyFunTest, pfCopy16x16Aligned) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pSrcAlign, 16 * 64 + 1, 16)
+  ENFORCE_STACK_ALIGN_2D (uint8_t, pDstAlign, 2, 16 * 32 + 16, 16)
+  const int32_t kiSrcBytes = 16 * 64 + 1;   // source bytes re-randomized every round
+  const int32_t kiDstBytes = 16 * 32 + 1;   // bytes of each destination that are cleared and then compared
+
+  // Populate the function table from the CPU features detected at run time.
+  SWelsFuncPtrList sFuncs;
+  int32_t iNumCores = 0;
+  WelsInitEncodingFuncs (&sFuncs, WelsCPUFeatureDetect (&iNumCores));
+
+  for (int32_t iRoundsLeft = MBCOPYTEST_NUM; iRoundsLeft > 0; --iRoundsLeft) {
+    memset (pDstAlign[0], 0, kiDstBytes);
+    memset (pDstAlign[1], 0, kiDstBytes);
+    FillWithRandomData ((uint8_t*)pSrcAlign, kiSrcBytes);
+    WelsCopy16x16_c (pDstAlign[0], 32, pSrcAlign, 64);
+    sFuncs.pfCopy16x16Aligned (pDstAlign[1], 32, pSrcAlign, 64);
+    for (int32_t i = 0; i != kiDstBytes; ++i) {
+      ASSERT_EQ (pDstAlign[0][i], pDstAlign[1][i]);
+    }
+  }
+}
+
+// Byte-for-byte comparison of the dispatched pfCopy16x8NotAligned against WelsCopy16x8_c, reading from pSrcAlign + 1.
+TEST (MBCopyFunTest, pfCopy16x8NotAligned) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pSrcAlign, 16 * 64 + 1, 16)
+  ENFORCE_STACK_ALIGN_2D (uint8_t, pDstAlign, 2, 16 * 32 + 16, 16)
+  const int32_t kiSrcBytes = 16 * 64 + 1;   // source bytes re-randomized every round
+  const int32_t kiDstBytes = 16 * 32 + 1;   // bytes of each destination that are cleared and then compared
+
+  // Populate the function table from the CPU features detected at run time.
+  SWelsFuncPtrList sFuncs;
+  int32_t iNumCores = 0;
+  WelsInitEncodingFuncs (&sFuncs, WelsCPUFeatureDetect (&iNumCores));
+
+  for (int32_t iRoundsLeft = MBCOPYTEST_NUM; iRoundsLeft > 0; --iRoundsLeft) {
+    memset (pDstAlign[0], 0, kiDstBytes);
+    memset (pDstAlign[1], 0, kiDstBytes);
+    FillWithRandomData ((uint8_t*)pSrcAlign, kiSrcBytes);
+    WelsCopy16x8_c (pDstAlign[0], 32, pSrcAlign + 1, 64);
+    sFuncs.pfCopy16x8NotAligned (pDstAlign[1], 32, pSrcAlign + 1, 64);
+    for (int32_t i = 0; i != kiDstBytes; ++i) {
+      ASSERT_EQ (pDstAlign[0][i], pDstAlign[1][i]);
+    }
+  }
+}
+
+// Byte-for-byte comparison of the dispatched pfCopy16x16NotAligned against WelsCopy16x16_c, reading from pSrcAlign + 1.
+TEST (MBCopyFunTest, pfCopy16x16NotAligned) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pSrcAlign, 16 * 64 + 1, 16)
+  ENFORCE_STACK_ALIGN_2D (uint8_t, pDstAlign, 2, 16 * 32 + 16, 16)
+  const int32_t kiSrcBytes = 16 * 64 + 1;   // source bytes re-randomized every round
+  const int32_t kiDstBytes = 16 * 32 + 1;   // bytes of each destination that are cleared and then compared
+
+  // Populate the function table from the CPU features detected at run time.
+  SWelsFuncPtrList sFuncs;
+  int32_t iNumCores = 0;
+  WelsInitEncodingFuncs (&sFuncs, WelsCPUFeatureDetect (&iNumCores));
+
+  for (int32_t iRoundsLeft = MBCOPYTEST_NUM; iRoundsLeft > 0; --iRoundsLeft) {
+    memset (pDstAlign[0], 0, kiDstBytes);
+    memset (pDstAlign[1], 0, kiDstBytes);
+    FillWithRandomData ((uint8_t*)pSrcAlign, kiSrcBytes);
+    WelsCopy16x16_c (pDstAlign[0], 32, pSrcAlign + 1, 64);
+    sFuncs.pfCopy16x16NotAligned (pDstAlign[1], 32, pSrcAlign + 1, 64);
+    for (int32_t i = 0; i != kiDstBytes; ++i) {
+      ASSERT_EQ (pDstAlign[0][i], pDstAlign[1][i]);
+    }
+  }
+}
--- a/test/encoder/targets.mk
+++ b/test/encoder/targets.mk
@@ -6,6 +6,7 @@
$(ENCODER_UNITTEST_SRCDIR)/EncUT_EncoderMbAux.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_ExpGolomb.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_GetIntraPredictor.cpp\
+ $(ENCODER_UNITTEST_SRCDIR)/EncUT_MBCopy.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryAlloc.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryZero.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionEstimate.cpp\