ref: e5609bc0fe7a580b8805d57b0878721b48be1b13
parent: 130c682c29d6a5767770322f2d958357eb19380e
author: Martin Storsjö <[email protected]>
date: Sun Jun 29 13:43:28 EDT 2014
Remove global function pointers in the encoder MC code This is similar to how this is done in the decoder part of MC.
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -81,18 +81,6 @@
{4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49}
}
};
-typedef void (*WelsMcFunc0) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-typedef void (*WelsMcFunc1) (uint8_t* pDst, int32_t iDstStride, const uint8_t* psrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB,
- int32_t iSrcBStride, int32_t iHeight);
-WelsMcFunc0 McCopyWidthEq16 = NULL;
-WelsMcFunc0 McCopyWidthEq8 = NULL;
-WelsMcFunc0 McCopyWidthEq4 = NULL;
-WelsMcFunc0 pfMcHorVer02WidthEq16 = NULL;
-WelsMcFunc1 pfPixelAvgWidthEq16 = NULL;
-WelsMcFunc0 pfMcHorVer20WidthEq16 = NULL;
-WelsMcFunc0 pfMcHorVer22WidthEq16 = NULL;
//***************************************************************************//
// C code implementation //
@@ -224,97 +212,97 @@
/////////////////////luma MC//////////////////////////
-static inline void McHorVer01WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer01WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
-static inline void McHorVer03WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer03WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
}
-static inline void McHorVer10WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer10WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
-static inline void McHorVer11WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer11WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer12WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer12WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer13WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer13WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer21WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer21WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer23WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer23WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer30WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer30WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
}
-static inline void McHorVer31WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer31WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer32WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer32WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer33WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer33WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
@@ -360,15 +348,15 @@
pDst += iDstStride;
}
}
-static inline void McCopy (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
- int32_t iHeight) {
+static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
+ int32_t iHeight) {
int32_t i;
- if (iWidth == 16 && McCopyWidthEq16 != NULL)
- McCopyWidthEq16 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8 && McCopyWidthEq8 != NULL)
- McCopyWidthEq8 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4 && McCopyWidthEq4 != NULL)
- McCopyWidthEq4 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ if (iWidth == 16)
+ McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else {
for (i = 0; i < iHeight; i++) {
memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
@@ -386,7 +374,7 @@
const int32_t kiDy = mv.iMvY & 0x07;
if (0 == kiDx && 0 == kiDy) {
- McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
const int32_t kiDA = g_kuiABCD[kiDy][kiDx][0];
const int32_t kiDB = g_kuiABCD[kiDy][kiDx][1];
@@ -441,6 +429,117 @@
McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
}
+static inline void McHorVer01WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
+
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer03WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
+
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer10WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
+
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer11WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer12WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer13WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer21WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer23WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer30WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
+
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer31WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer32WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer33WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+
+static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ int32_t i;
+ if (iWidth == 16)
+ McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else {
+ for (i = 0; i < iHeight; i++) {
+ memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
+ pDst += iDstStride;
+ pSrc += iSrcStride;
+ }
+ }
+}
+
typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
const uint8_t* pABCD, int32_t iHeigh);
void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
@@ -453,7 +552,7 @@
};
if (0 == kiD8x && 0 == kiD8y) {
- McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
}
@@ -469,7 +568,7 @@
McChromaWidthEq8_ssse3
};
if (0 == kiD8x && 0 == kiD8y) {
- McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
}
@@ -671,17 +770,17 @@
static const PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
- McCopyWidthEq16_c, McHorVer10WidthEq16, McHorVer20WidthEq16_c, McHorVer30WidthEq16,
- McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
- McHorVer02WidthEq16_c, McHorVer12WidthEq16, McHorVer22WidthEq16_c, McHorVer32WidthEq16,
- McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
+ McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
+ McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
+ McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
+ McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
};
#if defined (X86_ASM)
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
- McCopyWidthEq16_sse2, McHorVer10WidthEq16, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16,
- McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
- McHorVer02WidthEq16_sse2, McHorVer12WidthEq16, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16,
- McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
+ McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
+ McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
+ McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
+ McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
};
#endif
#if defined(HAVE_NEON)
@@ -705,13 +804,6 @@
pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22_c;
memcpy (pFuncList->sMcFuncs.pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
pFuncList->sMcFuncs.pfChromaMc = McChroma_c;
- McCopyWidthEq4 = McCopyWidthEq4_c;
- McCopyWidthEq8 = McCopyWidthEq8_c;
- McCopyWidthEq16 = McCopyWidthEq16_c;
- pfPixelAvgWidthEq16 = PixelAvgWidthEq16_c;
- pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_c;
- pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_c;
- pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_c;
memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16));
#if defined (X86_ASM)
if (uiCpuFlag & WELS_CPU_SSE2) {
@@ -721,13 +813,6 @@
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2;
- McCopyWidthEq4 = McCopyWidthEq4_mmx;
- McCopyWidthEq8 = McCopyWidthEq8_mmx;
- McCopyWidthEq16 = McCopyWidthEq16_sse2;
- pfPixelAvgWidthEq16 = PixelAvgWidthEq16_sse2;
- pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_sse2;
- pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_sse2;
- pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_sse2;
memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2));
}