ref: bdf9f6b4ffb61dbe87690f3de0bd12312de1bf34
parent: dce954119995e8242ff99d134988d1fc730a8d4f
parent: e5609bc0fe7a580b8805d57b0878721b48be1b13
author: ruil2 <[email protected]>
date: Wed Jul 2 10:01:35 EDT 2014
Merge pull request #1043 from mstorsjo/avoid-globals Get rid of global non-readonly variables within the library
--- a/codec/encoder/core/inc/encoder.h
+++ b/codec/encoder/core/inc/encoder.h
@@ -91,12 +91,12 @@
* \brief Dump reconstruction for dependency layer
*/
-extern "C" void DumpDependencyRec (SPicture* pSrcPic, const char* kpFileName, const int8_t kiDid);
+extern "C" void DumpDependencyRec (SPicture* pSrcPic, const char* kpFileName, const int8_t kiDid, bool bAppend);
/*!
* \brief Dump the reconstruction pictures
*/
-void DumpRecFrame (SPicture* pSrcPic, const char* kpFileName);
+void DumpRecFrame (SPicture* pSrcPic, const char* kpFileName, bool bAppend);
/*!
--- a/codec/encoder/core/inc/encoder_context.h
+++ b/codec/encoder/core/inc/encoder_context.h
@@ -217,6 +217,11 @@
int32_t iEncoderError;
WELS_MUTEX mutexEncoderError;
int32_t iDropNumber;
+
+#ifdef ENABLE_FRAME_DUMP
+bool bDependencyRecFlag[MAX_DEPENDENCY_LAYER];
+bool bRecFlag;
+#endif
} sWelsEncCtx/*, *PWelsEncCtx*/;
}
#endif//sWelsEncCtx_H__
--- a/codec/encoder/core/inc/get_intra_predictor.h
+++ b/codec/encoder/core/inc/get_intra_predictor.h
@@ -165,7 +165,6 @@
}
#endif//__cplusplus
-void WelsInitFillingPredFuncs (const uint32_t kuiCpuFlag);
void WelsInitIntraPredFuncs (SWelsFuncPtrList* pFuncList, const uint32_t kuiCpuFlag);
}
--- a/codec/encoder/core/inc/set_mb_syn_cavlc.h
+++ b/codec/encoder/core/inc/set_mb_syn_cavlc.h
@@ -43,6 +43,7 @@
#include "typedefs.h"
#include "bit_stream.h"
+#include "wels_func_ptr_def.h"
namespace WelsSVCEnc {
@@ -59,16 +60,6 @@
#define LUMA_DC_AC 0x04
-typedef int32_t (*PCavlcParamCalFunc) (int16_t* pCoff, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs,
- int32_t iEndIdx);
-
-typedef struct TagCoeffFunc {
-PCavlcParamCalFunc pfCavlcParamCal;
-} SCoeffFunc;
-
-/* For CAVLC */
-extern SCoeffFunc sCoeffFunc;
-
typedef struct TagCavlcTableItem {
uint16_t uiBits;
uint8_t uiLen;
@@ -75,9 +66,10 @@
uint8_t uiSuffixLength;
} SCavlcTableItem;
-void InitCoeffFunc (const uint32_t uiCpuFlag);
+void InitCoeffFunc (SWelsFuncPtrList* pFuncList, const uint32_t uiCpuFlag);
-int32_t WriteBlockResidualCavlc (int16_t* pCoffLevel, int32_t iEndIdx, int32_t iCalRunLevelFlag,
+int32_t WriteBlockResidualCavlc (SWelsFuncPtrList* pFuncList, int16_t* pCoffLevel, int32_t iEndIdx,
+ int32_t iCalRunLevelFlag,
int32_t iResidualProperty, int8_t iNC, SBitStringAux* pBs);
#if defined(__cplusplus)
--- a/codec/encoder/core/inc/svc_set_mb_syn_cavlc.h
+++ b/codec/encoder/core/inc/svc_set_mb_syn_cavlc.h
@@ -50,7 +50,7 @@
namespace WelsSVCEnc {
-int32_t WelsWriteMbResidual (SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs);
+int32_t WelsWriteMbResidual (SWelsFuncPtrList* pFuncList, SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs);
void WelsSpatialWriteSubMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb);
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -86,8 +86,8 @@
PWelsLumaHalfpelMcFunc pfLumaHalfpelCen;
PWelsMcFunc pfChromaMc;
- PWelsLumaQuarpelMcFunc* pfLumaQuarpelMc;
- PWelsSampleAveragingFunc* pfSampleAveraging;
+ PWelsLumaQuarpelMcFunc pfLumaQuarpelMc[16];
+ PWelsSampleAveragingFunc pfSampleAveraging[2];
} SMcFunc;
typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);
@@ -191,6 +191,9 @@
typedef void (*PMarkPicFunc) (void* pCtx);
typedef bool (*PUpdateRefListFunc) (void* pCtx);
+typedef int32_t (*PCavlcParamCalFunc) (int16_t* pCoff, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs,
+ int32_t iEndIdx); // moved here from set_mb_syn_cavlc.h so the pointer can be stored as pfCavlcParamCal in TagWelsFuncPointerList
+
struct TagWelsFuncPointerList {
SExpandPicFunc sExpandPicFunc;
PFillInterNeighborCacheFunc pfFillInterNeighborCache;
@@ -280,6 +283,8 @@
PBuildRefListFunc pBuildRefList;
PMarkPicFunc pMarkPic;
PUpdateRefListFunc pUpdateRefList;
+
+ PCavlcParamCalFunc pfCavlcParamCal;
};
} //end of namespace WelsSVCEnc {
--- a/codec/encoder/core/src/encoder.cpp
+++ b/codec/encoder/core/src/encoder.cpp
@@ -181,7 +181,6 @@
InitExpandPictureFunc (& (pFuncList->sExpandPicFunc), uiCpuFlag);
/* Intra_Prediction_fn*/
- WelsInitFillingPredFuncs (uiCpuFlag);
WelsInitIntraPredFuncs (pFuncList, uiCpuFlag);
/* ME func */
@@ -201,7 +200,7 @@
/*init pixel average function*/
/*get one column or row pixel when refinement*/
WelsInitMcFuncs (pFuncList, uiCpuFlag);
- InitCoeffFunc (uiCpuFlag);
+ InitCoeffFunc (pFuncList, uiCpuFlag);
WelsInitEncodingFuncs (pFuncList, uiCpuFlag);
WelsInitReconstructionFuncs (pFuncList, uiCpuFlag);
@@ -351,34 +350,23 @@
* \brief Dump reconstruction for dependency layer
*/
-extern "C" void DumpDependencyRec (SPicture* pCurPicture, const char* kpFileName, const int8_t kiDid) {
+extern "C" void DumpDependencyRec (SPicture* pCurPicture, const char* kpFileName, const int8_t kiDid, bool bAppend) {
WelsFileHandle* pDumpRecFile = NULL;
- static bool bDependencyRecFlag[MAX_DEPENDENCY_LAYER] = {0};
int32_t iWrittenSize = 0;
+ const char* openMode = bAppend ? "ab" : "wb";
if (NULL == pCurPicture || NULL == kpFileName || kiDid >= MAX_DEPENDENCY_LAYER)
return;
- if (bDependencyRecFlag[kiDid]) {
- if (strlen (kpFileName) > 0) // confirmed_safe_unsafe_usage
- pDumpRecFile = WelsFopen (kpFileName, "ab");
- else {
- char sDependencyRecFileName[16] = {0};
- WelsSnprintf (sDependencyRecFileName, 16, "rec%d.yuv", kiDid); // confirmed_safe_unsafe_usage
- pDumpRecFile = WelsFopen (sDependencyRecFileName, "ab");
- }
- if (NULL != pDumpRecFile)
- WelsFseek (pDumpRecFile, 0, SEEK_END);
- } else {
- if (strlen (kpFileName) > 0) { // confirmed_safe_unsafe_usage
- pDumpRecFile = WelsFopen (kpFileName, "wb");
- } else {
- char sDependencyRecFileName[16] = {0};
- WelsSnprintf (sDependencyRecFileName, 16, "rec%d.yuv", kiDid); // confirmed_safe_unsafe_usage
- pDumpRecFile = WelsFopen (sDependencyRecFileName, "wb");
- }
- bDependencyRecFlag[kiDid] = true;
+ if (strlen (kpFileName) > 0) // confirmed_safe_unsafe_usage
+ pDumpRecFile = WelsFopen (kpFileName, openMode);
+ else {
+ char sDependencyRecFileName[16] = {0};
+ WelsSnprintf (sDependencyRecFileName, 16, "rec%d.yuv", kiDid); // confirmed_safe_unsafe_usage
+ pDumpRecFile = WelsFopen (sDependencyRecFileName, openMode);
}
+ if (NULL != pDumpRecFile && bAppend)
+ WelsFseek (pDumpRecFile, 0, SEEK_END);
if (NULL != pDumpRecFile) {
int32_t i = 0;
@@ -419,30 +407,21 @@
* \brief Dump the reconstruction pictures
*/
-void DumpRecFrame (SPicture* pCurPicture, const char* kpFileName) {
+void DumpRecFrame (SPicture* pCurPicture, const char* kpFileName, bool bAppend) {
WelsFileHandle* pDumpRecFile = NULL;
- static bool bRecFlag = false;
int32_t iWrittenSize = 0;
+ const char* openMode = bAppend ? "ab" : "wb";
if (NULL == pCurPicture || NULL == kpFileName)
return;
- if (bRecFlag) {
- if (strlen (kpFileName) > 0) { // confirmed_safe_unsafe_usage
- pDumpRecFile = WelsFopen (kpFileName, "ab");
- } else {
- pDumpRecFile = WelsFopen ("rec.yuv", "ab");
- }
- if (NULL != pDumpRecFile)
- WelsFseek (pDumpRecFile, 0, SEEK_END);
+ if (strlen (kpFileName) > 0) { // confirmed_safe_unsafe_usage
+ pDumpRecFile = WelsFopen (kpFileName, openMode);
} else {
- if (strlen (kpFileName) > 0) { // confirmed_safe_unsafe_usage
- pDumpRecFile = WelsFopen (kpFileName, "wb");
- } else {
- pDumpRecFile = WelsFopen ("rec.yuv", "wb");
- }
- bRecFlag = true;
+ pDumpRecFile = WelsFopen ("rec.yuv", openMode);
}
+ if (NULL != pDumpRecFile && bAppend)
+ WelsFseek (pDumpRecFile, 0, SEEK_END);
if (NULL != pDumpRecFile) {
int32_t i = 0;
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -3384,8 +3384,11 @@
#ifdef ENABLE_FRAME_DUMP
// Dump reconstruction picture for each sQualityStat layer
- if (iCurDid + 1 < pSvcParam->iSpatialLayerNum)
- DumpDependencyRec (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iSpatialLayerNum].sRecFileName[0], iCurDid);
+ if (iCurDid + 1 < pSvcParam->iSpatialLayerNum) {
+ DumpDependencyRec (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iSpatialLayerNum].sRecFileName[0], iCurDid,
+ pCtx->bDependencyRecFlag[iCurDid]);
+ pCtx->bDependencyRecFlag[iCurDid] = true;
+ }
#endif//ENABLE_FRAME_DUMP
#if defined(ENABLE_PSNR_CALC)
@@ -3551,7 +3554,8 @@
#ifdef ENABLE_FRAME_DUMP
DumpRecFrame (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iSpatialLayerNum -
- 1].sRecFileName[0]); // pDecPic: final reconstruction output
+ 1].sRecFileName[0], pCtx->bRecFlag); // pDecPic: final reconstruction output
+ pCtx->bRecFlag = true;
#endif//ENABLE_FRAME_DUMP
++ pCtx->iCodingIndex;
--- a/codec/encoder/core/src/get_intra_predictor.cpp
+++ b/codec/encoder/core/src/get_intra_predictor.cpp
@@ -65,27 +65,9 @@
ST64 (pPred + 8, LD64 (kuiSrc8));
}
-PFillingPred WelsFillingPred8to16;
-PFillingPred WelsFillingPred8x2to16;
-PFillingPred1to16 WelsFillingPred1to16;
-
-void WelsInitFillingPredFuncs (const uint32_t kuiCpuFlag) {
- WelsFillingPred8to16 = WelsFillingPred8to16_c;
- WelsFillingPred8x2to16 = WelsFillingPred8x2to16_c;
- WelsFillingPred1to16 = WelsFillingPred1to16_c;
-
-#if defined(X86_ASM)
- if (kuiCpuFlag & WELS_CPU_MMXEXT) {
- // WelsFillingPred8to16 = WelsFillingPred8to16_mmx;
- // WelsFillingPred8x2to16 = WelsFillingPred8x2to16_mmx;
- // WelsFillingPred1to16 = WelsFillingPred1to16_mmx;
- }
- if (kuiCpuFlag & WELS_CPU_SSE2) {
- // WelsFillingPred8x2to16 = WelsFillingPred8x2to16_sse2;
- // WelsFillingPred1to16 = WelsFillingPred1to16_sse2;
- }
-#endif//X86_ASM
-}
+#define WelsFillingPred8to16 WelsFillingPred8to16_c // fixed alias replacing the former global function pointer; only the _c version was ever assigned
+#define WelsFillingPred8x2to16 WelsFillingPred8x2to16_c // same: the removed init routine had its asm assignments commented out
+#define WelsFillingPred1to16 WelsFillingPred1to16_c // same
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -81,27 +81,7 @@
{4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49}
}
};
-typedef int32_t (*VerFilterFunc) (const uint8_t* pSrc, const int32_t kiSrcStride);
-typedef int32_t (*HorFilterFunc) (const uint8_t* pSrc);
-typedef int32_t (*HorFilterFuncInput16Bits) (int16_t* pSrc);
-VerFilterFunc fpVerFilter = NULL;
-HorFilterFunc fpHorFilter = NULL;
-HorFilterFuncInput16Bits fpHorFilterInput16Bits = NULL;
-
-typedef void (*WelsMcFunc0) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-typedef void (*WelsMcFunc1) (uint8_t* pDst, int32_t iDstStride, const uint8_t* psrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB,
- int32_t iSrcBStride, int32_t iHeight);
-WelsMcFunc0 McCopyWidthEq16 = NULL;
-WelsMcFunc0 McCopyWidthEq8 = NULL;
-WelsMcFunc0 McCopyWidthEq4 = NULL;
-WelsMcFunc0 pfMcHorVer02WidthEq16 = NULL;
-WelsMcFunc1 pfPixelAvgWidthEq16 = NULL;
-WelsMcFunc0 pfMcHorVer20WidthEq16 = NULL;
-WelsMcFunc0 pfMcHorVer22WidthEq16 = NULL;
-
//***************************************************************************//
// C code implementation //
//***************************************************************************//
@@ -194,7 +174,7 @@
int32_t i, j;
for (i = 0; i < iHeight; i++) {
for (j = 0; j < 16; j++) {
- pDst[j] = WelsClip1 ((fpHorFilter (pSrc + j) + 16) >> 5);
+ pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5);
}
pDst += iDstStride;
pSrc += iSrcStride;
@@ -206,7 +186,7 @@
int32_t i, j;
for (i = 0; i < iHeight; i++) {
for (j = 0; j < 16; j++) {
- pDst[j] = WelsClip1 ((fpVerFilter (pSrc + j, iSrcStride) + 16) >> 5);
+ pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5);
}
pDst += iDstStride;
pSrc += iSrcStride;
@@ -220,10 +200,10 @@
for (i = 0; i < iHeight; i++) {
for (j = 0; j < 16 + 5; j++) {
- pTmp[j] = fpVerFilter (pSrc - 2 + j, iSrcStride);
+ pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
}
for (k = 0; k < 16; k++) {
- pDst[k] = WelsClip1 ((fpHorFilterInput16Bits (&pTmp[2 + k]) + 512) >> 10);
+ pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10);
}
pSrc += iSrcStride;
pDst += iDstStride;
@@ -232,97 +212,97 @@
/////////////////////luma MC//////////////////////////
-static inline void McHorVer01WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer01WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
-static inline void McHorVer03WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer03WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
}
-static inline void McHorVer10WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer10WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
}
-static inline void McHorVer11WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer11WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer12WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer12WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer13WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer13WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer21WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer21WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer23WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer23WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer30WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer30WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
}
-static inline void McHorVer31WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer31WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer32WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer32WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
-static inline void McHorVer33WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
+static inline void McHorVer33WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+ McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
}
static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
@@ -331,7 +311,7 @@
int32_t i, j;
for (i = 0; i < iHeight; i++) {
for (j = 0; j < iWidth; j++) {
- pDst[j] = WelsClip1 ((fpHorFilter (pSrc + j) + 16) >> 5);
+ pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5);
}
pDst += iDstStride;
pSrc += iSrcStride;
@@ -344,7 +324,7 @@
int32_t i, j;
for (i = 0; i < iHeight; i++) {
for (j = 0; j < iWidth; j++) {
- pDst[j] = WelsClip1 ((fpVerFilter (pSrc + j, iSrcStride) + 16) >> 5);
+ pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5);
}
pDst += iDstStride;
pSrc += iSrcStride;
@@ -359,24 +339,24 @@
for (i = 0; i < iHeight; i++) {
for (j = 0; j < iWidth + 5; j++) {
- pTmp[j] = fpVerFilter (pSrc - 2 + j, iSrcStride);
+ pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
}
for (k = 0; k < iWidth; k++) {
- pDst[k] = WelsClip1 ((fpHorFilterInput16Bits (&pTmp[2 + k]) + 512) >> 10);
+ pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10);
}
pSrc += iSrcStride;
pDst += iDstStride;
}
}
-static inline void McCopy (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
- int32_t iHeight) {
+static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
+ int32_t iHeight) {
int32_t i;
- if (iWidth == 16 && McCopyWidthEq16 != NULL)
- McCopyWidthEq16 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8 && McCopyWidthEq8 != NULL)
- McCopyWidthEq8 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4 && McCopyWidthEq4 != NULL)
- McCopyWidthEq4 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ if (iWidth == 16)
+ McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
else {
for (i = 0; i < iHeight; i++) {
memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
@@ -394,7 +374,7 @@
const int32_t kiDy = mv.iMvY & 0x07;
if (0 == kiDx && 0 == kiDy) {
- McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
const int32_t kiDA = g_kuiABCD[kiDy][kiDx][0];
const int32_t kiDB = g_kuiABCD[kiDy][kiDx][1];
@@ -449,6 +429,117 @@
McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
}
+static inline void McHorVer01WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) // presumably 256 aligned bytes = 16 rows at stride 16 (macro not shown) - confirm
+ // _sse2 twin of McHorVer01WidthEq16_c (entry 4 of pWelsMcFuncWidthEq16_sse2): McHorVer02 output -> pTmp, then PixelAvg of pSrc and pTmp -> pDst.
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer03WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) // presumably 256 aligned bytes = 16 rows at stride 16 (macro not shown) - confirm
+ // _sse2 twin of McHorVer03WidthEq16_c (entry 12 of pWelsMcFuncWidthEq16_sse2): McHorVer02 output -> pTmp, then PixelAvg of the next source row (pSrc + iSrcStride) and pTmp -> pDst.
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer10WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) // presumably 256 aligned bytes = 16 rows at stride 16 (macro not shown) - confirm
+ // _sse2 twin of McHorVer10WidthEq16_c (entry 1 of pWelsMcFuncWidthEq16_sse2): McHorVer20 output -> pTmp, then PixelAvg of pSrc and pTmp -> pDst.
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer11WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) // presumably 512 aligned bytes used as two 256-byte halves (macro not shown) - confirm
+ // _sse2 twin of McHorVer11WidthEq16_c (entry 5 of pWelsMcFuncWidthEq16_sse2): McHorVer20 -> pTmp, McHorVer02 -> pTmp[256], PixelAvg of both halves -> pDst.
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer12WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) // presumably 512 aligned bytes used as two 256-byte halves (macro not shown) - confirm
+ // _sse2 twin of McHorVer12WidthEq16_c (entry 9 of pWelsMcFuncWidthEq16_sse2): McHorVer02 -> pTmp, McHorVer22 -> pTmp[256], PixelAvg of both halves -> pDst.
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer13WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) // presumably 512 aligned bytes used as two 256-byte halves (macro not shown) - confirm
+ // _sse2 twin of McHorVer13WidthEq16_c (entry 13 of pWelsMcFuncWidthEq16_sse2): McHorVer20 of the next row (pSrc + iSrcStride) -> pTmp, McHorVer02 -> pTmp[256], PixelAvg of both -> pDst.
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer21WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) // presumably 512 aligned bytes used as two 256-byte halves (macro not shown) - confirm
+ // _sse2 twin of McHorVer21WidthEq16_c (entry 6 of pWelsMcFuncWidthEq16_sse2): McHorVer20 -> pTmp, McHorVer22 -> pTmp[256], PixelAvg of both halves -> pDst.
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer23WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) // presumably 512 aligned bytes used as two 256-byte halves (macro not shown) - confirm
+ // _sse2 twin of McHorVer23WidthEq16_c (entry 14 of pWelsMcFuncWidthEq16_sse2): McHorVer20 of the next row (pSrc + iSrcStride) -> pTmp, McHorVer22 -> pTmp[256], PixelAvg of both -> pDst.
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer30WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16) // presumably 256 aligned bytes = 16 rows at stride 16 (macro not shown) - confirm
+ // _sse2 twin of McHorVer30WidthEq16_c (entry 3 of pWelsMcFuncWidthEq16_sse2): McHorVer20 output -> pTmp, then PixelAvg of the source shifted by one byte (pSrc + 1) and pTmp -> pDst.
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer31WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) // presumably 512 aligned bytes used as two 256-byte halves (macro not shown) - confirm
+ // _sse2 twin of McHorVer31WidthEq16_c (entry 7 of pWelsMcFuncWidthEq16_sse2): McHorVer20 -> pTmp, McHorVer02 of pSrc + 1 -> pTmp[256], PixelAvg of both halves -> pDst.
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer32WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) // presumably 512 aligned bytes used as two 256-byte halves (macro not shown) - confirm
+ // _sse2 twin of McHorVer32WidthEq16_c (entry 11 of pWelsMcFuncWidthEq16_sse2): McHorVer02 of pSrc + 1 -> pTmp, McHorVer22 -> pTmp[256], PixelAvg of both halves -> pDst.
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer33WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16) // presumably 512 aligned bytes used as two 256-byte halves (macro not shown) - confirm
+ // _sse2 twin of McHorVer33WidthEq16_c (entry 15 of pWelsMcFuncWidthEq16_sse2): McHorVer20 of the next row (pSrc + iSrcStride) -> pTmp, McHorVer02 of pSrc + 1 -> pTmp[256], PixelAvg of both -> pDst.
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+
+static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) { // block copy used by the McChroma_sse2/_ssse3 zero-offset path; same shape as McCopy_c but with fixed _sse2/_mmx callees instead of global pointers
+ int32_t i; // row counter, used only by the generic fallback below
+ if (iWidth == 16) // widths 16, 8 and 4 go to dedicated copy routines
+ McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else { // any other width: copy iWidth bytes per row, advancing both pointers by their strides
+ for (i = 0; i < iHeight; i++) {
+ memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
+ pDst += iDstStride;
+ pSrc += iSrcStride;
+ }
+ }
+}
+
typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
const uint8_t* pABCD, int32_t iHeigh);
void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
@@ -461,7 +552,7 @@
};
if (0 == kiD8x && 0 == kiD8y) {
- McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
}
@@ -477,7 +568,7 @@
McChromaWidthEq8_ssse3
};
if (0 == kiD8x && 0 == kiD8y) {
- McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
} else {
kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
}
@@ -676,24 +767,24 @@
typedef void (*PixelAvgFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t);
void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
- static PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
+ static const PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
- static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
- McCopyWidthEq16_c, McHorVer10WidthEq16, McHorVer20WidthEq16_c, McHorVer30WidthEq16,
- McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
- McHorVer02WidthEq16_c, McHorVer12WidthEq16, McHorVer22WidthEq16_c, McHorVer32WidthEq16,
- McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
+ static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
+ McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
+ McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
+ McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
+ McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
};
#if defined (X86_ASM)
- static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
- McCopyWidthEq16_sse2, McHorVer10WidthEq16, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16,
- McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
- McHorVer02WidthEq16_sse2, McHorVer12WidthEq16, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16,
- McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
+ static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
+ McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
+ McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
+ McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
+ McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
};
#endif
#if defined(HAVE_NEON)
- static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
+ static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
McCopyWidthEq16_neon, McHorVer10WidthEq16_neon, McHorVer20WidthEq16_neon, McHorVer30WidthEq16_neon,
McHorVer01WidthEq16_neon, EncMcHorVer11_neon, EncMcHorVer21_neon, EncMcHorVer31_neon,
McHorVer02WidthEq16_neon, EncMcHorVer12_neon, McHorVer22WidthEq16_neon, EncMcHorVer32_neon,
@@ -701,7 +792,7 @@
};
#endif
#if defined(HAVE_NEON_AARCH64)
- static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y]
+ static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y]
McCopyWidthEq16_AArch64_neon, McHorVer10WidthEq16_AArch64_neon, McHorVer20WidthEq16_AArch64_neon, McHorVer30WidthEq16_AArch64_neon,
McHorVer01WidthEq16_AArch64_neon, EncMcHorVer11_AArch64_neon, EncMcHorVer21_AArch64_neon, EncMcHorVer31_AArch64_neon,
McHorVer02WidthEq16_AArch64_neon, EncMcHorVer12_AArch64_neon, McHorVer22WidthEq16_AArch64_neon, EncMcHorVer32_AArch64_neon,
@@ -711,19 +802,9 @@
pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20_c;
pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02_c;
pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22_c;
- pFuncList->sMcFuncs.pfSampleAveraging = pfPixAvgFunc;
+ memcpy (pFuncList->sMcFuncs.pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
pFuncList->sMcFuncs.pfChromaMc = McChroma_c;
- fpVerFilter = VerFilter_c;
- fpHorFilter = HorFilter_c;
- fpHorFilterInput16Bits = HorFilterInput16bit1_c;
- McCopyWidthEq4 = McCopyWidthEq4_c;
- McCopyWidthEq8 = McCopyWidthEq8_c;
- McCopyWidthEq16 = McCopyWidthEq16_c;
- pfPixelAvgWidthEq16 = PixelAvgWidthEq16_c;
- pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_c;
- pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_c;
- pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_c;
- pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16;
+ memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16));
#if defined (X86_ASM)
if (uiCpuFlag & WELS_CPU_SSE2) {
pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
@@ -732,14 +813,7 @@
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2;
- McCopyWidthEq4 = McCopyWidthEq4_mmx;
- McCopyWidthEq8 = McCopyWidthEq8_mmx;
- McCopyWidthEq16 = McCopyWidthEq16_sse2;
- pfPixelAvgWidthEq16 = PixelAvgWidthEq16_sse2;
- pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_sse2;
- pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_sse2;
- pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_sse2;
- pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16_sse2;
+ memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2));
}
if (uiCpuFlag & WELS_CPU_SSSE3) {
@@ -750,7 +824,7 @@
#if defined(HAVE_NEON)
if (uiCpuFlag & WELS_CPU_NEON) {
- pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16_neon;
+ memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_neon, sizeof (pWelsMcFuncWidthEq16_neon));
pFuncList->sMcFuncs.pfChromaMc = EncMcChroma_neon;
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
@@ -761,7 +835,8 @@
#endif
#if defined(HAVE_NEON_AARCH64)
if (uiCpuFlag & WELS_CPU_NEON) {
- pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16_AArch64_neon;
+ memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_AArch64_neon,
+ sizeof (pWelsMcFuncWidthEq16_AArch64_neon));
pFuncList->sMcFuncs.pfChromaMc = EncMcChroma_AArch64_neon;
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon;
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon;
--- a/codec/encoder/core/src/set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/set_mb_syn_cavlc.cpp
@@ -44,7 +44,6 @@
#include "wels_const.h"
namespace WelsSVCEnc {
-SCoeffFunc sCoeffFunc;
const ALIGNED_DECLARE (uint8_t, g_kuiZeroLeftMap[16], 16) = {
0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
@@ -74,7 +73,8 @@
return iTotalZeros;
}
-int32_t WriteBlockResidualCavlc (int16_t* pCoffLevel, int32_t iEndIdx, int32_t iCalRunLevelFlag,
+int32_t WriteBlockResidualCavlc (SWelsFuncPtrList* pFuncList, int16_t* pCoffLevel, int32_t iEndIdx,
+ int32_t iCalRunLevelFlag,
int32_t iResidualProperty, int8_t iNC, SBitStringAux* pBs) {
ENFORCE_STACK_ALIGN_1D (int16_t, iLevel, 16, 16)
ENFORCE_STACK_ALIGN_1D (uint8_t, uiRun, 16, 16)
@@ -95,7 +95,7 @@
if (iCalRunLevelFlag) {
int32_t iCount = 0;
- iTotalZeros = sCoeffFunc.pfCavlcParamCal (pCoffLevel, uiRun, iLevel, &iTotalCoeffs, iEndIdx);
+ iTotalZeros = pFuncList->pfCavlcParamCal (pCoffLevel, uiRun, iLevel, &iTotalCoeffs, iEndIdx);
iCount = (iTotalCoeffs > 3) ? 3 : iTotalCoeffs;
for (i = 0; i < iCount ; i++) {
if (WELS_ABS (iLevel[i]) == 1) {
@@ -200,12 +200,12 @@
}
-void InitCoeffFunc (const uint32_t uiCpuFlag) {
- sCoeffFunc.pfCavlcParamCal = CavlcParamCal_c;
+void InitCoeffFunc (SWelsFuncPtrList* pFuncList, const uint32_t uiCpuFlag) {
+ pFuncList->pfCavlcParamCal = CavlcParamCal_c;
#if defined(X86_ASM)
if (uiCpuFlag & WELS_CPU_SSE2) {
- // sCoeffFunc.pfCavlcParamCal = CavlcParamCal_sse2;
+ // pFuncList->pfCavlcParamCal = CavlcParamCal_sse2;
}
#endif
}
--- a/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
@@ -244,7 +244,7 @@
pSlice->uiLastMbQp = pCurMb->uiLumaQp;
BsWriteSE (pBs, kiDeltaQp);
- if (WelsWriteMbResidual (pMbCache, pCurMb, pBs))
+ if (WelsWriteMbResidual (pEncCtx->pFuncList, pMbCache, pCurMb, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
} else {
pCurMb->uiLumaQp = pSlice->uiLastMbQp;
@@ -256,7 +256,7 @@
return CheckBitstreamBuffer (pSlice->uiSliceIdx, pEncCtx, pBs);
}
-int32_t WelsWriteMbResidual (SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs) {
+int32_t WelsWriteMbResidual (SWelsFuncPtrList* pFuncList, SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs) {
int32_t i;
Mb_Type uiMbType = pCurMb->uiMbType;
const int32_t kiCbpChroma = pCurMb->uiCbp >> 4;
@@ -270,7 +270,7 @@
iA = pNonZeroCoeffCount[8];
iB = pNonZeroCoeffCount[ 1];
WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
- if (WriteBlockResidualCavlc (sMbCacheInfo->pDct->iLumaI16x16Dc, 15, 1, LUMA_4x4, iC, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, sMbCacheInfo->pDct->iLumaI16x16Dc, 15, 1, LUMA_4x4, iC, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
/* AC Luma */
@@ -282,7 +282,7 @@
iA = pNonZeroCoeffCount[iIdx - 1];
iB = pNonZeroCoeffCount[iIdx - 8];
WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
- if (WriteBlockResidualCavlc (pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, LUMA_AC, iC, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, LUMA_AC, iC, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
pBlock += 16;
}
@@ -302,25 +302,25 @@
iA = pNonZeroCoeffCount[iIdx - 1];
iB = pNonZeroCoeffCount[iIdx - 8];
WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
- if (WriteBlockResidualCavlc (pBlock, 15, kiA > 0, LUMA_4x4, iC, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock, 15, kiA > 0, LUMA_4x4, iC, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
iA = kiA;
iB = pNonZeroCoeffCount[iIdx - 7];
WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
- if (WriteBlockResidualCavlc (pBlock + 16, 15, kiB > 0, LUMA_4x4, iC, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock + 16, 15, kiB > 0, LUMA_4x4, iC, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
iA = pNonZeroCoeffCount[iIdx + 7];
iB = kiA;
WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
- if (WriteBlockResidualCavlc (pBlock + 32, 15, kiC > 0, LUMA_4x4, iC, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock + 32, 15, kiC > 0, LUMA_4x4, iC, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
iA = kiC;
iB = kiB;
WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
- if (WriteBlockResidualCavlc (pBlock + 48, 15, kiD > 0, LUMA_4x4, iC, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock + 48, 15, kiD > 0, LUMA_4x4, iC, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
}
pBlock += 64;
@@ -331,11 +331,11 @@
if (kiCbpChroma) {
/* Chroma DC residual present */
pBlock = sMbCacheInfo->pDct->iChromaDc[0]; // Cb
- if (WriteBlockResidualCavlc (pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
pBlock += 4; // Cr
- if (WriteBlockResidualCavlc (pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
/* Chroma AC residual present */
@@ -348,7 +348,7 @@
iA = pNonZeroCoeffCount[iIdx - 1];
iB = pNonZeroCoeffCount[iIdx - 8];
WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
- if (WriteBlockResidualCavlc (pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
pBlock += 16;
}
@@ -360,7 +360,7 @@
iA = pNonZeroCoeffCount[iIdx - 1];
iB = pNonZeroCoeffCount[iIdx - 8];
WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
- if (WriteBlockResidualCavlc (pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs))
+ if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs))
return ENC_RETURN_VLCOVERFLOWFOUND;
pBlock += 16;
}
--- a/test/encoder/EncUT_GetIntraPredictor.cpp
+++ b/test/encoder/EncUT_GetIntraPredictor.cpp
@@ -14,8 +14,6 @@
pRef[i] = rand() % 256;
const int32_t kkiStride = 0;
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredV_c (pPred, pRef, kkiStride);
for (int i = 0; i < 4; i++)
@@ -47,8 +45,6 @@
const uint8_t kuiV3[4] = {kuiH3, kuiH3, kuiH3, kuiH3};
const uint8_t kuiV4[4] = {kuiH4, kuiH4, kuiH4, kuiH4};
- const uint32_t kuiCpuFlag = 0;
-
ENFORCE_STACK_ALIGN_1D (uint8_t, uiV, 16, 16) // TobeCont'd about assign opt as follows
ST32 (&uiV[0], LD32 (kuiV1));
ST32 (&uiV[4], LD32 (kuiV2));
@@ -55,7 +51,6 @@
ST32 (&uiV[8], LD32 (kuiV3));
ST32 (&uiV[12], LD32 (kuiV4));
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredH_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
@@ -99,8 +94,6 @@
uiV[11] = uiV[14] = kuiDDL5;
uiV[15] = kuiDDL6;
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredDDL_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
@@ -133,8 +126,6 @@
uiV[2] = uiV[5] = uiV[8] = kuiDLT2;
uiV[3] = kuiDLT3;
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredDDLTop_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
@@ -190,8 +181,6 @@
uiV[8] = uiV[13] = kuiDDR5;
uiV[12] = kuiDDR6;
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredDDR_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
@@ -240,8 +229,6 @@
uiV[11] = kuiVL4;
uiV[15] = kuiVL9;
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredVL_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
@@ -285,8 +272,6 @@
uiV[6] = uiV[13] = kuiVLT6;
uiV[7] = uiV[14] = uiV[15] = kuiVLT7;
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredVLTop_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
@@ -340,8 +325,6 @@
uiV[8] = kuiVR8;
uiV[12] = kuiVR9;
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredVR_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
@@ -388,8 +371,6 @@
uiV[7] = uiV[9] = kuiHU5;
memset (&uiV[10], kuiL3, 6 * sizeof (uint8_t));
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredHU_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
@@ -444,8 +425,6 @@
uiV[12] = kuiHD8;
uiV[13] = kuiHD9;
- const uint32_t kuiCpuFlag = 0;
- WelsInitFillingPredFuncs (kuiCpuFlag);
WelsI4x4LumaPredHD_c (pPred, pRef, kiStride);
for (int i = 0; i < 4; i++)
--- a/test/encoder/EncUT_Sample.cpp
+++ b/test/encoder/EncUT_Sample.cpp
@@ -83,7 +83,6 @@
uint8_t* pDec = (uint8_t*)cMemoryAlign.WelsMalloc (iLineSizeDec << 5, "pDec");
uint8_t* pEnc = (uint8_t*)cMemoryAlign.WelsMalloc (iLineSizeEnc << 5, "pEnc");
uint8_t* pDst = (uint8_t*)cMemoryAlign.WelsMalloc (512, "pDst");
- WelsInitFillingPredFuncs (WELS_CPU_SSE2);
for (int i = 0; i < (iLineSizeDec << 5); i++)
pDec[i] = rand() % 256;
for (int i = 0; i < (iLineSizeEnc << 5); i++)