shithub: openh264

Download patch

ref: bdf9f6b4ffb61dbe87690f3de0bd12312de1bf34
parent: dce954119995e8242ff99d134988d1fc730a8d4f
parent: e5609bc0fe7a580b8805d57b0878721b48be1b13
author: ruil2 <[email protected]>
date: Wed Jul 2 10:01:35 EDT 2014

Merge pull request #1043 from mstorsjo/avoid-globals

Get rid of global non-readonly variables within the library

--- a/codec/encoder/core/inc/encoder.h
+++ b/codec/encoder/core/inc/encoder.h
@@ -91,12 +91,12 @@
  * \brief	Dump reconstruction for dependency layer
  */
 
-extern "C" void DumpDependencyRec (SPicture* pSrcPic, const char* kpFileName, const int8_t kiDid);
+extern "C" void DumpDependencyRec (SPicture* pSrcPic, const char* kpFileName, const int8_t kiDid, bool bAppend);
 
 /*!
  * \brief	Dump the reconstruction pictures
  */
-void DumpRecFrame (SPicture* pSrcPic, const char* kpFileName);
+void DumpRecFrame (SPicture* pSrcPic, const char* kpFileName, bool bAppend);
 
 
 /*!
--- a/codec/encoder/core/inc/encoder_context.h
+++ b/codec/encoder/core/inc/encoder_context.h
@@ -217,6 +217,11 @@
 int32_t iEncoderError;
 WELS_MUTEX					mutexEncoderError;
 int32_t iDropNumber;
+
+#ifdef ENABLE_FRAME_DUMP
+bool bDependencyRecFlag[MAX_DEPENDENCY_LAYER];
+bool bRecFlag;
+#endif
 } sWelsEncCtx/*, *PWelsEncCtx*/;
 }
 #endif//sWelsEncCtx_H__
--- a/codec/encoder/core/inc/get_intra_predictor.h
+++ b/codec/encoder/core/inc/get_intra_predictor.h
@@ -165,7 +165,6 @@
 }
 #endif//__cplusplus
 
-void WelsInitFillingPredFuncs (const uint32_t kuiCpuFlag);
 void WelsInitIntraPredFuncs (SWelsFuncPtrList* pFuncList, const uint32_t kuiCpuFlag);
 
 }
--- a/codec/encoder/core/inc/set_mb_syn_cavlc.h
+++ b/codec/encoder/core/inc/set_mb_syn_cavlc.h
@@ -43,6 +43,7 @@
 
 #include "typedefs.h"
 #include "bit_stream.h"
+#include "wels_func_ptr_def.h"
 
 namespace WelsSVCEnc {
 
@@ -59,16 +60,6 @@
 
 #define LUMA_DC_AC    0x04
 
-typedef  int32_t (*PCavlcParamCalFunc) (int16_t* pCoff, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs,
-                                        int32_t iEndIdx);
-
-typedef  struct TagCoeffFunc {
-PCavlcParamCalFunc    pfCavlcParamCal;
-} SCoeffFunc;
-
-/*  For CAVLC   */
-extern SCoeffFunc    sCoeffFunc;
-
 typedef struct TagCavlcTableItem {
 uint16_t uiBits;
 uint8_t  uiLen;
@@ -75,9 +66,10 @@
 uint8_t  uiSuffixLength;
 } SCavlcTableItem;
 
-void  InitCoeffFunc (const uint32_t uiCpuFlag);
+void  InitCoeffFunc (SWelsFuncPtrList* pFuncList, const uint32_t uiCpuFlag);
 
-int32_t  WriteBlockResidualCavlc (int16_t* pCoffLevel, int32_t iEndIdx, int32_t iCalRunLevelFlag,
+int32_t  WriteBlockResidualCavlc (SWelsFuncPtrList* pFuncList, int16_t* pCoffLevel, int32_t iEndIdx,
+                                  int32_t iCalRunLevelFlag,
                                   int32_t iResidualProperty, int8_t iNC, SBitStringAux* pBs);
 
 #if defined(__cplusplus)
--- a/codec/encoder/core/inc/svc_set_mb_syn_cavlc.h
+++ b/codec/encoder/core/inc/svc_set_mb_syn_cavlc.h
@@ -50,7 +50,7 @@
 
 namespace WelsSVCEnc {
 
-int32_t WelsWriteMbResidual (SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs);
+int32_t WelsWriteMbResidual (SWelsFuncPtrList* pFuncList, SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs);
 
 void WelsSpatialWriteSubMbPred (sWelsEncCtx* pEncCtx, SSlice* pSlice, SMB* pCurMb);
 
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -86,8 +86,8 @@
   PWelsLumaHalfpelMcFunc      pfLumaHalfpelCen;
   PWelsMcFunc                         pfChromaMc;
 
-  PWelsLumaQuarpelMcFunc*     pfLumaQuarpelMc;
-  PWelsSampleAveragingFunc*   pfSampleAveraging;
+  PWelsLumaQuarpelMcFunc      pfLumaQuarpelMc[16];
+  PWelsSampleAveragingFunc    pfSampleAveraging[2];
 } SMcFunc;
 
 typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);
@@ -191,6 +191,9 @@
 typedef void (*PMarkPicFunc) (void* pCtx);
 typedef bool (*PUpdateRefListFunc) (void* pCtx);
 
+typedef  int32_t (*PCavlcParamCalFunc) (int16_t* pCoff, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs,
+                                        int32_t iEndIdx);
+
 struct TagWelsFuncPointerList {
   SExpandPicFunc sExpandPicFunc;
   PFillInterNeighborCacheFunc       pfFillInterNeighborCache;
@@ -280,6 +283,8 @@
   PBuildRefListFunc     pBuildRefList;
   PMarkPicFunc          pMarkPic;
   PUpdateRefListFunc    pUpdateRefList;
+
+  PCavlcParamCalFunc    pfCavlcParamCal;
 };
 
 }  //end of namespace WelsSVCEnc {
--- a/codec/encoder/core/src/encoder.cpp
+++ b/codec/encoder/core/src/encoder.cpp
@@ -181,7 +181,6 @@
   InitExpandPictureFunc (& (pFuncList->sExpandPicFunc), uiCpuFlag);
 
   /* Intra_Prediction_fn*/
-  WelsInitFillingPredFuncs (uiCpuFlag);
   WelsInitIntraPredFuncs (pFuncList, uiCpuFlag);
 
   /* ME func */
@@ -201,7 +200,7 @@
   /*init pixel average function*/
   /*get one column or row pixel when refinement*/
   WelsInitMcFuncs (pFuncList, uiCpuFlag);
-  InitCoeffFunc (uiCpuFlag);
+  InitCoeffFunc (pFuncList, uiCpuFlag);
 
   WelsInitEncodingFuncs (pFuncList, uiCpuFlag);
   WelsInitReconstructionFuncs (pFuncList, uiCpuFlag);
@@ -351,34 +350,23 @@
  * \brief	Dump reconstruction for dependency layer
  */
 
-extern "C" void DumpDependencyRec (SPicture* pCurPicture, const char* kpFileName, const int8_t kiDid) {
+extern "C" void DumpDependencyRec (SPicture* pCurPicture, const char* kpFileName, const int8_t kiDid, bool bAppend) {
   WelsFileHandle* pDumpRecFile = NULL;
-  static bool bDependencyRecFlag[MAX_DEPENDENCY_LAYER]	= {0};
   int32_t iWrittenSize											= 0;
+  const char* openMode = bAppend ? "ab" : "wb";
 
   if (NULL == pCurPicture || NULL == kpFileName || kiDid >= MAX_DEPENDENCY_LAYER)
     return;
 
-  if (bDependencyRecFlag[kiDid]) {
-    if (strlen (kpFileName) > 0)	// confirmed_safe_unsafe_usage
-      pDumpRecFile = WelsFopen (kpFileName, "ab");
-    else {
-      char sDependencyRecFileName[16] = {0};
-      WelsSnprintf (sDependencyRecFileName, 16, "rec%d.yuv", kiDid);	// confirmed_safe_unsafe_usage
-      pDumpRecFile	= WelsFopen (sDependencyRecFileName, "ab");
-    }
-    if (NULL != pDumpRecFile)
-      WelsFseek (pDumpRecFile, 0, SEEK_END);
-  } else {
-    if (strlen (kpFileName) > 0) {	// confirmed_safe_unsafe_usage
-      pDumpRecFile	= WelsFopen (kpFileName, "wb");
-    } else {
-      char sDependencyRecFileName[16] = {0};
-      WelsSnprintf (sDependencyRecFileName, 16, "rec%d.yuv", kiDid);	// confirmed_safe_unsafe_usage
-      pDumpRecFile	= WelsFopen (sDependencyRecFileName, "wb");
-    }
-    bDependencyRecFlag[kiDid]	= true;
+  if (strlen (kpFileName) > 0)	// confirmed_safe_unsafe_usage
+    pDumpRecFile = WelsFopen (kpFileName, openMode);
+  else {
+    char sDependencyRecFileName[16] = {0};
+    WelsSnprintf (sDependencyRecFileName, 16, "rec%d.yuv", kiDid);	// confirmed_safe_unsafe_usage
+    pDumpRecFile	= WelsFopen (sDependencyRecFileName, openMode);
   }
+  if (NULL != pDumpRecFile && bAppend)
+    WelsFseek (pDumpRecFile, 0, SEEK_END);
 
   if (NULL != pDumpRecFile) {
     int32_t i = 0;
@@ -419,30 +407,21 @@
  * \brief	Dump the reconstruction pictures
  */
 
-void DumpRecFrame (SPicture* pCurPicture, const char* kpFileName) {
+void DumpRecFrame (SPicture* pCurPicture, const char* kpFileName, bool bAppend) {
   WelsFileHandle* pDumpRecFile				= NULL;
-  static bool bRecFlag	= false;
   int32_t iWrittenSize			= 0;
+  const char* openMode = bAppend ? "ab" : "wb";
 
   if (NULL == pCurPicture || NULL == kpFileName)
     return;
 
-  if (bRecFlag) {
-    if (strlen (kpFileName) > 0) {	// confirmed_safe_unsafe_usage
-      pDumpRecFile	= WelsFopen (kpFileName, "ab");
-    } else {
-      pDumpRecFile	= WelsFopen ("rec.yuv", "ab");
-    }
-    if (NULL != pDumpRecFile)
-      WelsFseek (pDumpRecFile, 0, SEEK_END);
+  if (strlen (kpFileName) > 0) {	// confirmed_safe_unsafe_usage
+    pDumpRecFile	= WelsFopen (kpFileName, openMode);
   } else {
-    if (strlen (kpFileName) > 0) {	// confirmed_safe_unsafe_usage
-      pDumpRecFile	= WelsFopen (kpFileName, "wb");
-    } else {
-      pDumpRecFile	= WelsFopen ("rec.yuv", "wb");
-    }
-    bRecFlag	= true;
+    pDumpRecFile	= WelsFopen ("rec.yuv", openMode);
   }
+  if (NULL != pDumpRecFile && bAppend)
+    WelsFseek (pDumpRecFile, 0, SEEK_END);
 
   if (NULL != pDumpRecFile) {
     int32_t i = 0;
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -3384,8 +3384,11 @@
 
 #ifdef ENABLE_FRAME_DUMP
     // Dump reconstruction picture for each sQualityStat layer
-    if (iCurDid + 1 < pSvcParam->iSpatialLayerNum)
-      DumpDependencyRec (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iSpatialLayerNum].sRecFileName[0], iCurDid);
+    if (iCurDid + 1 < pSvcParam->iSpatialLayerNum) {
+      DumpDependencyRec (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iSpatialLayerNum].sRecFileName[0], iCurDid,
+                         pCtx->bDependencyRecFlag[iCurDid]);
+      pCtx->bDependencyRecFlag[iCurDid] = true;
+    }
 #endif//ENABLE_FRAME_DUMP
 
 #if defined(ENABLE_PSNR_CALC)
@@ -3551,7 +3554,8 @@
 
 #ifdef ENABLE_FRAME_DUMP
   DumpRecFrame (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iSpatialLayerNum -
-                1].sRecFileName[0]);	// pDecPic: final reconstruction output
+                1].sRecFileName[0], pCtx->bRecFlag);	// pDecPic: final reconstruction output
+  pCtx->bRecFlag = true;
 #endif//ENABLE_FRAME_DUMP
 
   ++ pCtx->iCodingIndex;
--- a/codec/encoder/core/src/get_intra_predictor.cpp
+++ b/codec/encoder/core/src/get_intra_predictor.cpp
@@ -65,27 +65,9 @@
   ST64 (pPred + 8, LD64 (kuiSrc8));
 }
 
-PFillingPred					WelsFillingPred8to16;
-PFillingPred					WelsFillingPred8x2to16;
-PFillingPred1to16 WelsFillingPred1to16;
-
-void WelsInitFillingPredFuncs (const uint32_t kuiCpuFlag) {
-  WelsFillingPred8to16	= WelsFillingPred8to16_c;
-  WelsFillingPred8x2to16	= WelsFillingPred8x2to16_c;
-  WelsFillingPred1to16	= WelsFillingPred1to16_c;
-
-#if defined(X86_ASM)
-  if (kuiCpuFlag & WELS_CPU_MMXEXT) {
-    //  WelsFillingPred8to16		= WelsFillingPred8to16_mmx;
-    //  WelsFillingPred8x2to16	    = WelsFillingPred8x2to16_mmx;
-    //  WelsFillingPred1to16		= WelsFillingPred1to16_mmx;
-  }
-  if (kuiCpuFlag & WELS_CPU_SSE2) {
-    // WelsFillingPred8x2to16	    = WelsFillingPred8x2to16_sse2;
-    // WelsFillingPred1to16		= WelsFillingPred1to16_sse2;
-  }
-#endif//X86_ASM
-}
+#define WelsFillingPred8to16 WelsFillingPred8to16_c
+#define WelsFillingPred8x2to16 WelsFillingPred8x2to16_c
+#define WelsFillingPred1to16 WelsFillingPred1to16_c
 
 
 
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -81,27 +81,7 @@
     {4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49}
   }
 };
-typedef int32_t (*VerFilterFunc) (const uint8_t* pSrc, const int32_t kiSrcStride);
-typedef int32_t (*HorFilterFunc) (const uint8_t* pSrc);
-typedef int32_t (*HorFilterFuncInput16Bits) (int16_t* pSrc);
 
-VerFilterFunc fpVerFilter			= NULL;
-HorFilterFunc fpHorFilter			= NULL;
-HorFilterFuncInput16Bits fpHorFilterInput16Bits = NULL;
-
-typedef void (*WelsMcFunc0) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                             int32_t iHeight);
-typedef void (*WelsMcFunc1) (uint8_t* pDst, int32_t iDstStride, const uint8_t* psrcA, int32_t iSrcAStride,
-                             const uint8_t* pSrcB,
-                             int32_t iSrcBStride, int32_t iHeight);
-WelsMcFunc0 McCopyWidthEq16 = NULL;
-WelsMcFunc0 McCopyWidthEq8 = NULL;
-WelsMcFunc0 McCopyWidthEq4 = NULL;
-WelsMcFunc0 pfMcHorVer02WidthEq16 = NULL;
-WelsMcFunc1 pfPixelAvgWidthEq16  = NULL;
-WelsMcFunc0 pfMcHorVer20WidthEq16 = NULL;
-WelsMcFunc0 pfMcHorVer22WidthEq16 = NULL;
-
 //***************************************************************************//
 //                          C code implementation                            //
 //***************************************************************************//
@@ -194,7 +174,7 @@
   int32_t i, j;
   for (i = 0; i < iHeight; i++) {
     for (j = 0; j < 16; j++) {
-      pDst[j] = WelsClip1 ((fpHorFilter (pSrc + j) + 16) >> 5);
+      pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5);
     }
     pDst += iDstStride;
     pSrc += iSrcStride;
@@ -206,7 +186,7 @@
   int32_t i, j;
   for (i = 0; i < iHeight; i++) {
     for (j = 0; j < 16; j++) {
-      pDst[j] = WelsClip1 ((fpVerFilter (pSrc + j, iSrcStride) + 16) >> 5);
+      pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5);
     }
     pDst += iDstStride;
     pSrc += iSrcStride;
@@ -220,10 +200,10 @@
 
   for (i = 0; i < iHeight; i++) {
     for (j = 0; j < 16 + 5; j++) {
-      pTmp[j] = fpVerFilter (pSrc - 2 + j, iSrcStride);
+      pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
     }
     for (k = 0; k < 16; k++) {
-      pDst[k] = WelsClip1 ((fpHorFilterInput16Bits (&pTmp[2 + k]) + 512) >> 10);
+      pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10);
     }
     pSrc += iSrcStride;
     pDst += iDstStride;
@@ -232,97 +212,97 @@
 
 /////////////////////luma MC//////////////////////////
 
-static inline void McHorVer01WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer01WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
 
-  pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
 }
-static inline void McHorVer03WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer03WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
 
-  pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
 }
-static inline void McHorVer10WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer10WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
 
-  pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
 }
-static inline void McHorVer11WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer11WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
 
-  pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
-  pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
 }
-static inline void McHorVer12WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer12WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
 
-  pfMcHorVer02WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
-  pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+  McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
 }
-static inline void McHorVer13WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer13WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
 
-  pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
-  pfMcHorVer02WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+  McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
 }
-static inline void McHorVer21WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer21WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
 
-  pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
-  pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
 }
-static inline void McHorVer23WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer23WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
 
-  pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
-  pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+  McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+  McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
 }
-static inline void McHorVer30WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer30WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
 
-  pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
 }
-static inline void McHorVer31WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer31WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
 
-  pfMcHorVer20WidthEq16 (pSrc, iSrcStride, pTmp, 16, iHeight);
-  pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+  McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
 }
-static inline void McHorVer32WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer32WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
 
-  pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
-  pfMcHorVer22WidthEq16 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+  McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+  McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
 }
-static inline void McHorVer33WidthEq16 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                                        int32_t iHeight) {
+static inline void McHorVer33WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                          int32_t iHeight) {
   ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
 
-  pfMcHorVer20WidthEq16 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
-  pfMcHorVer02WidthEq16 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
-  pfPixelAvgWidthEq16 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+  McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
 }
 
 static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
@@ -331,7 +311,7 @@
   int32_t i, j;
   for (i = 0; i < iHeight; i++) {
     for (j = 0; j < iWidth; j++) {
-      pDst[j] = WelsClip1 ((fpHorFilter (pSrc + j) + 16) >> 5);
+      pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5);
     }
     pDst += iDstStride;
     pSrc += iSrcStride;
@@ -344,7 +324,7 @@
   int32_t i, j;
   for (i = 0; i < iHeight; i++) {
     for (j = 0; j < iWidth; j++) {
-      pDst[j] = WelsClip1 ((fpVerFilter (pSrc + j, iSrcStride) + 16) >> 5);
+      pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5);
     }
     pDst += iDstStride;
     pSrc += iSrcStride;
@@ -359,24 +339,24 @@
 
   for (i = 0; i < iHeight; i++) {
     for (j = 0; j < iWidth + 5; j++) {
-      pTmp[j] = fpVerFilter (pSrc - 2 + j, iSrcStride);
+      pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
     }
     for (k = 0; k < iWidth; k++) {
-      pDst[k] = WelsClip1 ((fpHorFilterInput16Bits (&pTmp[2 + k]) + 512) >> 10);
+      pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10);
     }
     pSrc += iSrcStride;
     pDst += iDstStride;
   }
 }
-static inline void McCopy (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
-                           int32_t iHeight) {
+static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
+                             int32_t iHeight) {
   int32_t i;
-  if (iWidth == 16 && McCopyWidthEq16 != NULL)
-    McCopyWidthEq16 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-  else if (iWidth == 8 && McCopyWidthEq8 != NULL)
-    McCopyWidthEq8 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-  else if (iWidth == 4 && McCopyWidthEq4 != NULL)
-    McCopyWidthEq4 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+  if (iWidth == 16)
+    McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+  else if (iWidth == 8)
+    McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+  else if (iWidth == 4)
+    McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
   else {
     for (i = 0; i < iHeight; i++) {
       memcpy (pDst, pSrc, iWidth);	// confirmed_safe_unsafe_usage
@@ -394,7 +374,7 @@
   const int32_t kiDy = mv.iMvY & 0x07;
 
   if (0 == kiDx && 0 == kiDy) {
-    McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+    McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
   } else {
     const int32_t kiDA = g_kuiABCD[kiDy][kiDx][0];
     const int32_t kiDB = g_kuiABCD[kiDy][kiDx][1];
@@ -449,6 +429,117 @@
   McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1,  48, pDst + iWidth - 8, iDstStride, 8, iHeight);
 }
 
+static inline void McHorVer01WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
+
+  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer03WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
+
+  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer10WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
+
+  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer11WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer12WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer13WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+  McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer21WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer23WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+  McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+  McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer30WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
+
+  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+}
+static inline void McHorVer31WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+  McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer32WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+  McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
+  McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+static inline void McHorVer33WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                             int32_t iHeight) {
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
+
+  McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+  McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
+  PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
+}
+
+static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+                                int32_t iWidth, int32_t iHeight) {
+  int32_t i;
+  if (iWidth == 16)
+    McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+  else if (iWidth == 8)
+    McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+  else if (iWidth == 4)
+    McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+  else {
+    for (i = 0; i < iHeight; i++) {
+      memcpy (pDst, pSrc, iWidth);	// confirmed_safe_unsafe_usage
+      pDst += iDstStride;
+      pSrc += iSrcStride;
+    }
+  }
+}
+
 typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                   const uint8_t* pABCD, int32_t iHeigh);
 void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
@@ -461,7 +552,7 @@
   };
 
   if (0 == kiD8x && 0 == kiD8y) {
-    McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+    McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
   } else {
     kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
   }
@@ -477,7 +568,7 @@
     McChromaWidthEq8_ssse3
   };
   if (0 == kiD8x && 0 == kiD8y) {
-    McCopy (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+    McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
   } else {
     kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
   }
@@ -676,24 +767,24 @@
 
 typedef void (*PixelAvgFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t);
 void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
-  static PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
+  static const PixelAvgFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
 
-  static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
-    McCopyWidthEq16_c,  McHorVer10WidthEq16, McHorVer20WidthEq16_c,     McHorVer30WidthEq16,
-    McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
-    McHorVer02WidthEq16_c,     McHorVer12WidthEq16, McHorVer22WidthEq16_c,    McHorVer32WidthEq16,
-    McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
+  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
+    McCopyWidthEq16_c,     McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
+    McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
+    McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
+    McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
   };
 #if defined (X86_ASM)
-  static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
-    McCopyWidthEq16_sse2,  McHorVer10WidthEq16, McHorVer20WidthEq16_sse2,     McHorVer30WidthEq16,
-    McHorVer01WidthEq16, McHorVer11WidthEq16, McHorVer21WidthEq16, McHorVer31WidthEq16,
-    McHorVer02WidthEq16_sse2,     McHorVer12WidthEq16, McHorVer22WidthEq16_sse2,    McHorVer32WidthEq16,
-    McHorVer03WidthEq16, McHorVer13WidthEq16, McHorVer23WidthEq16, McHorVer33WidthEq16
+  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
+    McCopyWidthEq16_sse2,     McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
+    McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
+    McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
+    McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
   };
 #endif
 #if defined(HAVE_NEON)
-  static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
+  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
     McCopyWidthEq16_neon,        McHorVer10WidthEq16_neon,   McHorVer20WidthEq16_neon,    McHorVer30WidthEq16_neon,
     McHorVer01WidthEq16_neon,    EncMcHorVer11_neon,         EncMcHorVer21_neon,          EncMcHorVer31_neon,
     McHorVer02WidthEq16_neon,    EncMcHorVer12_neon,         McHorVer22WidthEq16_neon,    EncMcHorVer32_neon,
@@ -701,7 +792,7 @@
   };
 #endif
 #if defined(HAVE_NEON_AARCH64)
-  static PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y]
+  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y]
     McCopyWidthEq16_AArch64_neon,        McHorVer10WidthEq16_AArch64_neon,   McHorVer20WidthEq16_AArch64_neon,    McHorVer30WidthEq16_AArch64_neon,
     McHorVer01WidthEq16_AArch64_neon,    EncMcHorVer11_AArch64_neon,         EncMcHorVer21_AArch64_neon,          EncMcHorVer31_AArch64_neon,
     McHorVer02WidthEq16_AArch64_neon,    EncMcHorVer12_AArch64_neon,         McHorVer22WidthEq16_AArch64_neon,    EncMcHorVer32_AArch64_neon,
@@ -711,19 +802,9 @@
   pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20_c;
   pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02_c;
   pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22_c;
-  pFuncList->sMcFuncs.pfSampleAveraging = pfPixAvgFunc;
+  memcpy (pFuncList->sMcFuncs.pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
   pFuncList->sMcFuncs.pfChromaMc	= McChroma_c;
-  fpVerFilter				= VerFilter_c;
-  fpHorFilter				= HorFilter_c;
-  fpHorFilterInput16Bits			= HorFilterInput16bit1_c;
-  McCopyWidthEq4 = McCopyWidthEq4_c;
-  McCopyWidthEq8 = McCopyWidthEq8_c;
-  McCopyWidthEq16 = McCopyWidthEq16_c;
-  pfPixelAvgWidthEq16 = PixelAvgWidthEq16_c;
-  pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_c;
-  pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_c;
-  pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_c;
-  pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16;
+  memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16));
 #if defined (X86_ASM)
   if (uiCpuFlag & WELS_CPU_SSE2) {
     pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
@@ -732,14 +813,7 @@
     pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
     pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
     pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2;
-    McCopyWidthEq4 = McCopyWidthEq4_mmx;
-    McCopyWidthEq8 = McCopyWidthEq8_mmx;
-    McCopyWidthEq16 = McCopyWidthEq16_sse2;
-    pfPixelAvgWidthEq16 = PixelAvgWidthEq16_sse2;
-    pfMcHorVer02WidthEq16 = McHorVer02WidthEq16_sse2;
-    pfMcHorVer20WidthEq16 = McHorVer20WidthEq16_sse2;
-    pfMcHorVer22WidthEq16 = McHorVer22WidthEq16_sse2;
-    pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16_sse2;
+    memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2));
   }
 
   if (uiCpuFlag & WELS_CPU_SSSE3) {
@@ -750,7 +824,7 @@
 
 #if defined(HAVE_NEON)
   if (uiCpuFlag & WELS_CPU_NEON) {
-    pFuncList->sMcFuncs.pfLumaQuarpelMc	= pWelsMcFuncWidthEq16_neon;
+    memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_neon, sizeof (pWelsMcFuncWidthEq16_neon));
     pFuncList->sMcFuncs.pfChromaMc	= EncMcChroma_neon;
     pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
     pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
@@ -761,7 +835,8 @@
 #endif
 #if defined(HAVE_NEON_AARCH64)
   if (uiCpuFlag & WELS_CPU_NEON) {
-    pFuncList->sMcFuncs.pfLumaQuarpelMc	= pWelsMcFuncWidthEq16_AArch64_neon;
+    memcpy (pFuncList->sMcFuncs.pfLumaQuarpelMc, pWelsMcFuncWidthEq16_AArch64_neon,
+            sizeof (pWelsMcFuncWidthEq16_AArch64_neon));
     pFuncList->sMcFuncs.pfChromaMc	= EncMcChroma_AArch64_neon;
     pFuncList->sMcFuncs.pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon;
     pFuncList->sMcFuncs.pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon;
--- a/codec/encoder/core/src/set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/set_mb_syn_cavlc.cpp
@@ -44,7 +44,6 @@
 #include "wels_const.h"
 
 namespace WelsSVCEnc {
-SCoeffFunc    sCoeffFunc;
 
 const  ALIGNED_DECLARE (uint8_t, g_kuiZeroLeftMap[16], 16) = {
   0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
@@ -74,7 +73,8 @@
   return iTotalZeros;
 }
 
-int32_t  WriteBlockResidualCavlc (int16_t* pCoffLevel, int32_t iEndIdx, int32_t iCalRunLevelFlag,
+int32_t  WriteBlockResidualCavlc (SWelsFuncPtrList* pFuncList, int16_t* pCoffLevel, int32_t iEndIdx,
+                                  int32_t iCalRunLevelFlag,
                                   int32_t iResidualProperty, int8_t iNC, SBitStringAux* pBs) {
   ENFORCE_STACK_ALIGN_1D (int16_t, iLevel, 16, 16)
   ENFORCE_STACK_ALIGN_1D (uint8_t, uiRun, 16, 16)
@@ -95,7 +95,7 @@
 
   if (iCalRunLevelFlag) {
     int32_t iCount = 0;
-    iTotalZeros = sCoeffFunc.pfCavlcParamCal (pCoffLevel, uiRun, iLevel, &iTotalCoeffs, iEndIdx);
+    iTotalZeros = pFuncList->pfCavlcParamCal (pCoffLevel, uiRun, iLevel, &iTotalCoeffs, iEndIdx);
     iCount = (iTotalCoeffs > 3) ? 3 : iTotalCoeffs;
     for (i = 0; i < iCount ; i++) {
       if (WELS_ABS (iLevel[i]) == 1) {
@@ -200,12 +200,12 @@
 }
 
 
-void InitCoeffFunc (const uint32_t uiCpuFlag) {
-  sCoeffFunc.pfCavlcParamCal = CavlcParamCal_c;
+void InitCoeffFunc (SWelsFuncPtrList* pFuncList, const uint32_t uiCpuFlag) {
+  pFuncList->pfCavlcParamCal = CavlcParamCal_c;
 
 #if defined(X86_ASM)
   if (uiCpuFlag & WELS_CPU_SSE2) {
-    // sCoeffFunc.pfCavlcParamCal = CavlcParamCal_sse2;
+    // pFuncList->pfCavlcParamCal = CavlcParamCal_sse2;
   }
 #endif
 }
--- a/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
@@ -244,7 +244,7 @@
     pSlice->uiLastMbQp = pCurMb->uiLumaQp;
 
     BsWriteSE (pBs, kiDeltaQp);
-    if (WelsWriteMbResidual (pMbCache, pCurMb, pBs))
+    if (WelsWriteMbResidual (pEncCtx->pFuncList, pMbCache, pCurMb, pBs))
       return ENC_RETURN_VLCOVERFLOWFOUND;
   } else {
     pCurMb->uiLumaQp = pSlice->uiLastMbQp;
@@ -256,7 +256,7 @@
   return CheckBitstreamBuffer (pSlice->uiSliceIdx, pEncCtx, pBs);
 }
 
-int32_t WelsWriteMbResidual (SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs) {
+int32_t WelsWriteMbResidual (SWelsFuncPtrList* pFuncList, SMbCache* sMbCacheInfo, SMB* pCurMb, SBitStringAux* pBs) {
   int32_t i;
   Mb_Type uiMbType					= pCurMb->uiMbType;
   const int32_t kiCbpChroma		= pCurMb->uiCbp >> 4;
@@ -270,7 +270,7 @@
     iA = pNonZeroCoeffCount[8];
     iB = pNonZeroCoeffCount[ 1];
     WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
-    if (WriteBlockResidualCavlc (sMbCacheInfo->pDct->iLumaI16x16Dc, 15, 1, LUMA_4x4, iC, pBs))
+    if (WriteBlockResidualCavlc (pFuncList, sMbCacheInfo->pDct->iLumaI16x16Dc, 15, 1, LUMA_4x4, iC, pBs))
       return ENC_RETURN_VLCOVERFLOWFOUND;
 
     /* AC Luma */
@@ -282,7 +282,7 @@
         iA = pNonZeroCoeffCount[iIdx - 1];
         iB = pNonZeroCoeffCount[iIdx - 8];
         WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
-        if (WriteBlockResidualCavlc (pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, LUMA_AC, iC, pBs))
+        if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, LUMA_AC, iC, pBs))
           return ENC_RETURN_VLCOVERFLOWFOUND;
         pBlock += 16;
       }
@@ -302,25 +302,25 @@
           iA = pNonZeroCoeffCount[iIdx - 1];
           iB = pNonZeroCoeffCount[iIdx - 8];
           WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
-          if (WriteBlockResidualCavlc (pBlock, 15, kiA > 0, LUMA_4x4, iC, pBs))
+          if (WriteBlockResidualCavlc (pFuncList, pBlock, 15, kiA > 0, LUMA_4x4, iC, pBs))
             return ENC_RETURN_VLCOVERFLOWFOUND;
 
           iA = kiA;
           iB = pNonZeroCoeffCount[iIdx - 7];
           WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
-          if (WriteBlockResidualCavlc (pBlock + 16, 15, kiB > 0, LUMA_4x4, iC, pBs))
+          if (WriteBlockResidualCavlc (pFuncList, pBlock + 16, 15, kiB > 0, LUMA_4x4, iC, pBs))
             return ENC_RETURN_VLCOVERFLOWFOUND;
 
           iA = pNonZeroCoeffCount[iIdx + 7];
           iB = kiA;
           WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
-          if (WriteBlockResidualCavlc (pBlock + 32, 15, kiC > 0, LUMA_4x4, iC, pBs))
+          if (WriteBlockResidualCavlc (pFuncList, pBlock + 32, 15, kiC > 0, LUMA_4x4, iC, pBs))
             return ENC_RETURN_VLCOVERFLOWFOUND;
 
           iA = kiC;
           iB = kiB;
           WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
-          if (WriteBlockResidualCavlc (pBlock + 48, 15, kiD > 0, LUMA_4x4, iC, pBs))
+          if (WriteBlockResidualCavlc (pFuncList, pBlock + 48, 15, kiD > 0, LUMA_4x4, iC, pBs))
             return ENC_RETURN_VLCOVERFLOWFOUND;
         }
         pBlock += 64;
@@ -331,11 +331,11 @@
   if (kiCbpChroma) {
     /* Chroma DC residual present */
     pBlock = sMbCacheInfo->pDct->iChromaDc[0]; // Cb
-    if (WriteBlockResidualCavlc (pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs))
+    if (WriteBlockResidualCavlc (pFuncList, pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs))
       return ENC_RETURN_VLCOVERFLOWFOUND;
 
     pBlock += 4; // Cr
-    if (WriteBlockResidualCavlc (pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs))
+    if (WriteBlockResidualCavlc (pFuncList, pBlock, 3, 1, CHROMA_DC, CHROMA_DC_NC_OFFSET, pBs))
       return ENC_RETURN_VLCOVERFLOWFOUND;
 
     /* Chroma AC residual present */
@@ -348,7 +348,7 @@
         iA = pNonZeroCoeffCount[iIdx - 1];
         iB = pNonZeroCoeffCount[iIdx - 8];
         WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
-        if (WriteBlockResidualCavlc (pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs))
+        if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs))
           return ENC_RETURN_VLCOVERFLOWFOUND;
         pBlock += 16;
       }
@@ -360,7 +360,7 @@
         iA = pNonZeroCoeffCount[iIdx - 1];
         iB = pNonZeroCoeffCount[iIdx - 8];
         WELS_NON_ZERO_COUNT_AVERAGE (iC, iA, iB);
-        if (WriteBlockResidualCavlc (pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs))
+        if (WriteBlockResidualCavlc (pFuncList, pBlock, 14, pNonZeroCoeffCount[iIdx] > 0, CHROMA_AC, iC, pBs))
           return ENC_RETURN_VLCOVERFLOWFOUND;
         pBlock += 16;
       }
--- a/test/encoder/EncUT_GetIntraPredictor.cpp
+++ b/test/encoder/EncUT_GetIntraPredictor.cpp
@@ -14,8 +14,6 @@
     pRef[i] = rand() % 256;
 
   const int32_t kkiStride = 0;
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredV_c (pPred, pRef, kkiStride);
 
   for (int i = 0; i < 4; i++)
@@ -47,8 +45,6 @@
   const uint8_t kuiV3[4] = {kuiH3, kuiH3, kuiH3, kuiH3};
   const uint8_t kuiV4[4] = {kuiH4, kuiH4, kuiH4, kuiH4};
 
-  const uint32_t kuiCpuFlag = 0;
-
   ENFORCE_STACK_ALIGN_1D (uint8_t, uiV, 16, 16) // TobeCont'd about assign opt as follows
   ST32 (&uiV[0], LD32 (kuiV1));
   ST32 (&uiV[4], LD32 (kuiV2));
@@ -55,7 +51,6 @@
   ST32 (&uiV[8], LD32 (kuiV3));
   ST32 (&uiV[12], LD32 (kuiV4));
 
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredH_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
@@ -99,8 +94,6 @@
   uiV[11] = uiV[14] = kuiDDL5;
   uiV[15] = kuiDDL6;
 
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredDDL_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
@@ -133,8 +126,6 @@
   uiV[2] = uiV[5] = uiV[8] = kuiDLT2;
   uiV[3] = kuiDLT3;
 
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredDDLTop_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
@@ -190,8 +181,6 @@
   uiV[8] = uiV[13] = kuiDDR5;
   uiV[12] = kuiDDR6;
 
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredDDR_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
@@ -240,8 +229,6 @@
   uiV[11] = kuiVL4;
   uiV[15] = kuiVL9;
 
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredVL_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
@@ -285,8 +272,6 @@
   uiV[6] = uiV[13] = kuiVLT6;
   uiV[7] = uiV[14] = uiV[15] = kuiVLT7;
 
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredVLTop_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
@@ -340,8 +325,6 @@
   uiV[8] = kuiVR8;
   uiV[12] = kuiVR9;
 
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredVR_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
@@ -388,8 +371,6 @@
   uiV[7] = uiV[9] = kuiHU5;
   memset (&uiV[10], kuiL3, 6 * sizeof (uint8_t));
 
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredHU_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
@@ -444,8 +425,6 @@
   uiV[12] = kuiHD8;
   uiV[13] = kuiHD9;
 
-  const uint32_t kuiCpuFlag = 0;
-  WelsInitFillingPredFuncs (kuiCpuFlag);
   WelsI4x4LumaPredHD_c (pPred, pRef, kiStride);
 
   for (int i = 0; i < 4; i++)
--- a/test/encoder/EncUT_Sample.cpp
+++ b/test/encoder/EncUT_Sample.cpp
@@ -83,7 +83,6 @@
   uint8_t* pDec = (uint8_t*)cMemoryAlign.WelsMalloc (iLineSizeDec << 5, "pDec");
   uint8_t* pEnc = (uint8_t*)cMemoryAlign.WelsMalloc (iLineSizeEnc << 5, "pEnc");
   uint8_t* pDst = (uint8_t*)cMemoryAlign.WelsMalloc (512, "pDst");
-  WelsInitFillingPredFuncs (WELS_CPU_SSE2);
   for (int i = 0; i < (iLineSizeDec << 5); i++)
     pDec[i] = rand() % 256;
   for (int i = 0; i < (iLineSizeEnc << 5); i++)