shithub: openh264

Download patch

ref: 3ef97dc0c93bc0068adf1f8ee0d2ae5c4c46d0bb
parent: bdf9f6b4ffb61dbe87690f3de0bd12312de1bf34
parent: a964ae8cfa432af037873bb701bf1cb8d3129596
author: ruil2 <[email protected]>
date: Wed Jul 2 10:02:51 EDT 2014

Merge pull request #1057 from sijchen/scc_md_merge11

[Encoder] Completing and bug-fixing the screen content MD/ME process

--- a/codec/encoder/core/inc/bit_stream.h
+++ b/codec/encoder/core/inc/bit_stream.h
@@ -43,11 +43,11 @@
  *	auxiliary struct for bit-stream reading / writing
  */
 typedef struct TagBitStringAux {
-uint8_t*		pBuf;		// pBuffer to start position
-uint8_t*		pBufEnd;	// pBuffer + length
-uint8_t*		pBufPtr;	// current writing position
-uint32_t    uiCurBits;
-int32_t		iLeftBits;	// count number of available bits left ([1, 8]),
+  uint8_t*		pBuf;		// pBuffer to start position
+  uint8_t*		pBufEnd;	// pBuffer + length
+  uint8_t*		pBufPtr;	// current writing position
+  uint32_t    uiCurBits;
+  int32_t		iLeftBits;	// count number of available bits left ([1, 8]),
 // need pointer to next byte start position in case 0 bit left then 8 instead
 } SBitStringAux;
 
@@ -61,15 +61,15 @@
  * \return	iSize of pBuffer pData in byte; failed in -1 return
  */
 static inline int32_t InitBits (SBitStringAux* pBs, const uint8_t* kpBuf, const int32_t kiSize) {
-uint8_t* ptr = (uint8_t*)kpBuf;
+  uint8_t* ptr = (uint8_t*)kpBuf;
 
-pBs->pBuf			= ptr;
-pBs->pBufPtr		= ptr;
-pBs->pBufEnd		= ptr + kiSize;
-pBs->iLeftBits	= 32;
-pBs->uiCurBits = 0;
+  pBs->pBuf			= ptr;
+  pBs->pBufPtr		= ptr;
+  pBs->pBufEnd		= ptr + kiSize;
+  pBs->iLeftBits	= 32;
+  pBs->uiCurBits = 0;
 
-return kiSize;
+  return kiSize;
 }
 
 }
--- a/codec/encoder/core/inc/deblocking.h
+++ b/codec/encoder/core/inc/deblocking.h
@@ -50,15 +50,15 @@
 //struct tagDeblockingFunc;
 
 typedef struct TagDeblockingFilter {
-uint8_t*		pCsData[3];	// pointer to reconstructed picture pData
-int32_t		iCsStride[3];	// Cs iStride
-int16_t     iMbStride;
-int8_t		iSliceAlphaC0Offset;
-int8_t		iSliceBetaOffset;
-uint8_t     uiLumaQP;
-uint8_t     uiChromaQP;
-uint8_t     uiFilterIdc;
-uint8_t     uiReserved;
+  uint8_t*		pCsData[3];	// pointer to reconstructed picture pData
+  int32_t		iCsStride[3];	// Cs iStride
+  int16_t     iMbStride;
+  int8_t		iSliceAlphaC0Offset;
+  int8_t		iSliceBetaOffset;
+  uint8_t     uiLumaQP;
+  uint8_t     uiChromaQP;
+  uint8_t     uiFilterIdc;
+  uint8_t     uiReserved;
 } SDeblockingFilter;
 
 #if defined(__cplusplus)
--- a/codec/encoder/core/inc/encoder_context.h
+++ b/codec/encoder/core/inc/encoder_context.h
@@ -119,7 +119,9 @@
 int32_t*					pSadCostMb;
 /* MVD cost tables for Inter MB */
 int32_t              iMvRange;
-uint16_t*					pMvdCostTableInter; //[52];	// adaptive to spatial layers
+uint16_t*					pMvdCostTable; //[52];	// adaptive to spatial layers
+int32_t					  iMvdCostTableSize; //the size of above table
+int32_t					    iMvdCostTableStride; //the stride of above table
 SMVUnitXY*
 pMvUnitBlock4x4;	// (*pMvUnitBlock4x4[2])[MB_BLOCK4x4_NUM];	    // for store each 4x4 blocks' mv unit, the two swap after different d layer
 int8_t*
--- a/codec/encoder/core/inc/svc_base_layer_md.h
+++ b/codec/encoder/core/inc/svc_base_layer_md.h
@@ -76,15 +76,7 @@
 //both used in BL and EL
 //void wels_md_inter_init ( SWelsMD* pMd, const uint8_t ref_idx, const bool is_highest_dlayer_flag );
 
-bool WelsMdInterJudgeBGDPskip (void* pEnc, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
-                               bool* bKeepSkip);
-bool WelsMdInterJudgeBGDPskipFalse (void* pEnc, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
-                                    bool* bKeepSkip);
 
-void WelsMdInterUpdateBGDInfo (SDqLayer* pCurLayer,  SMB* pCurMb, const bool kbCollocatedPredFlag,
-                               const int32_t kiRefPictureType);
-void WelsMdInterUpdateBGDInfoNULL (SDqLayer* pCurLayer,  SMB* pCurMb, const bool kbCollocatedPredFlag,
-                                   const int32_t kiRefPictureType);
 
 bool WelsMdInterJudgePskip (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
                             bool bTrySkip);
@@ -102,9 +94,6 @@
 void WelsMdIntraSecondaryModesEnc (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb, SMbCache* pMbCache);
 //end of: both used in BL and EL
 
-//typedef void (*MD_INTRA_MB_BASE) (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SMB* pCurMb);
-
-void WelsInitSCDPskipFunc (SWelsFuncPtrList* pFuncList, const bool bScrollingDetection);
 
 }
 #endif//WELS_MACROBLOCK_MODE_DECISION_H__
--- a/codec/encoder/core/inc/svc_mode_decision.h
+++ b/codec/encoder/core/inc/svc_mode_decision.h
@@ -64,11 +64,31 @@
 void WelsMdInterMbEnhancelayer (void* pEnc, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache);
 SMB* GetRefMb (SDqLayer* pCurLayer, SMB* pCurMb);
 void SetMvBaseEnhancelayer (SWelsMD* pMd, SMB* pCurMb, const SMB* kpRefMb);
+
+//////////////
+// MD from background detection
+//////////////
+bool WelsMdInterJudgeBGDPskip (void* pEnc, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
+                               bool* bKeepSkip);
+bool WelsMdInterJudgeBGDPskipFalse (void* pEnc, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
+                                    bool* bKeepSkip);
+
+void WelsMdInterUpdateBGDInfo (SDqLayer* pCurLayer,  SMB* pCurMb, const bool kbCollocatedPredFlag,
+                               const int32_t kiRefPictureType);
+void WelsMdInterUpdateBGDInfoNULL (SDqLayer* pCurLayer,  SMB* pCurMb, const bool kbCollocatedPredFlag,
+                                   const int32_t kiRefPictureType);
+
+//////////////
+// MD for screen contents
+//////////////
 bool MdInterSCDPskipProcess (sWelsEncCtx* pEncCtx, SWelsMD* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
                              ESkipModes eSkipMode);
-
 typedef bool (*pJudgeSkipFun) (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, SWelsMD* pWelsMd);
 void SetBlockStaticIdcToMd (void* pVaa, void* pMd, SMB* pCurMb, void* pDqLay);
+void WelsInitSCDPskipFunc (SWelsFuncPtrList* pFuncList, const bool bScrollingDetection);
+
+void SetScrollingMvToMd (void* pVaa, void* pWelsMd);
+void SetScrollingMvToMdNull (void* pVaa, void* pWelsMd);
 }
 #endif //SVC_MODE_DECISION_H
 
--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -145,8 +145,8 @@
  * \return  NONE
  */
 void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice);
-
-
+void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice);
+void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice);
 /*!
  * \brief  BL mb motion estimate initial point testing
  *
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -132,6 +132,7 @@
 
 typedef bool (*PInterMdScrollingPSkipDecisionFunc) (void* pEncCtx, void* pWelsMd, SSlice* slice, SMB* pCurMb,
     SMbCache* pMbCache);
+typedef void (*PSetScrollingMv) (void* pVaa, void* pMd);
 
 typedef void (*PInterMdFunc) (void* pEncCtx, void* pWelsMd, SSlice* slice, SMB* pCurMb, SMbCache* pMbCache);
 
@@ -211,6 +212,7 @@
   PInterMdBackgroundInfoUpdateFunc      pfInterMdBackgroundInfoUpdate;
 
   PInterMdScrollingPSkipDecisionFunc pfSCDPSkipDecision;
+  PSetScrollingMv pfSetScrollingMv;
 
   SMcFunc                sMcFuncs;
   SSampleDealingFunc     sSampleDealingFuncs;
--- a/codec/encoder/core/src/encoder.cpp
+++ b/codec/encoder/core/src/encoder.cpp
@@ -49,6 +49,7 @@
 #include "sample.h"
 
 #include "svc_base_layer_md.h"
+#include "svc_mode_decision.h"
 #include "set_mb_syn_cavlc.h"
 #include "crt_util_safe_x.h"	// Safe CRT routines like utils for cross_platforms
 #include "slice_multi_threading.h"
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -46,6 +46,7 @@
 #include "picture_handle.h"
 #include "svc_base_layer_md.h"
 #include "svc_encode_slice.h"
+#include "svc_mode_decision.h"
 #include "decode_mb_aux.h"
 #include "deblocking.h"
 #include "ref_list_mgr_svc.h"
@@ -527,7 +528,7 @@
         iCountNumNals += kiNumOfSlice;
       assert (iCountNumNals - iOrgNumNals <= MAX_NAL_UNITS_IN_LAYER);
       if (kiNumOfSlice > MAX_SLICES_NUM) {
-        WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_ERROR,
+        WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
                  "AcquireLayersNals(), num_of_slice(%d) > MAX_SLICES_NUM(%d) per (iDid= %d, qid= %d) settings!\n",
                  kiNumOfSlice, MAX_SLICES_NUM, iDIndex, 0);
         return 1;
@@ -535,7 +536,7 @@
     }
 
     if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) {
-      WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_ERROR,
+      WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
                "AcquireLayersNals(), num_of_nals(%d) > MAX_NAL_UNITS_IN_LAYER(%d) per (iDid= %d, qid= %d) settings!\n",
                (iCountNumNals - iOrgNumNals), MAX_NAL_UNITS_IN_LAYER, iDIndex, 0);
       return 1;
@@ -551,7 +552,7 @@
 
   // to check number of layers / nals / slices dependencies, 12/8/2010
   if (iCountNumLayers > MAX_LAYER_NUM_OF_FRAME) {
-    WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_ERROR, "AcquireLayersNals(), iCountNumLayers(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
+    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, "AcquireLayersNals(), iCountNumLayers(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
              iCountNumLayers, MAX_LAYER_NUM_OF_FRAME);
     return 1;
   }
@@ -955,7 +956,7 @@
                                   & (pDlayerParam->sSliceCfg),
                                   pPps);
       if (iResult) {
-        WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "InitDqLayers(), InitSlicePEncCtx failed(%d)!", iResult);
+        WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "InitDqLayers(), InitSlicePEncCtx failed(%d)!", iResult);
         FreeMemorySvc (ppCtx);
         return 1;
       }
@@ -1278,14 +1279,15 @@
   (*ppCtx)->iMvRange = pParam->iUsageType ? EXPANDED_MV_RANGE : CAMERA_STARTMV_RANGE;
   const int32_t kiMvdRange = (pParam->iUsageType ? EXPANDED_MVD_RANGE : ((kiNumDependencyLayers == 1) ? CAMERA_MVD_RANGE :
                               CAMERA_HIGHLAYER_MVD_RANGE));
-  const uint32_t kuiMvdInterTableSize	=  1 + (kiMvdRange << 3);//intepel*4=qpel;  qpel_mv_range*2=(+/-);
-  const uint32_t kuiMvdCacheAlignedSize	= kuiMvdInterTableSize * sizeof (uint16_t);
+  const uint32_t kuiMvdInterTableSize	= (kiMvdRange << 2); //intepel*4=qpel
+  const uint32_t kuiMvdInterTableStride	=  1 + (kuiMvdInterTableSize << 1);//qpel_mv_range*2=(+/-);
+  const uint32_t kuiMvdCacheAlignedSize	= kuiMvdInterTableStride * sizeof (uint16_t);
   int32_t iVclLayersBsSizeCount		= 0;
   int32_t iNonVclLayersBsSizeCount	= 0;
   int32_t iTargetSpatialBsSize			= 0;
 
   if (kiNumDependencyLayers < 1 || kiNumDependencyLayers > MAX_DEPENDENCY_LAYER) {
-    WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc() failed due to invalid iNumDependencyLayers(%d)!\n",
+    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc() failed due to invalid iNumDependencyLayers(%d)!\n",
              kiNumDependencyLayers);
     FreeMemorySvc (ppCtx);
     return 1;
@@ -1292,7 +1294,7 @@
   }
 
   if (pParam->uiGopSize == 0 || (pParam->uiIntraPeriod && ((pParam->uiIntraPeriod % pParam->uiGopSize) != 0))) {
-    WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING,
+    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING,
              "RequestMemorySvc() failed due to invalid uiIntraPeriod(%d) (=multipler of uiGopSize(%d)!",
              pParam->uiIntraPeriod, pParam->uiGopSize);
     FreeMemorySvc (ppCtx);
@@ -1306,7 +1308,7 @@
 
   iResult = AcquireLayersNals (ppCtx, pParam, &iCountLayers, &iCountNals);
   if (iResult) {
-    WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AcquireLayersNals failed(%d)!", iResult);
+    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AcquireLayersNals failed(%d)!", iResult);
     FreeMemorySvc (ppCtx);
     return 1;
   }
@@ -1359,7 +1361,7 @@
 
   // for pSlice bs buffers
   if (pParam->iMultipleThreadIdc > 1 && RequestMtResource (ppCtx, pParam, iCountBsLen, iTargetSpatialBsSize)) {
-    WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMtResource failed!");
+    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMtResource failed!");
     FreeMemorySvc (ppCtx);
     return 1;
   }
@@ -1406,7 +1408,7 @@
 
   // stride tables
   if (AllocStrideTables (ppCtx, kiNumDependencyLayers)) {
-    WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AllocStrideTables failed!");
+    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AllocStrideTables failed!");
     FreeMemorySvc (ppCtx);
     return 1;
   }
@@ -1422,7 +1424,7 @@
     (*ppCtx)->pVaa	= (SVAAFrameInfoExt*)pMa->WelsMallocz (sizeof (SVAAFrameInfoExt), "pVaa");
     WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa), FreeMemorySvc (ppCtx))
     if (RequestMemoryVaaScreen ((*ppCtx)->pVaa, pMa, (*ppCtx)->pSvcParam->iMaxNumRefFrame, iCountMaxMbNum << 2)) {
-      WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMemoryVaaScreen failed!");
+      WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMemoryVaaScreen failed!");
       FreeMemorySvc (ppCtx);
       return 1;
     }
@@ -1471,20 +1473,22 @@
 
   iResult = InitDqLayers (ppCtx);
   if (iResult) {
-    WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitDqLayers failed(%d)!", iResult);
+    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitDqLayers failed(%d)!", iResult);
     FreeMemorySvc (ppCtx);
     return iResult;
   }
 
   if (InitMbListD (ppCtx)) {
-    WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitMbListD failed!");
+    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitMbListD failed!");
     FreeMemorySvc (ppCtx);
     return 1;
   }
 
-  (*ppCtx)->pMvdCostTableInter = (uint16_t*)pMa->WelsMallocz (52 * kuiMvdCacheAlignedSize, "pMvdCostTableInter");
-  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pMvdCostTableInter), FreeMemorySvc (ppCtx))
-  MvdCostInit ((*ppCtx)->pMvdCostTableInter, kuiMvdInterTableSize);  //should put to a better place?
+  (*ppCtx)->iMvdCostTableSize = kuiMvdInterTableSize;
+  (*ppCtx)->iMvdCostTableStride = kuiMvdInterTableStride;
+  (*ppCtx)->pMvdCostTable = (uint16_t*)pMa->WelsMallocz (52 * kuiMvdCacheAlignedSize, "pMvdCostTable");
+  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pMvdCostTable), FreeMemorySvc (ppCtx))
+  MvdCostInit ((*ppCtx)->pMvdCostTable, kuiMvdInterTableStride);  //should put to a better place?
 
   if ((*ppCtx)->ppRefPicListExt[0] != NULL && (*ppCtx)->ppRefPicListExt[0]->pRef[0] != NULL)
     (*ppCtx)->pDecPic				= (*ppCtx)->ppRefPicListExt[0]->pRef[0];
@@ -1741,9 +1745,9 @@
     }
 
     /* MVD cost tables for Inter */
-    if (NULL != pCtx->pMvdCostTableInter) {
-      pMa->WelsFree (pCtx->pMvdCostTableInter, "pMvdCostTableInter");
-      pCtx->pMvdCostTableInter = NULL;
+    if (NULL != pCtx->pMvdCostTable) {
+      pMa->WelsFree (pCtx->pMvdCostTable, "pMvdCostTable");
+      pCtx->pMvdCostTable = NULL;
     }
 
     FreeCodingParam (&pCtx->pSvcParam, pMa);
@@ -1757,7 +1761,7 @@
 #endif//MEMORY_MONITOR
 
     if ((*ppCtx)->pMemAlign != NULL) {
-      WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_INFO, "FreeMemorySvc(), verify memory usage (%d bytes) after free..\n",
+      WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, "FreeMemorySvc(), verify memory usage (%d bytes) after free..\n",
                (*ppCtx)->pMemAlign->WelsGetMemoryUsage());
       delete (*ppCtx)->pMemAlign;
       (*ppCtx)->pMemAlign = NULL;
@@ -2150,7 +2154,8 @@
   if (NULL == ppCtx || NULL == *ppCtx)
     return;
 
-  WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pCtx= %p, iThreadCount= %d, iMultipleThreadIdc= %d.\n",
+  WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO,
+           "WelsUninitEncoderExt(), pCtx= %p, iThreadCount= %d, iMultipleThreadIdc= %d.\n",
            (void*) (*ppCtx), (*ppCtx)->pSvcParam->iCountThreadsNum, (*ppCtx)->pSvcParam->iMultipleThreadIdc);
 
 #if defined(STAT_OUTPUT)
@@ -2168,7 +2173,8 @@
           WelsEventSignal (& (*ppCtx)->pSliceThreading->pExitEncodeEvent[iThreadIdx]);
           WelsEventSignal (& (*ppCtx)->pSliceThreading->pThreadMasterEvent[iThreadIdx]);
           res = WelsThreadJoin ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx]);	// waiting thread exit
-          WelsLog (&(*ppCtx)->sLogCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pthread_join(pThreadHandles%d) return %d..\n", iThreadIdx,
+          WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pthread_join(pThreadHandles%d) return %d..\n",
+                   iThreadIdx,
                    res);
           (*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx] = 0;
         }
@@ -2333,7 +2339,7 @@
                     / (pSliceCtx->iMaxSliceNumConstraint))
        ) {
 
-      WelsLog (&(pCtx->sLogCtx),
+      WelsLog (& (pCtx->sLogCtx),
                WELS_LOG_WARNING,
                "Set-SliceConstraint(%d) too small for current resolution (MB# %d) under QP/BR!\n",
                pSliceCtx->uiSliceSizeConstraint,
@@ -2492,9 +2498,12 @@
   const bool kbHighestSpatialLayer	=
     (pCtx->pSvcParam->iSpatialLayerNum == (pCurLayer->sLayerInfo.sNalHeaderExt.uiDependencyId + 1));
   SWelsFuncPtrList* pFuncList = pCtx->pFuncList;
-  SLogContext* pLogCtx = &(pCtx->sLogCtx);
+  SLogContext* pLogCtx = & (pCtx->sLogCtx);
   /* function pointers conditional assignment under sWelsEncCtx, layer_mb_enc_rec (in stack) is exclusive */
-  if (kbHighestSpatialLayer && pCtx->pSvcParam->iUsageType == CAMERA_VIDEO_REAL_TIME) {
+  if ((pCtx->pSvcParam->iUsageType == CAMERA_VIDEO_REAL_TIME && kbHighestSpatialLayer) ||
+      (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME && P_SLICE == pCtx->eSliceType
+       && kbHighestSpatialLayer) //TODO: here is for sync with the origin code, consider the design again with more tests
+     ) {
     SetFastCodingFunc (pFuncList);
   } else {
     SetNormalCodingFunc (pFuncList);
@@ -2501,7 +2510,9 @@
   }
 
   if (P_SLICE == pCtx->eSliceType) {
-    pFuncList->pfMotionSearch[0]  = WelsMotionEstimateSearch;
+    for (int i = 0; i < BLOCK_STATIC_IDC_ALL; i++) {
+      pFuncList->pfMotionSearch[i] = WelsMotionEstimateSearch;
+    }
     pFuncList->pfSearchMethod[BLOCK_16x16]  =
       pFuncList->pfSearchMethod[BLOCK_16x8] =
         pFuncList->pfSearchMethod[BLOCK_8x16] =
@@ -2509,6 +2520,8 @@
             pFuncList->pfSearchMethod[BLOCK_4x4] = WelsDiamondSearch;
     pFuncList->pfFirstIntraMode = WelsMdFirstIntraMode;
     pFuncList->sSampleDealingFuncs.pfMeCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd;
+    pFuncList->pfSetScrollingMv = SetScrollingMvToMdNull;
+
     if (kbHighestSpatialLayer) {
       pFuncList->pfCalculateSatd = NotCalculateSatdCost;
       pFuncList->pfInterFineMd = WelsMdInterFinePartitionVaa;
@@ -2518,7 +2531,6 @@
     }
   }
 
-  return;
   //to init at each frame will be needed when dealing with hybrid content (camera+screen)
   if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) {
     if (P_SLICE == pCtx->eSliceType) {
@@ -2526,6 +2538,17 @@
       pFuncList->pfInterFineMd = WelsMdInterFinePartitionVaaOnScreen;
 
       //ME related func pointers
+      SVAAFrameInfoExt* pVaaExt		= static_cast<SVAAFrameInfoExt*> (pCtx->pVaa);
+      if (pVaaExt->sScrollDetectInfo.bScrollDetectFlag
+          && (pVaaExt->sScrollDetectInfo.iScrollMvX | pVaaExt->sScrollDetectInfo.iScrollMvY)) {
+        pFuncList->pfSetScrollingMv = SetScrollingMvToMd;
+      } else {
+        pFuncList->pfSetScrollingMv = SetScrollingMvToMdNull;
+      }
+
+      pFuncList->pfMotionSearch[NO_STATIC] = WelsMotionEstimateSearch;
+      pFuncList->pfMotionSearch[COLLOCATED_STATIC] = WelsMotionEstimateSearchStatic;
+      pFuncList->pfMotionSearch[SCROLLED_STATIC] = WelsMotionEstimateSearchScrolled;
       //ME16x16
       if (!SetMeMethod (ME_DIA_CROSS, pFuncList->pfSearchMethod[BLOCK_16x16])) {
         WelsLog (pLogCtx, WELS_LOG_WARNING, "SetMeMethod(BLOCK_16x16) ME_DIA_CROSS unsuccessful, switched to default search\n");
@@ -2547,8 +2570,7 @@
         pFeatureSearchPreparation->pRefBlockFeature = pScreenBlockFeatureStorage;
         if (pFeatureSearchPreparation->bFMESwitchFlag
             && !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
-          //TODO: use ORIGIN of reference when preprocessing is ready
-          PerformFMEPreprocess (pFuncList, pCurLayer->pRefPic, pFeatureSearchPreparation->pFeatureOfBlock,
+          PerformFMEPreprocess (pFuncList, pCurLayer->pRefOri[0], pFeatureSearchPreparation->pFeatureOfBlock,
                                 pScreenBlockFeatureStorage);
         }
 
@@ -2556,7 +2578,8 @@
         if (pFeatureSearchPreparation->bFMESwitchFlag && pScreenBlockFeatureStorage->bRefBlockFeatureCalculated
             && (!pScreenBlockFeatureStorage->iIs16x16)) {
           if (!SetMeMethod (ME_DIA_CROSS_FME, pFuncList->pfSearchMethod[BLOCK_8x8])) {
-            WelsLog (pLogCtx, WELS_LOG_WARNING, "SetMeMethod(BLOCK_8x8) ME_DIA_CROSS_FME unsuccessful, switched to default search\n");
+            WelsLog (pLogCtx, WELS_LOG_WARNING,
+                     "SetMeMethod(BLOCK_8x8) ME_DIA_CROSS_FME unsuccessful, switched to default search\n");
           }
         }
 
@@ -2833,7 +2856,7 @@
 
   if ((pBs->pBufEnd - pBs->pBufPtr) < iLen || iNal >= pCtx->pOut->iCountNals) {
 #if GOM_TRACE_FLAG
-    WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR,
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
              "[RC] paddingcal pBuffer overflow, bufferlen=%lld, paddinglen=%d, iNalIdx= %d, iCountNals= %d\n",
              static_cast<long long int> (pBs->pBufEnd - pBs->pBufPtr), iLen, iNal, pCtx->pOut->iCountNals);
 #endif
@@ -2947,7 +2970,7 @@
   int8_t iCurDid						= 0;
   int8_t iCurTid						= 0;
   bool bAvcBased					= false;
-  SLogContext* pLogCtx = &(pCtx->sLogCtx);
+  SLogContext* pLogCtx = & (pCtx->sLogCtx);
 #if defined(ENABLE_PSNR_CALC)
   float fSnrY = .0f, fSnrU = .0f, fSnrV = .0f;
 #endif//ENABLE_PSNR_CALC
@@ -3366,6 +3389,12 @@
       PerformDeblockingFilter (pCtx);
     }
 
+    pCtx->pFuncList->pfRc.pfWelsRcPictureInfoUpdate (pCtx, iLayerSize);
+    pCtx->pDecPic->iFrameAverageQp = pCtx->pWelsSvcRc->iAverageFrameQp;
+
+    //update scc related
+    pCtx->pFuncList->pfUpdateFMESwitch (pCtx->pCurDqLayer);
+
     // reference picture list update
     if (eNalRefIdc != NRI_PRI_LOWEST) {
       if (!pCtx->pFuncList->pUpdateRefList (pCtx)) {
@@ -3379,8 +3408,6 @@
     }
 
     iFrameSize += iLayerSize;
-
-    pCtx->pFuncList->pfRc.pfWelsRcPictureInfoUpdate (pCtx, iLayerSize);
 
 #ifdef ENABLE_FRAME_DUMP
     // Dump reconstruction picture for each sQualityStat layer
--- a/codec/encoder/core/src/ratectl.cpp
+++ b/codec/encoder/core/src/ratectl.cpp
@@ -348,11 +348,11 @@
 #ifdef _TEST_TEMP_Rc_
     fprintf (fp_vgop, "%d\n", WELS_ROUND ((double)iTotalBits / iFrameInVGop));
 #endif
-    WelsLog (&(pEncCtx->sLogCtx), WELS_LOG_INFO, "[Rc] VGOPbitrate%d: %d \n", kiDid, iVGopBitrate);
+    WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_INFO, "[Rc] VGOPbitrate%d: %d \n", kiDid, iVGopBitrate);
     if (iTotalBits > 0) {
       iTid = 0;
       while (iTid <= kiHighestTid) {
-        WelsLog (&(pEncCtx->sLogCtx), WELS_LOG_INFO, "T%d=%8.3f \n", iTid, (double) (pTOverRc[iTid].iGopBitsDq / iTotalBits));
+        WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_INFO, "T%d=%8.3f \n", iTid, (double) (pTOverRc[iTid].iGopBitsDq / iTotalBits));
         ++ iTid;
       }
     }
@@ -756,7 +756,7 @@
 void RcTraceFrameBits (sWelsEncCtx* pEncCtx) {
   SWelsSvcRc* pWelsSvcRc = &pEncCtx->pWelsSvcRc[pEncCtx->uiDependencyId];
 
-  WelsLog (&(pEncCtx->sLogCtx), WELS_LOG_INFO,
+  WelsLog (& (pEncCtx->sLogCtx), WELS_LOG_INFO,
            "[Rc] encoding_qp%d, qp = %3d, index = %8d, iTid = %1d, used = %8d, target = %8d, remaingbits = %8d\n",
            pEncCtx->uiDependencyId, pWelsSvcRc->iAverageFrameQp, pEncCtx->iFrameIndex, pEncCtx->uiTemporalId,
            pWelsSvcRc->iFrameDqBits,
--- a/codec/encoder/core/src/ref_list_mgr_svc.cpp
+++ b/codec/encoder/core/src/ref_list_mgr_svc.cpp
@@ -49,6 +49,9 @@
     pRef->uiRecieveConfirmed = RECIEVE_FAILED;
     pRef->iMarkFrameNum = -1;
     pRef->bUsedAsRef	= false;
+
+    if (NULL != pRef->pScreenBlockFeatureStorage)
+      pRef->pScreenBlockFeatureStorage->bRefBlockFeatureCalculated	= false;
   }
 }
 
@@ -168,7 +171,7 @@
   SLTRState* pLtr = &pCtx->pLtr[pCtx->uiDependencyId];
   int32_t iMaxFrameNumPlus1 = (1 << pCtx->pSps->uiLog2MaxFrameNum);
   int32_t i;
-  SLogContext* pLogCtx = &(pCtx->sLogCtx);
+  SLogContext* pLogCtx = & (pCtx->sLogCtx);
 
   for (i = 0; i < LONG_TERM_REF_NUM; i++) {
     if (pLongRefList[i] != NULL)	{
@@ -211,7 +214,7 @@
   int32_t i, j;
 
   if (pLtr->uiLtrMarkState == LTR_MARKING_SUCCESS) {
-    WelsLog (&(pCtx->sLogCtx), WELS_LOG_WARNING,
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
              "pLtr->uiLtrMarkState = %d, pLtr.iCurLtrIdx = %d , pLtr->iLtrMarkFbFrameNum = %d ,pCtx->iFrameNum = %d ",
              pLtr->uiLtrMarkState, pLtr->iCurLtrIdx, pLtr->iLtrMarkFbFrameNum, pCtx->iFrameNum);
     for (i = 0; i < pRefList->uiLongRefCount; i++)	{
@@ -234,7 +237,7 @@
         pLtr->iLTRMarkSuccessNum++;
         pLtr->iCurLtrIdx = (pLtr->iCurLtrIdx + 1) % LONG_TERM_REF_NUM;
         pLtr->iLTRMarkMode = (pLtr->iLTRMarkSuccessNum >= (LONG_TERM_REF_NUM)) ? (LTR_DELAY_MARK) : (LTR_DIRECT_MARK);
-        WelsLog (&(pCtx->sLogCtx), WELS_LOG_WARNING, "LTR mark mode =%d", pLtr->iLTRMarkMode);
+        WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "LTR mark mode =%d", pLtr->iLTRMarkMode);
         pLtr->bLTRMarkEnable = true;
         break;
       }
@@ -523,12 +526,12 @@
         pLtr->bReceivedT0LostFlag = true;
         pLtr->iLastCorFrameNumDec = pRequest->iLastCorrectFrameNum;
         pLtr->iCurFrameNumInDec = pRequest->iCurrentFrameNum;
-        WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO,
+        WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
                  "Receive valid LTR recovery pRequest,feedback_type = %d ,uiIdrPicId = %d , current_frame_num = %d , last correct frame num = %d"
                  , pRequest->uiFeedbackType, pRequest->uiIDRPicId, pRequest->iCurrentFrameNum, pRequest->iLastCorrectFrameNum);
       }
 
-      WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO,
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
                "Receive LTR recovery pRequest,feedback_type = %d ,uiIdrPicId = %d , current_frame_num = %d , last correct frame num = %d"
                , pRequest->uiFeedbackType, pRequest->uiIDRPicId, pRequest->iCurrentFrameNum, pRequest->iLastCorrectFrameNum);
     }
@@ -546,13 +549,13 @@
             || pLTRMarkingFeedback->uiFeedbackType == LTR_MARKING_FAILED)) { // avoid error pData
       pLtr->uiLtrMarkState = pLTRMarkingFeedback->uiFeedbackType;
       pLtr->iLtrMarkFbFrameNum =  pLTRMarkingFeedback->iLTRFrameNum ;
-      WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO,
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
                "Receive valid LTR marking feedback, feedback_type = %d , uiIdrPicId = %d , LTR_frame_num = %d , cur_idr_pic_id = %d",
                pLTRMarkingFeedback->uiFeedbackType, pLTRMarkingFeedback->uiIDRPicId, pLTRMarkingFeedback->iLTRFrameNum ,
                pCtx->sPSOVector.uiIdrPicId);
 
     } else {
-      WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO,
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
                "Receive LTR marking feedback, feedback_type = %d , uiIdrPicId = %d , LTR_frame_num = %d , cur_idr_pic_id = %d",
                pLTRMarkingFeedback->uiFeedbackType, pLTRMarkingFeedback->uiIDRPicId, pLTRMarkingFeedback->iLTRFrameNum ,
                pCtx->sPSOVector.uiIdrPicId);
@@ -586,7 +589,8 @@
         if (pRefList->pLongRefList[i]->uiRecieveConfirmed == RECIEVE_SUCCESS)	{
           pCtx->pRefList0[pCtx->iNumRef0++] = pRefList->pLongRefList[i];
           pLtr->iLastRecoverFrameNum = pCtx->iFrameNum;
-          WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO, "pRef is int32_t !iLastRecoverFrameNum = %d, pRef iFrameNum = %d,LTR number = %d,",
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+                   "pRef is int32_t !iLastRecoverFrameNum = %d, pRef iFrameNum = %d,LTR number = %d,",
                    pLtr->iLastRecoverFrameNum, pCtx->pRefList0[0]->iFrameNum, pRefList->uiLongRefCount);
           break;
         }
@@ -596,7 +600,8 @@
         SPicture* pRef = pRefList->pShortRefList[i];
         if (pRef != NULL && pRef->bUsedAsRef && pRef->iFramePoc >= 0 && pRef->uiTemporalId <= kuiTid) {
           pCtx->pRefList0[pCtx->iNumRef0++]	= pRef;
-          WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO, "WelsBuildRefList pCtx->uiTemporalId = %d,pRef->iFrameNum = %d,pRef->uiTemporalId = %d\n",
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+                   "WelsBuildRefList pCtx->uiTemporalId = %d,pRef->iFrameNum = %d,pRef->uiTemporalId = %d\n",
                    pCtx->uiTemporalId, pRef->iFrameNum, pRef->uiTemporalId);
           break;
         }
@@ -639,9 +644,10 @@
     if (pCtx->iNumRef0 > 0) {
       if ((!pCtx->pRefList0[0]->bIsLongRef) || (!pCtx->pSvcParam->bEnableLongTermReference)) {
         if (iAbsDiffPicNumMinus1 < 0) {
-          WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO, "WelsUpdateRefSyntax():::uiAbsDiffPicNumMinus1:%d\n", iAbsDiffPicNumMinus1);
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "WelsUpdateRefSyntax():::uiAbsDiffPicNumMinus1:%d\n", iAbsDiffPicNumMinus1);
           iAbsDiffPicNumMinus1 += (1 << (pCtx->pSps->uiLog2MaxFrameNum));
-          WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO, "WelsUpdateRefSyntax():::uiAbsDiffPicNumMinus1< 0, update as:%d\n", iAbsDiffPicNumMinus1);
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "WelsUpdateRefSyntax():::uiAbsDiffPicNumMinus1< 0, update as:%d\n",
+                   iAbsDiffPicNumMinus1);
         }
 
         pRefReorder->SReorderingSyntax[0].uiReorderingOfPicNumsIdc = 0;
@@ -770,11 +776,11 @@
           if (pRefPic->uiTemporalId <= pCtx->uiTemporalId && (!pCtx->bCurFrameMarkedAsSceneLtr || pRefPic->bIsSceneLTR)) {
             pCtx->pCurDqLayer->pRefOri[pCtx->iNumRef0] = pRefOri;
             pCtx->pRefList0[pCtx->iNumRef0++] = pRefPic;
-            WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO,
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
                      "WelsBuildRefListScreen(), ref !current iFrameNum = %d, ref iFrameNum = %d,LTR number = %d,iNumRef = %d ref is Scene LTR = %d\n",
                      pCtx->iFrameNum, pCtx->pRefList0[pCtx->iNumRef0 - 1]->iFrameNum, pRefList->uiLongRefCount, iNumRef,
                      pRefPic->bIsSceneLTR);
-            WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO,
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
                      "WelsBuildRefListScreen pCtx->uiTemporalId = %d,pRef->iFrameNum = %d,pRef->uiTemporalId = %d\n",
                      pCtx->uiTemporalId, pRefPic->iFrameNum, pRefPic->uiTemporalId);
           }
@@ -787,7 +793,7 @@
                      || pRefList->pLongRefList[i]->uiTemporalId < pCtx->uiTemporalId)	{
             pCtx->pCurDqLayer->pRefOri[pCtx->iNumRef0] = pRefOri;
             pCtx->pRefList0[pCtx->iNumRef0++] = pRefList->pLongRefList[i];
-            WelsLog (&(pCtx->sLogCtx), WELS_LOG_INFO,
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
                      "WelsBuildRefListScreen(), ref !current iFrameNum = %d, ref iFrameNum = %d,LTR number = %d\n",
                      pCtx->iFrameNum, pCtx->pRefList0[pCtx->iNumRef0 - 1]->iFrameNum, pRefList->uiLongRefCount);
             break;
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -211,7 +211,7 @@
     iNumMbInEachGom = pWelsSvcRc->iNumberMbGom;
 
     if (iNumMbInEachGom <= 0) {
-      WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR,
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
                "[MT] DynamicAdjustSlicing(), invalid iNumMbInEachGom= %d from RC, iDid= %d, iCountNumMb= %d\n", iNumMbInEachGom,
                iCurDid, kiCountNumMb);
       return;
@@ -940,7 +940,7 @@
       WelsEventSignal (
         &pEncPEncCtx->pSliceThreading->pFinUpdateMbListEvent[iEventIdx]);	// mean finished update pMb list for this pSlice
     } else { // WELS_THREAD_ERROR_WAIT_TIMEOUT, or WELS_THREAD_ERROR_WAIT_FAILED
-      WelsLog (&(pEncPEncCtx->sLogCtx), WELS_LOG_WARNING,
+      WelsLog (& (pEncPEncCtx->sLogCtx), WELS_LOG_WARNING,
                "[MT] CodingSliceThreadProc(), waiting pReadySliceCodingEvent[%d] failed(%d) and thread%d terminated!\n", iEventIdx,
                iWaitRet, iThreadIdx);
       uiThrdRet	= 1;
@@ -978,7 +978,7 @@
   const int32_t kiEventCnt = uiNumThreads;
 
   if (pPriData == NULL || pLbi == NULL || kiEventCnt <= 0 || pEventsList == NULL) {
-    WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR,
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
              "FiredSliceThreads(), fail due pPriData == %p || pLbi == %p || iEventCnt(%d) <= 0 || pEventsList == %p!!\n",
              (void*)pPriData, (void*)pLbi, uiNumThreads, (void*)pEventsList);
     return 1;
--- a/codec/encoder/core/src/svc_base_layer_md.cpp
+++ b/codec/encoder/core/src/svc_base_layer_md.cpp
@@ -358,7 +358,7 @@
   ST32 (&pCurMb->sP16x16Mv, 0);
   ST32 (&pCurLayer->pDecPic->sMvList[kiMbXY], 0);
 
-  SetMvWithinIntegerMvRange (kiMbWidth, kiMbHeight, kiMbX, kiMbY, CAMERA_STARTMV_RANGE, & (pSlice->sMvStartMin),
+  SetMvWithinIntegerMvRange (kiMbWidth, kiMbHeight, kiMbX, kiMbY, pEncCtx->iMvRange, & (pSlice->sMvStartMin),
                              & (pSlice->sMvStartMax));
 }
 
@@ -1118,7 +1118,7 @@
     pSlice->uiMvcNum = 1;
 
     PredMv (&pMbCache->sMvComponents, i << 2, 2, pWelsMd->uiRef, & (sMe8x8->sMvp));
-    pFunc->pfMotionSearch[0] (pFunc, pCurDqLayer, sMe8x8, pSlice);
+    pFunc->pfMotionSearch[pWelsMd->iBlock8x8StaticIdc[i]] (pFunc, pCurDqLayer, sMe8x8, pSlice);
     UpdateP8x8Motion2Cache (pMbCache, i << 2, pWelsMd->uiRef, & (sMe8x8->sMv));
     iCostP8x8 += sMe8x8->uiSatdCost;
 //		sMe8x8++;
@@ -1753,73 +1753,6 @@
 
 
 
-
-//////
-//  try the BGD Pskip
-//////
-bool WelsMdInterJudgeBGDPskip (void* pCtx, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
-                               bool* bKeepSkip) {
-  sWelsEncCtx* pEncCtx = (sWelsEncCtx*)pCtx;
-  SWelsMD* pWelsMd = (SWelsMD*)pMd;
-
-  SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
-
-  const int32_t kiRefMbQp = pCurDqLayer->pRefPic->pRefMbQp[pCurMb->iMbXY];
-  const int32_t kiCurMbQp = pCurMb->uiLumaQp;// unsigned -> signed
-  int8_t*	pVaaBgMbFlag = pEncCtx->pVaa->pVaaBackgroundMbFlag + pCurMb->iMbXY;
-
-  const int32_t kiMbWidth = pCurDqLayer->iMbWidth;
-
-  *bKeepSkip = (*bKeepSkip) &&
-               ((!pVaaBgMbFlag[-1]) &&
-                (!pVaaBgMbFlag[-kiMbWidth]) &&
-                (!pVaaBgMbFlag[-kiMbWidth + 1]));
-
-  if (
-    *pVaaBgMbFlag
-    && !IS_INTRA (pMbCache->uiRefMbType)
-    && (kiRefMbQp - kiCurMbQp <= DELTA_QP_BGD_THD || kiRefMbQp <= 26)
-  ) {
-    SMVUnitXY	sVaaPredSkipMv = { 0 };
-    PredSkipMv (pMbCache, &sVaaPredSkipMv);
-    WelsMdBackgroundMbEnc (pEncCtx, pWelsMd, pCurMb, pMbCache, pSlice, (LD32 (&sVaaPredSkipMv) == 0));
-    return true;
-  }
-
-  return false;
-}
-
-bool WelsMdInterJudgeBGDPskipFalse (void* pCtx, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
-                                    bool* bKeepSkip) {
-  return false;
-}
-
-
-
-//////
-//  update BGD related info
-//////
-void WelsMdInterUpdateBGDInfo (SDqLayer* pCurLayer,  SMB* pCurMb, const bool bCollocatedPredFlag,
-                               const int32_t iRefPictureType) {
-  uint8_t* pTargetRefMbQpList = (pCurLayer->pDecPic->pRefMbQp);
-  const int32_t kiMbXY = pCurMb->iMbXY;
-
-  if (pCurMb->uiCbp || I_SLICE == iRefPictureType || 0 == bCollocatedPredFlag) {
-    pTargetRefMbQpList[kiMbXY] = pCurMb->uiLumaQp;
-  } else { //unchange, do not need to evaluation?
-    uint8_t* pRefPicRefMbQpList = (pCurLayer->pRefPic->pRefMbQp);
-    pTargetRefMbQpList[kiMbXY] = pRefPicRefMbQpList[kiMbXY];
-  }
-
-  if (pCurMb->uiMbType == MB_TYPE_BACKGROUND) {
-    pCurMb->uiMbType = MB_TYPE_SKIP;
-  }
-}
-
-void WelsMdInterUpdateBGDInfoNULL (SDqLayer* pCurLayer, SMB* pCurMb, const bool bCollocatedPredFlag,
-                                   const int32_t iRefPictureType) {
-}
-
 //
 //
 //
@@ -1843,7 +1776,8 @@
   if (bSkip) {
     WelsMdInterDecidedPskip (pEncCtx,  pSlice,  pCurMb, pMbCache);
   } else {
-    //Step 2: ILFMD in P
+    //Step 3: SubP16 MD
+    pEncCtx->pFuncList->pfSetScrollingMv (pEncCtx->pVaa, pWelsMd); //SCC
     pEncCtx->pFuncList->pfInterFineMd (pEncCtx, pWelsMd, pSlice, pCurMb, pWelsMd->iCostLuma);
 
     //refinement for inter type
@@ -1874,84 +1808,5 @@
   WelsIMbChromaEncode (pEncCtx, pCurMb, pMbCache);  //add pEnc&rec to MD--2010.3.15
   pCurMb->pSadCost[0] = 0;
 }
-
-//
-//func pointer of inter MD for sub16x16 INTER MD for screen content coding
-//
-static inline void MergeSub16Me (const SWelsME& sSrcMe0, const SWelsME& sSrcMe1, SWelsME* pTarMe) {
-  memcpy (pTarMe, &sSrcMe0, sizeof (sSrcMe0)); // confirmed_safe_unsafe_usage
-
-  pTarMe->uiSadCost = sSrcMe0.uiSadCost + sSrcMe1.uiSadCost;//not precise cost since MVD cost is not the same
-  pTarMe->uiSatdCost = sSrcMe0.uiSatdCost + sSrcMe1.uiSatdCost;//not precise cost since MVD cost is not the same
-}
-static inline bool IsSameMv (const SMVUnitXY& sMv0, const SMVUnitXY& sMv1) {
-  return ((sMv0.iMvX == sMv1.iMvX) && (sMv0.iMvY == sMv1.iMvY));
-}
-bool TryModeMerge (SMbCache* pMbCache, SWelsMD* pWelsMd, SMB* pCurMb) {
-  SWelsME* pMe8x8 = & (pWelsMd->sMe.sMe8x8[0]);
-  const bool bSameMv16x8_0 = IsSameMv (pMe8x8[0].sMv, pMe8x8[1].sMv);
-  const bool bSameMv16x8_1 = IsSameMv (pMe8x8[2].sMv, pMe8x8[3].sMv);
-
-  const bool bSameMv8x16_0 = IsSameMv (pMe8x8[0].sMv, pMe8x8[2].sMv);
-  const bool bSameMv8x16_1 = IsSameMv (pMe8x8[1].sMv, pMe8x8[3].sMv);
-  //need to consider iRefIdx when multi ref is available
-  const bool bSameRefIdx16x8_0 = true; //pMe8x8[0].iRefIdx == pMe8x8[1].iRefIdx;
-  const bool bSameRefIdx16x8_1 = true; //pMe8x8[2].iRefIdx == pMe8x8[3].iRefIdx;
-  const bool bSameRefIdx8x16_0 = true; //pMe8x8[0].iRefIdx == pMe8x8[2].iRefIdx;
-  const bool bSameRefIdx8x16_1 = true; //pMe8x8[1].iRefIdx == pMe8x8[3].iRefIdx;
-  const int32_t iSameMv = (bSameMv16x8_0 << 7) | (bSameRefIdx16x8_0 << 6) | (bSameMv16x8_1 << 5) |
-                          (bSameRefIdx16x8_1 << 4) |
-                          (bSameMv8x16_0 << 3) | (bSameRefIdx8x16_0 << 2) | (bSameMv8x16_1 << 1) | (bSameRefIdx8x16_1);
-
-  switch (iSameMv) {
-  case 0xF0:
-    pCurMb->uiMbType = MB_TYPE_16x8;
-    MergeSub16Me (pMe8x8[0], pMe8x8[1], & (pWelsMd->sMe.sMe16x8[0]));
-    MergeSub16Me (pMe8x8[2], pMe8x8[3], & (pWelsMd->sMe.sMe16x8[1]));
-    PredInter16x8Mv (pMbCache, 0, 0, & (pWelsMd->sMe.sMe16x8[0].sMvp));
-    PredInter16x8Mv (pMbCache, 8, 0, & (pWelsMd->sMe.sMe16x8[1].sMvp));
-    break;
-  case 0x0F:
-    pCurMb->uiMbType = MB_TYPE_8x16;
-    MergeSub16Me (pMe8x8[0], pMe8x8[2], & (pWelsMd->sMe.sMe8x16[0]));
-    MergeSub16Me (pMe8x8[1], pMe8x8[3], & (pWelsMd->sMe.sMe8x16[1]));
-    PredInter8x16Mv (pMbCache, 0, 0, & (pWelsMd->sMe.sMe8x16[0].sMvp));
-    PredInter8x16Mv (pMbCache, 4, 0, & (pWelsMd->sMe.sMe8x16[1].sMvp));
-    break;
-  case 0xFF:
-  //MERGE_16x16
-  //from test results of multiple sequences show that using the following 0x0F to merge 16x16
-  //for some seq there is BR saving some loss
-  //on the whole the BR will increase little bit
-  //to save complexity we decided not to merge 16x16 at present (10/12/2012)
-  default:
-    break;
-  }
-  return (MB_TYPE_8x8 != pCurMb->uiMbType);
-}
-
-
-void WelsMdInterFinePartitionVaaOnScreen (void* pEnc, void* pMd, SSlice* pSlice, SMB* pCurMb, int32_t iBestCost) {
-  sWelsEncCtx* pEncCtx = (sWelsEncCtx*)pEnc;
-  SWelsMD* pWelsMd = (SWelsMD*)pMd;
-  SMbCache* pMbCache = &pSlice->sMbCacheInfo;
-  SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
-  int32_t iCostP8x8;
-  uint8_t uiMbSign = pEncCtx->pFuncList->pfGetMbSignFromInterVaa (&pEncCtx->pVaa->sVaaCalcInfo.pSad8x8[pCurMb->iMbXY][0]);
-
-  if (MBVAASIGN_FLAT == uiMbSign) {
-    return;
-  }
-
-  iCostP8x8 = WelsMdP8x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice);
-  if (iCostP8x8 < iBestCost) {
-    iBestCost = iCostP8x8;
-    pCurMb->uiMbType = MB_TYPE_8x8;
-
-    TryModeMerge (pMbCache, pWelsMd, pCurMb);
-  }
-  pWelsMd->iCostLuma = iBestCost;
-}
-
 
 } // namespace WelsSVCEnc
--- a/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/codec/encoder/core/src/svc_encode_slice.cpp
@@ -921,10 +921,10 @@
 ///////////////
 //  pMb loop
 ///////////////
-inline void WelsInitInterMDStruc (const SMB* pCurMb, uint16_t* pMvdCostTableInter, const int32_t kiMvdInterTableStride,
+inline void WelsInitInterMDStruc (const SMB* pCurMb, uint16_t* pMvdCostTable, const int32_t kiMvdInterTableStride,
                                   SWelsMD* pMd) {
   pMd->iLambda = g_kiQpCostTable[pCurMb->uiLumaQp];
-  pMd->pMvdCost = &pMvdCostTableInter[pCurMb->uiLumaQp * kiMvdInterTableStride];
+  pMd->pMvdCost = &pMvdCostTable[pCurMb->uiLumaQp * kiMvdInterTableStride];
   pMd->	iMbPixX = (pCurMb->iMbX << 4);
   pMd->	iMbPixY = (pCurMb->iMbY << 4);
   memset (&pMd->iBlock8x8StaticIdc[0], 0, sizeof (pMd->iBlock8x8StaticIdc));
@@ -943,9 +943,8 @@
   int32_t	iCurMbIdx			= -1;
   int32_t	iMbSkipRun			= 0;
   const int32_t kiTotalNumMb	= pCurLayer->iMbWidth * pCurLayer->iMbHeight;
-  const int32_t kiMvdInterTableSize	= (pEncCtx->pSvcParam->iSpatialLayerNum == 1 ? 648 : 972);
-  const int32_t kiMvdInterTableStride = 1 + (kiMvdInterTableSize << 1);
-  uint16_t* pMvdCostTableInter		= &pEncCtx->pMvdCostTableInter[kiMvdInterTableSize];
+  const int32_t kiMvdInterTableStride =  pEncCtx->iMvdCostTableStride;
+  uint16_t* pMvdCostTable		= &pEncCtx->pMvdCostTable[pEncCtx->iMvdCostTableSize];
   const int32_t kiSliceIdx				= pSlice->uiSliceIdx;
   const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset;
   int32_t iEncReturn = ENC_RETURN_SUCCESS;
@@ -965,7 +964,7 @@
     WelsMdInterInit (pEncCtx, pSlice, pCurMb, kiSliceFirstMbXY);
 
 TRY_REENCODING:
-    WelsInitInterMDStruc (pCurMb, pMvdCostTableInter, kiMvdInterTableStride, pMd);
+    WelsInitInterMDStruc (pCurMb, pMvdCostTable, kiMvdInterTableStride, pMd);
     pEncCtx->pFuncList->pfInterMd (pEncCtx, pMd, pSlice, pCurMb, pMbCache);
     //mb_qp
 
@@ -1041,7 +1040,7 @@
   int32_t	iMbSkipRun			= 0;
   const int32_t kiMvdInterTableSize	= (pEncCtx->pSvcParam->iSpatialLayerNum == 1 ? 648 : 972);
   const int32_t kiMvdInterTableStride = 1 + (kiMvdInterTableSize << 1);
-  uint16_t* pMvdCostTableInter		= &pEncCtx->pMvdCostTableInter[kiMvdInterTableSize];
+  uint16_t* pMvdCostTable		= &pEncCtx->pMvdCostTable[kiMvdInterTableSize];
   const int32_t kiSliceIdx				= pSlice->uiSliceIdx;
   const int32_t kiPartitionId			= (kiSliceIdx % pEncCtx->iActiveThreadsNum);
   const uint8_t kuiChromaQpIndexOffset = pCurLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset;
@@ -1074,7 +1073,7 @@
     WelsMdInterInit (pEncCtx, pSlice, pCurMb, kiSliceFirstMbXY);
 
 TRY_REENCODING:
-    WelsInitInterMDStruc (pCurMb, pMvdCostTableInter, kiMvdInterTableStride, pMd);
+    WelsInitInterMDStruc (pCurMb, pMvdCostTable, kiMvdInterTableStride, pMd);
     pEncCtx->pFuncList->pfInterMd (pEncCtx, pMd, pSlice, pCurMb, pMbCache);
     //mb_qp
 
--- a/codec/encoder/core/src/svc_mode_decision.cpp
+++ b/codec/encoder/core/src/svc_mode_decision.cpp
@@ -47,39 +47,9 @@
 
 namespace WelsSVCEnc {
 
-//
-// md in enhancement layer
-///
-
-inline bool IsMbStatic (int32_t* pBlockType, EStaticBlockIdc eType) {
-  return (pBlockType != NULL &&
-          eType == pBlockType[0] &&
-          eType == pBlockType[1] &&
-          eType == pBlockType[2] &&
-          eType == pBlockType[3]);
-}
-inline bool IsMbCollocatedStatic (int32_t* pBlockType) {
-  return IsMbStatic (pBlockType, COLLOCATED_STATIC);
-}
-
-inline bool IsMbScrolledStatic (int32_t* pBlockType) {
-  return IsMbStatic (pBlockType, SCROLLED_STATIC);
-}
-
-inline int32_t CalUVSadCost (SWelsFuncPtrList* pFunc, uint8_t* pEncOri, int32_t iStrideUV, uint8_t* pRefOri,
-                             int32_t iRefLineSize) {
-  return pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pEncOri, iStrideUV, pRefOri, iRefLineSize);
-}
-
-inline bool CheckBorder (int32_t iMbX, int32_t iMbY, int32_t iScrollMvX, int32_t iScrollMvY, int32_t iMbWidth,
-                         int32_t iMbHeight) {
-  return ((iMbX << 4) + iScrollMvX < 0 ||
-          (iMbX << 4) + iScrollMvX > (iMbWidth - 1) << 4 ||
-          (iMbY << 4) + iScrollMvY < 0 ||
-          (iMbY << 4) + iScrollMvY > (iMbHeight - 1) << 4
-         ); //border check for safety
-}
-
+//////////////
+// MD for enhancement layers
+//////////////
 void WelsMdSpatialelInterMbIlfmdNoilp (sWelsEncCtx* pEncCtx, SWelsMD* pWelsMd, SSlice* pSlice,
                                        SMB* pCurMb, const Mb_Type kuiRefMbType) {
   SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
@@ -149,10 +119,8 @@
   WelsMdSpatialelInterMbIlfmdNoilp (pEncCtx, pWelsMd, pSlice, pCurMb, kuiInterLayerRefMbType); //MD process
 }
 
-///////////////////////
-// do initiation for noILP (needed by ILFMD)
-////////////////////////
 
+// do initiation for noILP (needed by ILFMD)
 SMB* GetRefMb (SDqLayer* pCurLayer, SMB* pCurMb) {
   const SDqLayer*  kpRefLayer		= pCurLayer->pRefLayer;
   const int32_t  kiRefMbIdx = (pCurMb->iMbY >> 1) * kpRefLayer->iMbWidth + (pCurMb->iMbX >>
@@ -184,6 +152,111 @@
   }
 }
 
+
+
+//////////////
+// MD for Background decision
+//////////////
+// Try the BGD P-skip for the current MB.
+//  Returns true after encoding the MB through WelsMdBackgroundMbEnc(); returns false (nothing encoded here) otherwise.
+//  Side effect: *bKeepSkip is AND-ed with "no background flag set at offsets -1, -kiMbWidth, -kiMbWidth + 1".
+bool WelsMdInterJudgeBGDPskip (void* pCtx, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
+                               bool* bKeepSkip) {
+  sWelsEncCtx* pEncCtx = (sWelsEncCtx*)pCtx;
+  SWelsMD* pWelsMd = (SWelsMD*)pMd;
+
+  SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
+
+  const int32_t kiRefMbQp = pCurDqLayer->pRefPic->pRefMbQp[pCurMb->iMbXY]; // QP stored for this MB index in the reference picture
+  const int32_t kiCurMbQp = pCurMb->uiLumaQp;// unsigned -> signed, so the QP difference below can go negative
+  int8_t*	pVaaBgMbFlag = pEncCtx->pVaa->pVaaBackgroundMbFlag + pCurMb->iMbXY; // points at this MB's background flag
+
+  const int32_t kiMbWidth = pCurDqLayer->iMbWidth;
+
+  *bKeepSkip = (*bKeepSkip) && // updated on every call, whether or not the BGD skip is taken
+               ((!pVaaBgMbFlag[-1]) && // presumably the left neighbour in raster order
+                (!pVaaBgMbFlag[-kiMbWidth]) && // presumably the top neighbour
+                (!pVaaBgMbFlag[-kiMbWidth + 1])); // presumably top-right; NOTE(review): negative offsets read before this MB's entry - confirm the flag buffer is padded or the caller guarantees the neighbours exist
+
+  if (
+    *pVaaBgMbFlag
+    && !IS_INTRA (pMbCache->uiRefMbType)
+    && (kiRefMbQp - kiCurMbQp <= DELTA_QP_BGD_THD || kiRefMbQp <= 26) // reference QP at most DELTA_QP_BGD_THD above the current QP, or reference QP <= 26
+  ) {
+    SMVUnitXY	sVaaPredSkipMv = { 0 };
+    PredSkipMv (pMbCache, &sVaaPredSkipMv);
+    WelsMdBackgroundMbEnc (pEncCtx, pWelsMd, pCurMb, pMbCache, pSlice, (LD32 (&sVaaPredSkipMv) == 0)); // last argument: 32-bit load of the predicted skip MV equals 0 (presumably both components zero)
+    return true;
+  }
+
+  return false;
+}
+
+bool WelsMdInterJudgeBGDPskipFalse (void* pCtx, void* pMd, SSlice* pSlice, SMB* pCurMb, SMbCache* pMbCache,
+                                    bool* bKeepSkip) {
+  return false; // stub with the same signature as WelsMdInterJudgeBGDPskip: ignores every argument and leaves *bKeepSkip untouched
+}
+
+
+
+// Update BGD related info for the current MB:
+//  store into pDecPic->pRefMbQp[iMbXY] either the MB's own luma QP or the reference picture's entry,
+//  then rewrite MB_TYPE_BACKGROUND as MB_TYPE_SKIP.
+void WelsMdInterUpdateBGDInfo (SDqLayer* pCurLayer,  SMB* pCurMb, const bool bCollocatedPredFlag,
+                               const int32_t iRefPictureType) {
+  uint8_t* pTargetRefMbQpList = (pCurLayer->pDecPic->pRefMbQp);
+  const int32_t kiMbXY = pCurMb->iMbXY;
+
+  if (pCurMb->uiCbp || I_SLICE == iRefPictureType || 0 == bCollocatedPredFlag) {
+    pTargetRefMbQpList[kiMbXY] = pCurMb->uiLumaQp;
+  } else { // uiCbp == 0, reference is not an I slice and the collocated-prediction flag is set: inherit the QP recorded in the reference picture
+    uint8_t* pRefPicRefMbQpList = (pCurLayer->pRefPic->pRefMbQp);
+    pTargetRefMbQpList[kiMbXY] = pRefPicRefMbQpList[kiMbXY];
+  }
+
+  if (pCurMb->uiMbType == MB_TYPE_BACKGROUND) {
+    pCurMb->uiMbType = MB_TYPE_SKIP; // MB_TYPE_BACKGROUND is not kept past this point; the MB is marked as a skip MB
+  }
+}
+
+void WelsMdInterUpdateBGDInfoNULL (SDqLayer* pCurLayer, SMB* pCurMb, const bool bCollocatedPredFlag,
+                                   const int32_t iRefPictureType) { // empty body: no-op with the same signature as WelsMdInterUpdateBGDInfo
+}
+
+
+//////////////
+// MD for screen contents
+//////////////
+inline bool IsMbStatic (int32_t* pBlockType, EStaticBlockIdc eType) { // true iff pBlockType is non-NULL and its first four entries all equal eType
+  return (pBlockType != NULL && // NULL is treated as "not static" rather than dereferenced
+          eType == pBlockType[0] &&
+          eType == pBlockType[1] &&
+          eType == pBlockType[2] &&
+          eType == pBlockType[3]);
+}
+inline bool IsMbCollocatedStatic (int32_t* pBlockType) { // true iff all four entries of pBlockType are COLLOCATED_STATIC (false for NULL)
+  return IsMbStatic (pBlockType, COLLOCATED_STATIC);
+}
+
+inline bool IsMbScrolledStatic (int32_t* pBlockType) { // true iff all four entries of pBlockType are SCROLLED_STATIC (false for NULL)
+  return IsMbStatic (pBlockType, SCROLLED_STATIC);
+}
+
+inline int32_t CalUVSadCost (SWelsFuncPtrList* pFunc, uint8_t* pEncOri, int32_t iStrideUV, uint8_t* pRefOri,
+                             int32_t iRefLineSize) { // returns the SAD of one 8x8 block, source (pEncOri, iStrideUV) against reference (pRefOri, iRefLineSize)
+  return pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] (pEncOri, iStrideUV, pRefOri, iRefLineSize); // dispatches through the function table entry for BLOCK_8x8
+}
+
+inline bool CheckBorder (int32_t iMbX, int32_t iMbY, int32_t iScrollMvX, int32_t iScrollMvY, int32_t iMbWidth,
+                         int32_t iMbHeight) { // NOTE: returns true when the check FAILS, i.e. the displaced MB origin is out of bounds
+  return ((iMbX << 4) + iScrollMvX < 0 || // MB origin in pixels (index * 16) plus the scroll MV
+          (iMbX << 4) + iScrollMvX > (iMbWidth - 1) << 4 || // right limit: origin of the last MB column
+          (iMbY << 4) + iScrollMvY < 0 ||
+          (iMbY << 4) + iScrollMvY > (iMbHeight - 1) << 4 // bottom limit: origin of the last MB row
+         ); //border check for safety: true means the displaced origin lies outside [0, (iMbWidth-1)*16] x [0, (iMbHeight-1)*16]
+}
+
+
 bool JudgeStaticSkip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, SWelsMD* pWelsMd) {
   SDqLayer* pCurDqLayer			= pEncCtx->pCurDqLayer;
   const int32_t kiMbX = pCurMb->iMbX;
@@ -414,5 +487,112 @@
   }
 }
 
+///////////////////////
+// SubP16x16 Mode Decision for screen content
+////////////////////////
+//
+//func pointer of inter MD for sub16x16 INTER MD for screen content coding
+//
+static inline void MergeSub16Me (const SWelsME& sSrcMe0, const SWelsME& sSrcMe1, SWelsME* pTarMe) { // build *pTarMe from sSrcMe0, with SAD/SATD costs summed over both sources
+  memcpy (pTarMe, &sSrcMe0, sizeof (sSrcMe0)); // confirmed_safe_unsafe_usage; every field, including the MV, is taken from sSrcMe0
+
+  pTarMe->uiSadCost = sSrcMe0.uiSadCost + sSrcMe1.uiSadCost;//not precise cost since MVD cost is not the same
+  pTarMe->uiSatdCost = sSrcMe0.uiSatdCost + sSrcMe1.uiSatdCost;//not precise cost since MVD cost is not the same
+}
+static inline bool IsSameMv (const SMVUnitXY& sMv0, const SMVUnitXY& sMv1) { // true iff both the X and the Y components are equal
+  return ((sMv0.iMvX == sMv1.iMvX) && (sMv0.iMvY == sMv1.iMvY));
+}
+bool TryModeMerge (SMbCache* pMbCache, SWelsMD* pWelsMd, SMB* pCurMb) { // merge the four 8x8 ME results into 16x8 or 8x16 when their MVs pair up; returns true iff uiMbType != MB_TYPE_8x8 at exit
+  SWelsME* pMe8x8 = & (pWelsMd->sMe.sMe8x8[0]);
+  const bool bSameMv16x8_0 = IsSameMv (pMe8x8[0].sMv, pMe8x8[1].sMv); // 8x8 blocks 0 and 1
+  const bool bSameMv16x8_1 = IsSameMv (pMe8x8[2].sMv, pMe8x8[3].sMv); // 8x8 blocks 2 and 3
+
+  const bool bSameMv8x16_0 = IsSameMv (pMe8x8[0].sMv, pMe8x8[2].sMv); // 8x8 blocks 0 and 2
+  const bool bSameMv8x16_1 = IsSameMv (pMe8x8[1].sMv, pMe8x8[3].sMv); // 8x8 blocks 1 and 3
+  //need to consider iRefIdx when multi ref is available; until then the ref-index checks below are hard-wired to true
+  const bool bSameRefIdx16x8_0 = true; //pMe8x8[0].iRefIdx == pMe8x8[1].iRefIdx;
+  const bool bSameRefIdx16x8_1 = true; //pMe8x8[2].iRefIdx == pMe8x8[3].iRefIdx;
+  const bool bSameRefIdx8x16_0 = true; //pMe8x8[0].iRefIdx == pMe8x8[2].iRefIdx;
+  const bool bSameRefIdx8x16_1 = true; //pMe8x8[1].iRefIdx == pMe8x8[3].iRefIdx;
+  const int32_t iSameMv = ((bSameMv16x8_0 && bSameRefIdx16x8_0  && bSameMv16x8_1 && bSameRefIdx16x8_1) << 1) | // bit 1: both 16x8 pairs match
+                          (bSameMv8x16_0 && bSameRefIdx8x16_0 && bSameMv8x16_1 && bSameRefIdx8x16_1); // bit 0: both 8x16 pairs match
+
+  //TODO: did not consider the MVD cost here, may consider later
+  switch (iSameMv) {
+  case 3:
+    //MERGE_16x16 candidate: both bits set, i.e. all four 8x8 MVs are identical
+    //test results over multiple sequences showed that merging to 16x16 in this case
+    //gives a bitrate saving for some sequences and some loss for others;
+    //on the whole the bitrate increases a little bit,
+    //so to save complexity we decided not to merge 16x16 at present (10/12/2012)
+    //TODO: adjusted case order, consider re-testing later
+    break;
+  case 2:
+    pCurMb->uiMbType = MB_TYPE_16x8;
+    MergeSub16Me (pMe8x8[0], pMe8x8[1], & (pWelsMd->sMe.sMe16x8[0]));
+    MergeSub16Me (pMe8x8[2], pMe8x8[3], & (pWelsMd->sMe.sMe16x8[1]));
+    PredInter16x8Mv (pMbCache, 0, 0, & (pWelsMd->sMe.sMe16x8[0].sMvp)); // refresh the MV predictor for the merged partition
+    PredInter16x8Mv (pMbCache, 8, 0, & (pWelsMd->sMe.sMe16x8[1].sMvp));
+    break;
+  case 1:
+    pCurMb->uiMbType = MB_TYPE_8x16;
+    MergeSub16Me (pMe8x8[0], pMe8x8[2], & (pWelsMd->sMe.sMe8x16[0]));
+    MergeSub16Me (pMe8x8[1], pMe8x8[3], & (pWelsMd->sMe.sMe8x16[1]));
+    PredInter8x16Mv (pMbCache, 0, 0, & (pWelsMd->sMe.sMe8x16[0].sMvp)); // refresh the MV predictor for the merged partition
+    PredInter8x16Mv (pMbCache, 4, 0, & (pWelsMd->sMe.sMe8x16[1].sMvp));
+    break;
+  default: // 0: no pairing found, uiMbType is left unchanged
+    break;
+  }
+  return (MB_TYPE_8x8 != pCurMb->uiMbType); // depends on the MB type on entry as well: cases 3 and 0 do not modify it
+}
+
+
+void WelsMdInterFinePartitionVaaOnScreen (void* pEnc, void* pMd, SSlice* pSlice, SMB* pCurMb, int32_t iBestCost) { // tries P8x8 (then a 16x8/8x16 merge) against iBestCost unless the MB's VAA sign is FLAT
+  sWelsEncCtx* pEncCtx = (sWelsEncCtx*)pEnc;
+  SWelsMD* pWelsMd = (SWelsMD*)pMd;
+  SMbCache* pMbCache = &pSlice->sMbCacheInfo;
+  SDqLayer* pCurDqLayer = pEncCtx->pCurDqLayer;
+  int32_t iCostP8x8;
+  uint8_t uiMbSign = pEncCtx->pFuncList->pfGetMbSignFromInterVaa (&pEncCtx->pVaa->sVaaCalcInfo.pSad8x8[pCurMb->iMbXY][0]); // sign derived from this MB's pSad8x8 entries
+
+  if (MBVAASIGN_FLAT == uiMbSign) {
+    return; // flat MB: no sub-16x16 search; pWelsMd->iCostLuma and pCurMb->uiMbType are left as they were
+  }
+
+  iCostP8x8 = WelsMdP8x8 (pEncCtx->pFuncList, pCurDqLayer, pWelsMd, pSlice);
+  if (iCostP8x8 < iBestCost) {
+    iBestCost = iCostP8x8;
+    pCurMb->uiMbType = MB_TYPE_8x8;
+
+    TryModeMerge (pMbCache, pWelsMd, pCurMb); // may change uiMbType again; the return value is not used
+  }
+  pWelsMd->iCostLuma = iBestCost; // either the incoming iBestCost or the lower P8x8 cost
+}
+
+
+
+
+
+// SetScrollingMvToMd:
+//  copies the scroll MV held in pVaa (an SVAAFrameInfoExt) into the sDirectionalMv field
+//  of the 16x16 ME unit and of all four 8x8 ME units of pMd (an SWelsMD).
+void SetScrollingMvToMd (void* pVaa, void* pMd) {
+  SVAAFrameInfoExt* pVaaExt		= static_cast<SVAAFrameInfoExt*> (pVaa);
+  SWelsMD* pWelsMd             = static_cast<SWelsMD*> (pMd);
+
+  SMVUnitXY          sTempMv;
+  sTempMv.iMvX = pVaaExt->sScrollDetectInfo.iScrollMvX;
+  sTempMv.iMvY = pVaaExt->sScrollDetectInfo.iScrollMvY;
+
+  (pWelsMd->sMe.sMe16x16).sDirectionalMv = // chained assignment: all five ME units receive the same MV
+    (pWelsMd->sMe.sMe8x8[0]).sDirectionalMv =
+      (pWelsMd->sMe.sMe8x8[1]).sDirectionalMv =
+        (pWelsMd->sMe.sMe8x8[2]).sDirectionalMv =
+          (pWelsMd->sMe.sMe8x8[3]).sDirectionalMv = sTempMv;
+}
+
+void SetScrollingMvToMdNull (void* pVaa, void* pWelsMd) { // empty body: no-op with the same signature as SetScrollingMvToMd
+}
 
 } // namespace WelsSVCEnc
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -71,6 +71,8 @@
 }
 
 void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent) {
+  pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
+
   if (!bScreenContent) {
     pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
     pFuncList->pfCalculateBlockFeatureOfFrame[0] =
@@ -77,7 +79,7 @@
       pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
     pFuncList->pfCalculateSingleBlockFeature[0] =
       pFuncList->pfCalculateSingleBlockFeature[1] = NULL;
-    pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
+
   } else {
     pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;
 
@@ -84,6 +86,7 @@
     //for cross serarch
     pFuncList->pfVerticalFullSearch = LineFullSearch_c;
     pFuncList->pfHorizontalFullSearch = LineFullSearch_c;
+
 #if defined (X86_ASM)
     if (uiCpuFlag & WELS_CPU_SSE41) {
       pFuncList->pfSampleSadHor8[0] = SampleSad8x8Hor8_sse41;
@@ -99,7 +102,6 @@
     //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
     pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
     pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
-    pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
   }
 }
 
@@ -129,6 +131,38 @@
                               kiStrideRef);
 }
 
+void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice) { // no search: evaluates only MV (0,0) for the ME unit in pLpme
+  SDqLayer* pCurDqLayer      = (SDqLayer*)pLplayer;
+  SWelsME* pMe            = (SWelsME*)pLpme;
+  SSlice* pSlice          = (SSlice*)pLpslice; // NOTE(review): pSlice is not referenced anywhere in this function
+  const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
+  const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
+
+  pMe->sMv.iMvX = pMe->sMv.iMvY = 0;
+  pMe->uiSadCost = // SAD against pMe->pRefMb as passed in; NOTE(review): presumably pRefMb already points at the collocated block - confirm against the caller
+    pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef) ;
+  pMe->uiSadCost += COST_MVD (pMe->pMvdCost, - pMe->sMvp.iMvX, - pMe->sMvp.iMvY); // MVD cost of MV (0,0) relative to the predictor: 0 - sMvp
+  MeEndIntepelSearch (pMe);
+  pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
+                              kiStrideRef);
+}
+
+void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice) { // no search: evaluates only pMe->sDirectionalMv for the ME unit in pLpme
+  SDqLayer* pCurDqLayer      = (SDqLayer*)pLplayer;
+  SWelsME* pMe            = (SWelsME*)pLpme;
+  SSlice* pSlice          = (SSlice*)pLpslice; // NOTE(review): pSlice is not referenced anywhere in this function
+  const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
+  const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
+
+  pMe->sMv = pMe->sDirectionalMv;
+  pMe->pRefMb = pMe->pColoRefMb + pMe->sMv.iMvY * kiStrideRef + pMe->sMv.iMvX; // MV used as a whole-sample offset from the collocated reference block; no bounds check is done here
+  pMe->uiSadCost =
+    pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef)
+    + COST_MVD (pMe->pMvdCost, (pMe->sMv.iMvX << 2) - pMe->sMvp.iMvX, (pMe->sMv.iMvY << 2) - pMe->sMvp.iMvY); // MV scaled by 4 before subtracting the predictor (presumably integer-pel -> quarter-pel); NOTE(review): `<< 2` on a negative signed value is formally undefined before C++20, `* 4` would be portable
+  MeEndIntepelSearch (pMe);
+  pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
+                              kiStrideRef);
+}
 /*!
  * \brief  EL mb motion estimate initial point testing
  *
@@ -456,7 +490,7 @@
     SMVUnitXY sBestMv;
     sBestMv.iMvX = iBestPos - kiCurMeBlockPix;
     sBestMv.iMvY = 0;
-    UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY], pMe);
+    UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
   }
 }
 #endif
@@ -488,7 +522,7 @@
     SMVUnitXY sBestMv;
     sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - kiCurMeBlockPix);
     sBestMv.iMvY = bVerticalSearch ? (iBestPos - kiCurMeBlockPix) : 0;
-    UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiStride], pMe);
+    UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
   }
 }
 
--- a/codec/encoder/core/src/wels_preprocess.cpp
+++ b/codec/encoder/core/src/wels_preprocess.cpp
@@ -982,7 +982,7 @@
   SPicture* pRefPic = NULL;
   SRefInfoParam* pRefPicInfo = NULL;
   uint8_t*  pCurBlockStaticPointer = NULL;
-  SLogContext* pLogCtx = &(pCtx->sLogCtx);
+  SLogContext* pLogCtx = & (pCtx->sLogCtx);
   const int32_t iNegligibleMotionBlocks = (static_cast<int32_t> ((pCurPicture->iWidthInPixel >> 3) *
                                           (pCurPicture->iHeightInPixel >> 3) * STATIC_SCENE_MOTION_RATIO));
   const uint8_t iCurTid = GetTemporalLevel (&pSvcParam->sDependencyLayers[m_pEncCtx->sSpatialIndexMap[0].iDid],