shithub: openh264

Download patch

ref: f27b22f07c6973e78cf18aee4471c49af9d62462
parent: 32fc4bea7959a7aa52424485f7430bb34e797c99
parent: 64ef420e67a8cb5a7bae383927b7d5dd25e7b8b1
author: Licai Guo <[email protected]>
date: Fri Apr 11 18:45:47 EDT 2014

Merge pull request #676 from sijchen/fme_merge54

[Encoder ME] add memory allocation for feature search

--- a/codec/encoder/core/inc/picture.h
+++ b/codec/encoder/core/inc/picture.h
@@ -68,7 +68,6 @@
   int32_t iHighFreMbCount;
 }SFeatureSearchPreparation;//maintain only one
 
-
 /*
  *  Reconstructed Picture definition
  *  It is used to express reference picture, also consequent reconstruction picture for output
@@ -108,6 +107,9 @@
   uint8_t    uiTemporalId;
   uint8_t    uiSpatialId;
   int32_t   iFrameAverageQp;
+
+  /*******************************for screen reference frames****************************/
+  SScreenBlockFeatureStorage* pScreenBlockFeatureStorage;
 } SPicture;
 
 /*
--- a/codec/encoder/core/inc/picture_handle.h
+++ b/codec/encoder/core/inc/picture_handle.h
@@ -46,13 +46,13 @@
 namespace WelsSVCEnc {
 /*!
  * \brief	alloc picture pData with borders for each plane based width and height of picture
- * \param	cx				width of picture in pixels
- * \param	cy				height of picture in pixels
- * \param	need_data		need pData allocation
- * \pram	need_expand		need borders expanding
+ * \param	kiWidth				width of picture in pixels
+ * \param	kiHeight				height of picture in pixels
+ * \param	bNeedMbInfo		need pData allocation
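+ * \param	iNeedFeatureStorage		0 if no FME feature storage is needed; otherwise packed as (feature strategy index << 16) + (16x16 ME method << 8) + (8x8 ME method)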
  * \return	successful if effective picture pointer returned, otherwise failed with NULL
  */
-SPicture* AllocPicture (CMemoryAlign* pMa, const int32_t kiWidth, const int32_t kiHeight, bool bNeedMbInfo);
+SPicture* AllocPicture (CMemoryAlign* pMa, const int32_t kiWidth, const int32_t kiHeight, bool bNeedMbInfo, int32_t iNeedFeatureStorage);
 
 /*!
  * \brief	free picture pData planes
--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -54,8 +54,7 @@
 #define EXPANDED_MV_RANGE (504) //=512-8 rather than 511 to sacrifice same edge point but save complexity in assemblys
 #define EXPANDED_MVD_RANGE ((504+1)<<1)
 
-enum
-{
+enum {
   ME_DIA    = 0x01,  // LITTLE DIAMOND= 0x01
   ME_CROSS  = 0x02,  // CROSS=  0x02
   ME_FME    = 0x04,  // FME = 0x04
@@ -229,6 +228,9 @@
                                               uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
 void SumOf16x16BlockOfFrame_c(uint8_t *pRefPicture, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
                                               uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
+int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
+                                         SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
+int32_t ReleaseScreenBlockFeatureStorage( CMemoryAlign *pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage );
 //inline functions
 inline void SetMvWithinIntegerMvRange( const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX, const int32_t kiMbY,
                         const int32_t kiMaxMvRange,
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -138,14 +138,14 @@
 typedef void (*PMotionSearchFunc) (SWelsFuncPtrList* pFuncList, void* pCurDqLayer, void* pMe,
                                    void* pSlice);
 typedef void (*PSearchMethodFunc) (SWelsFuncPtrList* pFuncList, void* pMe, void* pSlice, const int32_t kiEncStride, const int32_t kiRefStride);
-typedef void (*PCalculateSatdFunc) ( PSampleSadSatdCostFunc pSatd, void * vpMe, const int32_t kiEncStride, const int32_t kiRefStride);
+typedef void (*PCalculateSatdFunc) ( PSampleSadSatdCostFunc pSatd, void * vpMe, const int32_t kiEncStride, const int32_t kiRefStride );
 typedef bool (*PCheckDirectionalMv) (PSampleSadSatdCostFunc pSad, void * vpMe,
                       const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
                       int32_t& iBestSadCost);
-typedef void (*PLineFullSearchFunc) (  void *pFunc, void *vpMe,
-                          uint16_t* pMvdTable, const int32_t kiFixedMvd,
-                          const int32_t kiEncStride, const int32_t kiRefStride,
-                          const int32_t kiMinPos, const int32_t kiMaxPos,
+typedef void (*PLineFullSearchFunc) (	void *pFunc, void *vpMe,
+													uint16_t* pMvdTable, const int32_t kiFixedMvd,
+													const int32_t kiEncStride, const int32_t kiRefStride,
+													const int32_t kiMinPos, const int32_t kiMaxPos,
                           const bool bVerticalSearch );
 typedef void (*PCalculateBlockFeatureOfFrame)(uint8_t *pRef, const int32_t kiWidth, const int32_t kiHeight, const int32_t kiRefStride,
                                               uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -707,8 +707,13 @@
   pParam	= (*ppCtx)->pSvcParam;
   iDlayerCount	= pParam->iSpatialLayerNum;
   iNumRef	= pParam->iNumRefFrame;
-//	highest_layers_in_temporal = 1 + WELS_MAX(pParam->iDecompStages, 1);
 
+  const int32_t kiFeatureStrategyIndex = 0;
+  const int32_t kiMe16x16 = ME_DIA_CROSS;
+  const int32_t kiMe8x8 = ME_DIA_CROSS_FME;
+  const int32_t kiNeedFeatureStorage = (pParam->iUsageType != SCREEN_CONTENT_REAL_TIME)?0:
+    ((kiFeatureStrategyIndex<<16) +  ((kiMe16x16 & 0x00FF)<<8) + (kiMe8x8 & 0x00FF));
+
   iDlayerIndex			= 0;
   while (iDlayerIndex < iDlayerCount) {
     SRefList* pRefList			= NULL;
@@ -727,9 +732,8 @@
     // pRef list
     pRefList		= (SRefList*)pMa->WelsMallocz (sizeof (SRefList), "pRefList");
     WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList), FreeMemorySvc (ppCtx))
-
     do {
-      pRefList->pRef[i]	= AllocPicture (pMa, kiWidth, kiHeight, true);	// to use actual size of current layer
+      pRefList->pRef[i]	= AllocPicture (pMa, kiWidth, kiHeight, true, kiNeedFeatureStorage);	// to use actual size of current layer
       WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList->pRef[i]), FreeMemorySvc (ppCtx))
       ++ i;
     } while (i < 1 + iNumRef);
--- a/codec/encoder/core/src/picture_handle.cpp
+++ b/codec/encoder/core/src/picture_handle.cpp
@@ -37,6 +37,7 @@
  *
  *************************************************************************************/
 #include "picture_handle.h"
+#include "svc_motion_estimate.h"
 
 namespace WelsSVCEnc {
 /*!
@@ -47,7 +48,8 @@
  * \pram	need_expand		need borders expanding
  * \return	successful if effective picture pointer returned, otherwise failed with NULL
  */
-SPicture* AllocPicture (CMemoryAlign* pMa, const int32_t kiWidth , const int32_t kiHeight, bool bNeedMbInfo) {
+SPicture* AllocPicture (CMemoryAlign* pMa, const int32_t kiWidth , const int32_t kiHeight,
+                        bool bNeedMbInfo, int32_t iNeedFeatureStorage) {
   SPicture* pPic = NULL;
   int32_t iPicWidth = 0;
   int32_t iPicHeight = 0;
@@ -107,6 +109,15 @@
     WELS_VERIFY_RETURN_PROC_IF (NULL, NULL == pPic->pMbSkipSad, FreePicture (pMa, &pPic));
   }
 
+  if (iNeedFeatureStorage) {
+    pPic->pScreenBlockFeatureStorage = static_cast<SScreenBlockFeatureStorage*> (pMa->WelsMallocz (sizeof (SScreenBlockFeatureStorage), "pScreenBlockFeatureStorage"));
+    int32_t iReturn = RequestScreenBlockFeatureStorage(pMa, kiWidth,  kiHeight, iNeedFeatureStorage,
+      pPic->pScreenBlockFeatureStorage );
+
+    WELS_VERIFY_RETURN_PROC_IF (NULL, ENC_RETURN_SUCCESS != iReturn, FreePicture (pMa, &pPic));
+  } else {
+    pPic->pScreenBlockFeatureStorage = NULL;
+  }
   return pPic;
 }
 
@@ -126,10 +137,10 @@
     pPic->pBuffer		= NULL;
     pPic->pData[0]	=
       pPic->pData[1]	=
-        pPic->pData[2]	= NULL;
+      pPic->pData[2]	= NULL;
     pPic->iLineSize[0] =
       pPic->iLineSize[1] =
-        pPic->iLineSize[2] = 0;
+      pPic->iLineSize[2] = 0;
 
     pPic->iWidthInPixel		= 0;
     pPic->iHeightInPixel	= 0;
@@ -157,6 +168,13 @@
       pMa->WelsFree (pPic->pMbSkipSad, "pPic->pMbSkipSad");
       pPic->pMbSkipSad = NULL;
     }
+
+    if (pPic->pScreenBlockFeatureStorage) {
+      ReleaseScreenBlockFeatureStorage(pMa, pPic->pScreenBlockFeatureStorage);
+      pMa->WelsFree (pPic->pScreenBlockFeatureStorage, "pPic->pScreenBlockFeatureStorage");
+      pPic->pScreenBlockFeatureStorage = NULL;
+    }
+
     pMa->WelsFree (*ppPic, "pPic");
     *ppPic = NULL;
   }
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -96,9 +96,9 @@
  */
 
 void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, void* pLplayer, void* pLpme, void* pLpslice) {
-  SDqLayer* pCurDqLayer			= (SDqLayer*)pLplayer;
-  SWelsME* pMe						= (SWelsME*)pLpme;
-  SSlice* pSlice					= (SSlice*)pLpslice;
+  SDqLayer* pCurDqLayer      = (SDqLayer*)pLplayer;
+  SWelsME* pMe            = (SWelsME*)pLpme;
+  SSlice* pSlice          = (SSlice*)pLpslice;
   const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
   const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
 
@@ -237,8 +237,8 @@
 
 void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, void* pLpme, void* pLpslice,
                         const int32_t kiStrideEnc,  const int32_t kiStrideRef) {
-  SWelsME* pMe						= (SWelsME*)pLpme;
-  PSample4SadCostFunc			pSad					=  pFuncList->sSampleDealingFuncs.pfSample4Sad[pMe->uiBlockSize];
+  SWelsME* pMe            = (SWelsME*)pLpme;
+  PSample4SadCostFunc      pSad          =  pFuncList->sSampleDealingFuncs.pfSample4Sad[pMe->uiBlockSize];
 
   uint8_t* pFref = pMe->pRefMb;
   uint8_t* const kpEncMb = pMe->pEncMb;
@@ -542,18 +542,19 @@
   }
   return ENC_RETURN_UNEXPECTED;
 }
-int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFeatureStrategyIndex,
-                                         const int32_t kiFrameWidth,  const int32_t kiFrameHeight, const int32_t kiMe16x16,  const int32_t kiMe8x8,
+
+int32_t RequestScreenBlockFeatureStorage( CMemoryAlign *pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight, const int32_t iNeedFeatureStorage,
                                          SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
-#define LIST_SIZE_SUM_16x16  0x0FF01    //(256*255+1)
-#define LIST_SIZE_SUM_8x8      0x03FC1    //(64*255+1)
 
-  if (((kiMe8x8&ME_FME)==ME_FME) && ((kiMe16x16&ME_FME)==ME_FME)) {
+  const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage>>16;
+  const int32_t kiMe8x8FME = iNeedFeatureStorage & 0x0000FF & ME_FME;
+  const int32_t kiMe16x16FME = ((iNeedFeatureStorage & 0x00FF00)>>8) & ME_FME;
+  if ((kiMe8x8FME==ME_FME) && (kiMe16x16FME==ME_FME)) {
     return ENC_RETURN_UNSUPPORTED_PARA;
     //the following memory allocation cannot support when FME at both size
   }
 
-  const bool bIsBlock8x8 = ((kiMe8x8 & ME_FME)==ME_FME);
+  const bool bIsBlock8x8 = (kiMe8x8FME==ME_FME);
   const int32_t kiMarginSize = bIsBlock8x8?8:16;
   const int32_t kiFrameSize = (kiFrameWidth-kiMarginSize) * (kiFrameHeight-kiMarginSize);
   const int32_t kiListSize  = (0==kiFeatureStrategyIndex)?(bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16):256;
@@ -749,6 +750,7 @@
   pFeatureSearchOut->uiBestSadCost = uiBestSadCost;
   pFeatureSearchOut->pBestRef = pRef;
 }
+
 bool FeatureSearchOne( SFeatureSearchIn &sFeatureSearchIn, const int32_t iFeatureDifference, const uint32_t kuiExpectedSearchTimes,
                       SFeatureSearchOut* pFeatureSearchOut) {
   const int32_t iFeatureOfRef = (sFeatureSearchIn.iFeatureOfCurrent + iFeatureDifference);
@@ -819,6 +821,7 @@
   return (i < iSearchTimesx2);
 }
 
+
 void MotionEstimateFeatureFullSearch( SFeatureSearchIn &sFeatureSearchIn,
                                         const uint32_t kuiMaxSearchPoint,
                                         SWelsME* pMe) {
@@ -840,8 +843,8 @@
 // Search function options
 /////////////////////////
 void WelsDiamondCrossSearch(SWelsFuncPtrList *pFunc, void* vpMe, void* vpSlice, const int32_t kiEncStride,  const int32_t kiRefStride) {
-    SWelsME* pMe			 = static_cast<SWelsME *>(vpMe);
-    SSlice* pSlice				 = static_cast<SSlice *>(vpSlice);
+    SWelsME* pMe       = static_cast<SWelsME *>(vpMe);
+    SSlice* pSlice         = static_cast<SSlice *>(vpSlice);
 
     //  Step 1: diamond search
     WelsDiamondSearch(pFunc, vpMe, vpSlice, kiEncStride, kiRefStride);
@@ -854,8 +857,8 @@
     }
 }
 void WelsDiamondCrossFeatureSearch(SWelsFuncPtrList *pFunc, void* vpMe, void* vpSlice, const int32_t kiEncStride, const int32_t kiRefStride) {
-    SWelsME* pMe			 = static_cast<SWelsME *>(vpMe);
-    SSlice* pSlice				 = static_cast<SSlice *>(vpSlice);
+    SWelsME* pMe       = static_cast<SWelsME *>(vpMe);
+    SSlice* pSlice         = static_cast<SSlice *>(vpSlice);
 
     //  Step 1: diamond search + cross
     WelsDiamondCrossSearch(pFunc, pMe, pSlice, kiEncStride, kiRefStride);
@@ -875,5 +878,6 @@
         pSlice->uiSliceFMECostDown -= pMe->uiSadCost;
     }
 }
+
 } // namespace WelsSVCEnc
 
--- a/codec/encoder/core/src/wels_preprocess.cpp
+++ b/codec/encoder/core/src/wels_preprocess.cpp
@@ -134,7 +134,7 @@
     uint8_t i = 0;
 
     do {
-      SPicture* pPic = AllocPicture (pMa, kiPicWidth, kiPicHeight, false);
+      SPicture* pPic = AllocPicture (pMa, kiPicWidth, kiPicHeight, false, 0);
       WELS_VERIFY_RETURN_IF (1, (NULL == pPic))
       m_pSpatialPic[iDlayerIndex][i] = pPic;
       ++ i;
@@ -435,7 +435,7 @@
   bool bInputPicNeedScaling = JudgeNeedOfScaling (pParam, pScaledPicture);
   if (bInputPicNeedScaling) {
     pScaledPicture->pScaledInputPicture = AllocPicture (pMemoryAlign, pParam->SUsedPicRect.iWidth,
-                                          pParam->SUsedPicRect.iHeight, false);
+                                          pParam->SUsedPicRect.iHeight, false, 0);
     if (pScaledPicture->pScaledInputPicture == NULL)
       return -1;
   }