shithub: openh264

Download patch

ref: 881298ed3180f5cf682311e5dee8297d665ba116
parent: 9722adfe09c17ffc5cf1bf31cf2e768e160006fb
parent: 12616019b60a87bbcf71c945ea4a0f91ea175a49
author: Licai Guo <[email protected]>
date: Mon Mar 31 04:59:09 EDT 2014

Merge pull request #595 from sijchen/fme_merge12

[Encoder ME] Add feature search basic functions

--- a/codec/encoder/core/inc/picture.h
+++ b/codec/encoder/core/inc/picture.h
@@ -39,6 +39,27 @@
 #include "wels_common_basis.h"
 
 namespace WelsSVCEnc {
+#define LIST_SIZE			0x10000		//(256*256) = 65536 possible feature values; FeatureSearchOne rejects feature values outside [0, LIST_SIZE)
+typedef struct TagScreenBlockFeatureStorage
+{
+	uint16_t*	pFeatureOfBlock;		// Feature value of every block (8x8); "begin with the point" presumably means one entry per block anchor position -- TODO confirm
+	uint32_t*	pTimesOfFeatureValue;		// occurrence count of every feature value; FeatureSearchOne uses it as the number of stored locations for that value
+	uint16_t**	pLocationOfFeature;			// uint16_t *pLocationOfFeature[LIST_SIZE]; pLocationOfFeature[i] lists all the locations (x,y) whose Feature == i, read by FeatureSearchOne as consecutive x,y pairs
+	uint16_t*	pLocationPointer;	// buffer of the position array (presumably the backing storage the pLocationOfFeature[i] entries point into -- confirm at the allocation site)
+	int32_t		iActualListSize;			// actual list size (8x8 based)
+} SScreenBlockFeatureStorage;
+
+typedef struct TagScreenContentStorage{ // screen-content state: per-reference block features plus frame-level FME switch fields
+  SScreenBlockFeatureStorage	sRefBlockFeature[MAX_MULTI_REF_PIC_COUNT]; // array of block-feature storages, MAX_MULTI_REF_PIC_COUNT entries
+	bool						bRefBlockFeatureCalculated; // flag of whether the pre-process is done
+	uint8_t				uiFeatureStrategyIndex;// index of the hash strategy
+
+	/* for FME frame-level switch (none of the three fields below is touched by the feature-search functions visible in this patch) */
+	bool bFMESwitchFlag;
+	uint8_t uiFMEGoodFrameCount;
+	int32_t iHighFreMbCount;
+}SScreenContentStorage;
+
 
 /*
  *	Reconstructed Picture definition
--- a/codec/encoder/core/inc/svc_enc_frame.h
+++ b/codec/encoder/core/inc/svc_enc_frame.h
@@ -47,10 +47,6 @@
 
 #include "svc_enc_slice_segment.h"
 namespace WelsSVCEnc {
-/*
- *	Need fine adjust below structure later for SVC extension optimization
- */
-
 
 /*
  *	Frame level in SVC DQLayer instead.
--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -82,6 +82,39 @@
 SMVUnitXY					sMv;
 } SWelsME;
 
+typedef struct TagFeatureSearchIn{ // input bundle read by FeatureSearchOne; SetFeatureSearchIn fills every field except iFeatureOfCurrent
+    PSampleSadSatdCostFunc pSad; // SAD function used to score one candidate block
+
+    uint32_t* pTimesOfFeature; // indexed by feature value: number of locations stored for that value
+    uint16_t** pQpelLocationOfFeature; // indexed by feature value: consecutive (x, y) uint16_t pairs, compared against the Qpel fields below
+    uint16_t *pMvdCostX; // MVD cost table rebased (see SetFeatureSearchIn) so it is indexed by an absolute Qpel x coordinate
+    uint16_t *pMvdCostY; // same as pMvdCostX, for the absolute Qpel y coordinate
+
+    uint8_t* pEnc; // block being encoded (sMe.pEncMb)
+    uint8_t* pColoRef; // base reference pointer (sMe.pColoRefMb); candidates are addressed as pColoRef[dx + dy * iRefStride]
+    int32_t iEncStride; // stride passed to pSad for pEnc
+    int32_t iRefStride; // stride passed to pSad for the reference, also used to address candidates
+    uint16_t uiSadCostThresh; // FeatureSearchOne stops as soon as the best cost falls below this value
+
+    int32_t iFeatureOfCurrent; // feature value of the current block; must be set by the caller (SetFeatureSearchIn leaves it unassigned)
+
+    int32_t iCurPixX; // current block position (sMe.iCurMeBlockPixX)
+    int32_t iCurPixY; // current block position (sMe.iCurMeBlockPixY)
+    int32_t iCurPixXQpel; // iCurPixX << 2
+    int32_t iCurPixYQpel; // iCurPixY << 2
+
+    int32_t iMinQpelX; // candidates with x below this are skipped
+    int32_t iMinQpelY; // candidates with y below this are skipped
+    int32_t iMaxQpelX; // candidates with x above this are skipped
+    int32_t iMaxQpelY; // candidates with y above this are skipped
+}SFeatureSearchIn;
+
+typedef struct TagFeatureSearchOut{ // running best result of the feature search; read as the starting point and rewritten by FeatureSearchOne
+    SMVUnitXY sBestMv; // motion vector of the best candidate so far
+    uint32_t uiBestSadCost; // cost of the best candidate so far (SAD plus MVD cost when written by FeatureSearchOne)
+    uint8_t* pBestRef; // pointer to the reference block of the best candidate
+}SFeatureSearchOut;
+
 #define  COST_MVD(table, mx, my)  (table[mx] + table[my])
 
 // Function definitions below
--- a/codec/encoder/core/inc/wels_const.h
+++ b/codec/encoder/core/inc/wels_const.h
@@ -150,6 +150,7 @@
 #define MAX_LONG_REF_COUNT		2 // 16 in standard, maximal count number of long reference pictures
 #define MAX_REF_PIC_COUNT		16 // 32 in standard, maximal Short + Long reference pictures
 #define MIN_REF_PIC_COUNT		1		// minimal count number of reference pictures, 1 short + 2 key reference based?
+#define MAX_MULTI_REF_PIC_COUNT	1	//maximum multi-reference number
 //#define TOTAL_REF_MINUS_HALF_GOP	1	// last t0 in last gop
 #define MAX_MMCO_COUNT			66
 
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -66,7 +66,6 @@
 
     //for cross serarch
     pFuncList->pfLineFullSearch = LineFullSearch_c;
-    pFuncList->pfLineFullSearch = LineFullSearch_c;
     if ( uiCpuFlag & WELS_CPU_SSE41 ) {
     }
   }
@@ -171,6 +170,21 @@
   return false;
 }
 
+void CalculateSatdCost( PSampleSadSatdCostFunc pSatd, void * vpMe,
+                       const int32_t kiEncStride, const int32_t kiRefStride ) { // Stores SATD and SATD + MVD cost of the current ME result into the SWelsME behind vpMe.
+  SWelsME* pMe						 = static_cast<SWelsME *>(vpMe); // vpMe is an untyped SWelsME pointer
+  pMe->uSadPredISatd.uiSatd = pSatd(pMe->pEncMb, kiEncStride, pMe->pRefMb, kiRefStride); // SATD between the source block and the currently selected reference block
+  pMe->uiSatdCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, pMe->sMv.iMvX - pMe->sMvp.iMvX, // total cost = SATD + table cost of (mv - predicted mv), x and y summed by COST_MVD
+                                                            pMe->sMv.iMvY - pMe->sMvp.iMvY);
+}
+void NotCalculateSatdCost( PSampleSadSatdCostFunc pSatd, void * vpMe,
+                          const int32_t kiEncStride, const int32_t kiRefStride ) { // No-op with the same signature as CalculateSatdCost; all arguments are ignored (presumably installed when the SATD cost is not needed -- confirm at the call sites).
+}
+
+
+/////////////////////////
+// Diamond Search Related
+/////////////////////////
 bool WelsMeSadCostSelect (int32_t* iSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx,
                             const int32_t kiDy, int32_t* pIx, int32_t* pIy) {
   int32_t iTempSadCost[4];
@@ -203,8 +217,6 @@
     *pIx = -1;
     *pIy = 0;
   }
-
-
   return (*pBestCost == iInputSadCost);
 }
 
@@ -247,18 +259,9 @@
   pMe->pRefMb = pRefMb;
 }
 
-void CalculateSatdCost( PSampleSadSatdCostFunc pSatd, void * vpMe,
-                       const int32_t kiEncStride, const int32_t kiRefStride ) {
-  SWelsME* pMe						 = static_cast<SWelsME *>(vpMe);
-  pMe->uSadPredISatd.uiSatd = pSatd(pMe->pEncMb, kiEncStride, pMe->pRefMb, kiRefStride);
-  pMe->uiSatdCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, pMe->sMv.iMvX - pMe->sMvp.iMvX,
-                                                            pMe->sMv.iMvY - pMe->sMvp.iMvY);
-}
-void NotCalculateSatdCost( PSampleSadSatdCostFunc pSatd, void * vpMe,
-                          const int32_t kiEncStride, const int32_t kiRefStride ) {
-}
-
-
+/////////////////////////
+// DirectionalMv Related
+/////////////////////////
 bool CheckDirectionalMv(PSampleSadSatdCostFunc pSad, void * vpMe,
                       const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
                       int32_t& iBestSadCost) {
@@ -287,6 +290,9 @@
   return false;
 }
 
+/////////////////////////
+// Cross Search Related
+/////////////////////////
 void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
 														uint16_t* pMvdTable, const int32_t kiFixedMvd,
 														const int32_t kiEncStride, const int32_t kiRefStride,
@@ -360,4 +366,130 @@
       false );
   }
 }
+
+/////////////////////////
+// Feature Search Related
+/////////////////////////
+void SetFeatureSearchIn( SWelsFuncPtrList *pFunc,  const SWelsME& sMe,
+                        const SSlice *pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
+                        const int32_t kiEncStride, const int32_t kiRefStride,
+                        SFeatureSearchIn* pFeatureSearchIn ) { // Fills *pFeatureSearchIn from the ME state (sMe), the slice MV start range and the reference's feature storage.
+  pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize]; // SAD routine selected by the ME block size
+  //pFeatureSearchIn->iFeatureOfCurrent=   (TODO: left unassigned here although FeatureSearchOne reads it -- the caller has to set it before searching)
+  // Source/reference pointers, strides and the early-exit threshold are copied straight from the arguments.
+  pFeatureSearchIn->pEnc       = sMe.pEncMb;
+  pFeatureSearchIn->pColoRef = sMe.pColoRefMb;
+  pFeatureSearchIn->iEncStride = kiEncStride;
+  pFeatureSearchIn->iRefStride = kiRefStride;
+  pFeatureSearchIn->uiSadCostThresh = sMe.uiSadCostThreshold;
+  // Current block position, as given and shifted left by 2 (x4, the "Qpel" unit the location lists are compared in).
+  pFeatureSearchIn->iCurPixX = sMe.iCurMeBlockPixX;
+  pFeatureSearchIn->iCurPixXQpel = (pFeatureSearchIn->iCurPixX<<2);
+  pFeatureSearchIn->iCurPixY = sMe.iCurMeBlockPixY;
+  pFeatureSearchIn->iCurPixYQpel = (pFeatureSearchIn->iCurPixY<<2);
+  // Feature tables of the reference, then MVD cost tables rebased so they can be indexed by an absolute Qpel coordinate.
+  pFeatureSearchIn->pTimesOfFeature = pRefFeatureStorage->pTimesOfFeatureValue;
+  pFeatureSearchIn->pQpelLocationOfFeature = pRefFeatureStorage->pLocationOfFeature;
+  pFeatureSearchIn->pMvdCostX = sMe.pMvdCost - pFeatureSearchIn->iCurPixXQpel - sMe.sMvp.iMvX; // pMvdCostX[q] == sMe.pMvdCost[q - iCurPixXQpel - sMvp.iMvX]
+  pFeatureSearchIn->pMvdCostY = sMe.pMvdCost - pFeatureSearchIn->iCurPixYQpel - sMe.sMvp.iMvY; // pMvdCostY[q] == sMe.pMvdCost[q - iCurPixYQpel - sMvp.iMvY]
+  // Candidate window: current Qpel position plus the slice's MV start range shifted left by 2.
+  pFeatureSearchIn->iMinQpelX = pFeatureSearchIn->iCurPixXQpel+((pSlice->sMvStartMin.iMvX)<<2); // NOTE(review): "<<2" on a negative value is undefined before C++20 -- verify whether sMvStartMin can be negative
+  pFeatureSearchIn->iMinQpelY = pFeatureSearchIn->iCurPixYQpel+((pSlice->sMvStartMin.iMvY)<<2);
+  pFeatureSearchIn->iMaxQpelX = pFeatureSearchIn->iCurPixXQpel+((pSlice->sMvStartMax.iMvX)<<2);
+  pFeatureSearchIn->iMaxQpelY = pFeatureSearchIn->iCurPixYQpel+((pSlice->sMvStartMax.iMvY)<<2);
+}
+void SaveFeatureSearchOut( const SMVUnitXY sBestMv, const uint32_t uiBestSadCost, uint8_t* pRef, SFeatureSearchOut* pFeatureSearchOut) { // Copies the three result values into *pFeatureSearchOut.
+  pFeatureSearchOut->sBestMv = sBestMv; // best motion vector
+  pFeatureSearchOut->uiBestSadCost = uiBestSadCost; // cost belonging to that vector
+  pFeatureSearchOut->pBestRef = pRef; // reference block belonging to that vector
+}
+bool FeatureSearchOne( SFeatureSearchIn &sFeatureSearchIn, const int32_t iFeatureDifference, const uint32_t kuiExpectedSearchTimes,
+                      SFeatureSearchOut* pFeatureSearchOut) { // Scores stored locations whose feature == iFeatureOfCurrent + iFeatureDifference and keeps the cheapest in *pFeatureSearchOut; returns true when it stopped early.
+  const int32_t iFeatureOfRef = (sFeatureSearchIn.iFeatureOfCurrent + iFeatureDifference); // feature value to look up in the reference tables
+  if(iFeatureOfRef < 0 || iFeatureOfRef >= LIST_SIZE) // value outside the tables: return true without touching *pFeatureSearchOut
+    return true;
+  // Cache the search inputs in locals.
+  PSampleSadSatdCostFunc pSad = sFeatureSearchIn.pSad;
+  uint8_t* pEnc =  sFeatureSearchIn.pEnc;
+  uint8_t* pColoRef = sFeatureSearchIn.pColoRef;
+  const int32_t iEncStride=  sFeatureSearchIn.iEncStride;
+  const int32_t iRefStride =  sFeatureSearchIn.iRefStride;
+  const uint16_t uiSadCostThresh = sFeatureSearchIn.uiSadCostThresh;
+
+  const int32_t iCurPixX = sFeatureSearchIn.iCurPixX;
+  const int32_t iCurPixY = sFeatureSearchIn.iCurPixY;
+  const int32_t iCurPixXQpel = sFeatureSearchIn.iCurPixXQpel;
+  const int32_t iCurPixYQpel = sFeatureSearchIn.iCurPixYQpel;
+
+  const int32_t iMinQpelX =  sFeatureSearchIn.iMinQpelX;
+  const int32_t iMinQpelY =  sFeatureSearchIn.iMinQpelY;
+  const int32_t iMaxQpelX =  sFeatureSearchIn.iMaxQpelX;
+  const int32_t iMaxQpelY =  sFeatureSearchIn.iMaxQpelY;
+
+  const int32_t iSearchTimes = WELS_MIN(sFeatureSearchIn.pTimesOfFeature[iFeatureOfRef], kuiExpectedSearchTimes); // candidates to try: stored count for this feature value, capped by the caller's limit
+  const int32_t iSearchTimesx2 = (iSearchTimes<<1); // each candidate occupies two uint16_t entries (x, y)
+  const uint16_t* pQpelPosition = sFeatureSearchIn.pQpelLocationOfFeature[iFeatureOfRef]; // location list for this feature value
+
+  SMVUnitXY sBestMv;
+  uint32_t uiBestCost, uiTmpCost;
+  uint8_t *pBestRef, *pCurRef;
+  int32_t iQpelX, iQpelY;
+  int32_t iIntepelX, iIntepelY;
+  int32_t i;
+  // Start from the best result already held in *pFeatureSearchOut.
+  sBestMv.iMvX = pFeatureSearchOut->sBestMv.iMvX;
+  sBestMv.iMvY = pFeatureSearchOut->sBestMv.iMvY;
+  uiBestCost = pFeatureSearchOut->uiBestSadCost;
+  pBestRef = pFeatureSearchOut->pBestRef;
+
+  for( i = 0; i < iSearchTimesx2; i+=2) {
+    iQpelX = pQpelPosition[i];
+    iQpelY = pQpelPosition[i+1];
+    // Skip candidates outside the [Min, Max] window, and candidates sharing the current block's x or y coordinate.
+    if((iQpelX > iMaxQpelX) || (iQpelX < iMinQpelX)
+      || (iQpelY > iMaxQpelY) || (iQpelY < iMinQpelY)
+      || (iQpelX == iCurPixXQpel) || (iQpelY == iCurPixYQpel) ) // NOTE(review): "||" drops every candidate on the same column or row, not only the identical position -- confirm this is intended
+      continue;
+    // MVD cost first: the rebased tables are indexed directly by the candidate's absolute coordinates.
+    uiTmpCost = sFeatureSearchIn.pMvdCostX[ iQpelX ] + sFeatureSearchIn.pMvdCostY[ iQpelY ];
+    if(uiTmpCost + iFeatureDifference >= uiBestCost) // cheap reject before the SAD call; NOTE(review): the int32_t iFeatureDifference is converted to unsigned here, so a negative value would wrap
+      continue;
+    // Convert to a displacement relative to the current block (coordinate >> 2, minus the block position) and locate the candidate block.
+    iIntepelX = (iQpelX>>2) - iCurPixX;
+    iIntepelY = (iQpelY>>2) - iCurPixY;
+    pCurRef = &pColoRef[iIntepelX + iIntepelY * iRefStride];
+    uiTmpCost += pSad( pEnc, iEncStride, pCurRef, iRefStride ); // full cost = MVD cost + SAD
+    if( uiTmpCost < uiBestCost ) {
+      sBestMv.iMvX = iIntepelX;
+      sBestMv.iMvY = iIntepelY;
+      uiBestCost = uiTmpCost;
+      pBestRef = pCurRef;
+
+      if(uiBestCost < uiSadCostThresh) // good enough: stop searching
+        break;
+    }
+  }
+  SaveFeatureSearchOut(sBestMv, uiBestCost, pBestRef, pFeatureSearchOut); // write the (possibly unchanged) best result back
+  return (i < iSearchTimesx2); // true only when the loop was left through the threshold break
+}
+
+
+void MotionEstimateFeatureFullSearchScc( SFeatureSearchIn &sFeatureSearchIn,
+                                        const uint32_t kiMaxSearchPoint,
+                                        SWelsME* pMe) { // Runs one feature search seeded with pMe's current result and updates pMe when a cheaper candidate is found.
+  SFeatureSearchOut sFeatureSearchOut = {0}; // zero-initialised, then seeded from pMe below
+  sFeatureSearchOut.uiBestSadCost = pMe->uiSadCost;
+  sFeatureSearchOut.sBestMv = pMe->sMv;
+  sFeatureSearchOut.pBestRef = pMe->pRefMb;
+  // Feature difference 0: only locations with exactly the current block's feature value, at most kiMaxSearchPoint of them; the bool result is ignored.
+  FeatureSearchOne( sFeatureSearchIn, 0, kiMaxSearchPoint, &sFeatureSearchOut );
+  if ( sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost ) { // strictly better than what pMe already holds
+    UpdateMeResults(sFeatureSearchOut.sBestMv, // UpdateMeResults is defined outside the lines visible in this patch
+      sFeatureSearchOut.uiBestSadCost, sFeatureSearchOut.pBestRef,
+      pMe);
+  }
+}
+
+
 } // namespace WelsSVCEnc
+