shithub: openh264

Download patch

ref: e508c86daca3c0b97b7603b6ba61ddeb554c8ff7
parent: beacba76e33841be0b26013a735081a8b8f54407
author: sijchen <[email protected]>
date: Thu Nov 12 08:15:07 EST 2015

fix the missing loadbalancing part

--- a/codec/encoder/core/inc/svc_enc_slice_segment.h
+++ b/codec/encoder/core/inc/svc_enc_slice_segment.h
@@ -89,6 +89,8 @@
 int32_t*                pCountMbNumInSlice;     /* count number of MBs in every slice respectively; */
 uint32_t                uiSliceSizeConstraint;  /* in byte */
 int32_t                 iMaxSliceNumConstraint; /* maximal number of slices constraint */
+
+uint32_t*               pSliceConsumeTime;
 } SSliceCtx;
 
 
--- a/codec/encoder/core/inc/wels_task_encoder.h
+++ b/codec/encoder/core/inc/wels_task_encoder.h
@@ -90,8 +90,8 @@
 
 class CWelsLoadBalancingSlicingEncodingTask : public CWelsSliceEncodingTask {
  public:
-  CWelsLoadBalancingSlicingEncodingTask (sWelsEncCtx* pCtx, const int32_t iSliceIdx);
-  ~CWelsLoadBalancingSlicingEncodingTask();
+  CWelsLoadBalancingSlicingEncodingTask(sWelsEncCtx* pCtx, const int32_t iSliceIdx) : CWelsSliceEncodingTask (pCtx, iSliceIdx) {
+  };
 
   virtual WelsErrorType InitTask();
   virtual void FinishTask();
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -4839,6 +4839,17 @@
   pMA->WelsFree (pCurLayer->pSliceEncCtx->pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
   pCurLayer->pSliceEncCtx->pCountMbNumInSlice = pCountMbNumInSlice;
 
+  uint32_t* pSliceConsumeTime = (uint32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (uint32_t),
+                                                          "pSliceSeg->pSliceConsumeTime");
+  if (NULL == pSliceConsumeTime) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+             "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSliceConsumeTime not successful");
+    return ENC_RETURN_MEMALLOCERR;
+  }
+  memcpy (pSliceConsumeTime, pCurLayer->pSliceEncCtx->pSliceConsumeTime, sizeof (int32_t) * iMaxSliceNumOld);
+  pMA->WelsFree (pCurLayer->pSliceEncCtx->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
+  pCurLayer->pSliceEncCtx->pSliceConsumeTime = pSliceConsumeTime;
+
   //deal with rate control variables
   const int32_t kiCurDid = pCtx->uiDependencyId;
   SRCSlicing* pSlcingOverRc = (SRCSlicing*)pMA->WelsMalloc (iMaxSliceNum * sizeof (SRCSlicing), "SlicingOverRC");
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -65,7 +65,7 @@
 #include "wels_task_management.h"
 
 #if defined(ENABLE_TRACE_MT)
-#define MT_TRACE_LOG(x, ...) WelsLog(x, __VA_ARGS__)
+#define MT_TRACE_LOG(pLog, x, ...) WelsLog(pLog, x, __VA_ARGS__)
 #else
 #define MT_TRACE_LOG(x, ...)
 #endif
@@ -812,7 +812,7 @@
         if (bDsaFlag) {
           pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx] = (uint32_t) (
                 WelsTime() - iSliceStart);
-          MT_TRACE_LOG (pEncPEncCtx, WELS_LOG_INFO,
+          MT_TRACE_LOG (&(pEncPEncCtx->sLogCtx), WELS_LOG_INFO,
                         "[MT] CodingSliceThreadProc(), coding_idx %d, uiSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, pFirstMbInSlice %d, count_num_mb_in_slice %d",
                         pEncPEncCtx->iCodingIndex, iSliceIdx,
                         pEncPEncCtx->pSliceThreading->pSliceConsumeTime[pEncPEncCtx->uiDependencyId][iSliceIdx], iSliceSize,
@@ -1060,7 +1060,7 @@
 #endif//MT_DEBUG
 
   pCtx->pCurDqLayer = pCurDq;
-
+  memcpy((pCtx->pSliceThreading->pSliceConsumeTime[0]), (pCurDq->pSliceEncCtx->pSliceConsumeTime), pCurDq->pSliceEncCtx->iSliceNumInFrame*sizeof(uint32_t));
   // do not need adjust due to not different at both slices of consumed time
   iNeedAdj = NeedDynamicAdjust (pCtx->pSliceThreading->pSliceConsumeTime[0], pCurDq->pSliceEncCtx->iSliceNumInFrame);
   if (iNeedAdj)
@@ -1092,6 +1092,7 @@
                                       && (pCtx->pSvcParam->sSpatialLayers[iCurDid - 1].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE
                                           && pCtx->pSvcParam->iMultipleThreadIdc >= pCtx->pSvcParam->sSpatialLayers[iCurDid -
                                               1].sSliceArgument.uiSliceNum);
+  memcpy((pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]), (pCtx->pCurDqLayer->pSliceEncCtx->pSliceConsumeTime), pCtx->pCurDqLayer->pSliceEncCtx->iSliceNumInFrame*sizeof(uint32_t));
 
   if (kbModelingFromSpatial) { // using spatial base layer for complexity estimation
     // do not need adjust due to not different at both slices of consumed time
--- a/codec/encoder/core/src/svc_enc_slice_segment.cpp
+++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp
@@ -382,6 +382,11 @@
 
       pSliceSeg->pCountMbNumInSlice = NULL;
     }
+    if (NULL != pSliceSeg->pSliceConsumeTime) {
+      pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
+
+      pSliceSeg->pSliceConsumeTime = NULL;
+    }
     // just for safe
     pSliceSeg->iSliceNumInFrame = 0;
     pSliceSeg->iMbNumInFrame    = 0;
@@ -405,6 +410,7 @@
                                     "pSliceSeg->pCountMbNumInSlice");
 
     WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
+    pSliceSeg->pSliceConsumeTime = NULL;
     pSliceSeg->uiSliceMode              = uiSliceMode;
     pSliceSeg->iMbWidth                 = kiMbWidth;
     pSliceSeg->iMbHeight                = kiMbHeight;
@@ -419,7 +425,6 @@
       return 1;
 
     pSliceSeg->pOverallMbMap = (uint16_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap");
-
     WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap)
 
     WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap, 0, kiCountMbNum, sizeof(uint16_t));
@@ -426,20 +431,23 @@
 
     //SM_SIZELIMITED_SLICE: init, set pSliceSeg->iSliceNumInFrame = 1;
     pSliceSeg->iSliceNumInFrame = GetInitialSliceNum (kiMbWidth, kiMbHeight, pSliceArgument);
-
     if (-1 == pSliceSeg->iSliceNumInFrame)
       return 1;
 
     pSliceSeg->pCountMbNumInSlice = (int32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (int32_t),
                                     "pSliceSeg->pCountMbNumInSlice");
-
     WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pCountMbNumInSlice)
 
     pSliceSeg->pFirstMbInSlice = (int32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (int32_t),
                                     "pSliceSeg->pFirstMbInSlice");
-
     WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pFirstMbInSlice)
+
+    pSliceSeg->pSliceConsumeTime = (uint32_t*)pMa->WelsMalloc (pSliceSeg->iSliceNumInFrame * sizeof (uint32_t),
+                                                             "pSliceSeg->pSliceConsumeTime");
+    WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pSliceConsumeTime)
+
     pSliceSeg->uiSliceMode      = pSliceArgument->uiSliceMode;
+
     pSliceSeg->iMbWidth         = kiMbWidth;
     pSliceSeg->iMbHeight        = kiMbHeight;
     pSliceSeg->iMbNumInFrame    = kiCountMbNum;
@@ -487,6 +495,11 @@
       pMa->WelsFree (pSliceSeg->pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
 
       pSliceSeg->pCountMbNumInSlice = NULL;
+    }
+    if (NULL != pSliceSeg->pSliceConsumeTime) {
+      pMa->WelsFree (pSliceSeg->pSliceConsumeTime, "pSliceSeg->pSliceConsumeTime");
+
+      pSliceSeg->pSliceConsumeTime = NULL;
     }
 
     pSliceSeg->iMbNumInFrame    = 0;
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -192,14 +192,6 @@
 
 
 // CWelsLoadBalancingSlicingEncodingTask
-CWelsLoadBalancingSlicingEncodingTask::CWelsLoadBalancingSlicingEncodingTask (sWelsEncCtx* pCtx,
-    const int32_t iSliceIdx) :
-  CWelsSliceEncodingTask (pCtx, iSliceIdx) {
-}
-
-CWelsLoadBalancingSlicingEncodingTask::~CWelsLoadBalancingSlicingEncodingTask() {
-}
-
 WelsErrorType CWelsLoadBalancingSlicingEncodingTask::InitTask() {
   WelsErrorType iReturn = CWelsSliceEncodingTask::InitTask();
   if (ENC_RETURN_SUCCESS != iReturn) {
@@ -217,12 +209,12 @@
 void CWelsLoadBalancingSlicingEncodingTask::FinishTask() {
   CWelsSliceEncodingTask::FinishTask();
 
-  m_pCtx->pSliceThreading->pSliceConsumeTime[m_uiDependencyId][m_iSliceIdx] = (uint32_t) (WelsTime() - m_iSliceStart);
+  m_pCtx->pCurDqLayer->pSliceEncCtx->pSliceConsumeTime[m_iSliceIdx] = (uint32_t) (WelsTime() - m_iSliceStart);
   WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
            "[MT] CWelsLoadBalancingSlicingEncodingTask()FinishTask, coding_idx %d, um_iSliceIdx %d, pSliceConsumeTime %d, iSliceSize %d, pFirstMbInSlice %d, count_num_mb_in_slice %d",
            m_pCtx->iCodingIndex,
            m_iSliceIdx,
-           m_pCtx->pSliceThreading->pSliceConsumeTime[m_uiDependencyId][m_iSliceIdx],
+           m_pCtx->pCurDqLayer->pSliceEncCtx->pSliceConsumeTime[m_iSliceIdx],
            m_iSliceSize,
            m_pCtx->pCurDqLayer->pSliceEncCtx->pFirstMbInSlice[m_iSliceIdx],
            m_pCtx->pCurDqLayer->pSliceEncCtx->pCountMbNumInSlice[m_iSliceIdx]);
--- a/codec/encoder/core/src/wels_task_management.cpp
+++ b/codec/encoder/core/src/wels_task_management.cpp
@@ -123,7 +123,8 @@
   }
 
   for (int idx = 0; idx < kiTaskCount; idx++) {
-    pTask = WELS_NEW_OP (CWelsSliceEncodingTask (pEncCtx, idx), CWelsSliceEncodingTask);
+    pTask = WELS_NEW_OP (CWelsLoadBalancingSlicingEncodingTask (pEncCtx, idx), CWelsLoadBalancingSlicingEncodingTask);
+    //TODO: set this after loadbalancing flagpTask = WELS_NEW_OP (CWelsSliceEncodingTask (pEncCtx, idx), CWelsSliceEncodingTask);
     WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pTask)
     m_cEncodingTaskList->push_back (pTask);
   }
--- a/test/api/encode_decode_api_test.cpp
+++ b/test/api/encode_decode_api_test.cpp
@@ -3921,3 +3921,122 @@
   }
 }
 
+
+TEST_F (EncodeDecodeTestAPI, TriggerLoadBalancing) {
+  int iSpatialLayerNum = 1;
+  int iWidth       = WelsClip3 ((((rand() % MAX_WIDTH) >> 1)  + 1) << 1, (64 << 2), MAX_WIDTH);
+  int iHeight      = WelsClip3 ((((rand() % MAX_HEIGHT) >> 1)  + 1) << 1, (64 << 2),
+                                2240);//TODO: use MAX_HEIGHT after the limit is removed
+  float fFrameRate = rand() + 0.5f;
+  int iEncFrameNum = WelsClip3 ((rand() % ENCODE_FRAME_NUM) + 1, 1, ENCODE_FRAME_NUM);
+
+  // prepare params
+  SEncParamExt   sParam;
+  encoder_->GetDefaultParams (&sParam);
+  prepareParamDefault (iSpatialLayerNum, 1, iWidth, iHeight, fFrameRate, &sParam);
+  sParam.iMultipleThreadIdc = (rand() % 8) + 1;
+  sParam.bSimulcastAVC = 1;
+  sParam.sSpatialLayers[0].iVideoWidth = iWidth;
+  sParam.sSpatialLayers[0].iVideoHeight = iHeight;
+  sParam.sSpatialLayers[0].sSliceArgument.uiSliceMode = SM_FIXEDSLCNUM_SLICE;
+  sParam.sSpatialLayers[0].sSliceArgument.uiSliceNum = sParam.iMultipleThreadIdc;
+
+  int rv = encoder_->InitializeExt (&sParam);
+  ASSERT_TRUE (rv == cmResultSuccess) << "Init Failed sParam: rv = " << rv;;
+
+  unsigned char*  pBsBuf[MAX_SPATIAL_LAYER_NUM];
+  ISVCDecoder* decoder[MAX_SPATIAL_LAYER_NUM];
+
+  int iIdx = 0;
+  int aLen[MAX_SPATIAL_LAYER_NUM] = {};
+
+  //create decoder
+  for (iIdx = 0; iIdx < iSpatialLayerNum; iIdx++) {
+    pBsBuf[iIdx] = static_cast<unsigned char*> (malloc (iWidth * iHeight * 3 * sizeof (unsigned char) / 2));
+    EXPECT_TRUE (pBsBuf[iIdx] != NULL);
+
+    long rv = WelsCreateDecoder (&decoder[iIdx]);
+    ASSERT_EQ (0, rv);
+    EXPECT_TRUE (decoder[iIdx] != NULL);
+
+    SDecodingParam decParam;
+    memset (&decParam, 0, sizeof (SDecodingParam));
+    decParam.eOutputColorFormat  = videoFormatI420;
+    decParam.uiTargetDqLayer = UCHAR_MAX;
+    decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
+    decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
+
+    rv = decoder[iIdx]->Initialize (&decParam);
+    ASSERT_EQ (0, rv);
+  }
+
+  rv = encoder_->SetOption (ENCODER_OPTION_SVC_ENCODE_PARAM_EXT, &sParam);
+  ASSERT_TRUE (rv == cmResultSuccess) << "SetOption Failed pParam: rv = " << rv;
+
+  //begin testing
+  for (int iFrame = 0; iFrame < iEncFrameNum; iFrame++) {
+    int iResult;
+    int iLayerLen = 0;
+    unsigned char* pData[3] = { NULL };
+
+    InitialEncDec (sParam.iPicWidth, sParam.iPicHeight);
+    int frameSize = EncPic.iPicWidth * EncPic.iPicHeight * 3 / 2;
+    memset (buf_.data(), rand() % 256, (frameSize >> 2));
+    memset (buf_.data() + (frameSize >> 2), rand() % 256, (frameSize - (frameSize >> 2)));
+
+    int iStartStrip = 0;
+    //during first half the complex strip is at top, then during the second half it is at bottom
+    if (iFrame > iEncFrameNum / 2) {
+      iStartStrip = EncPic.iPicHeight * EncPic.iPicWidth;
+    }
+    for (int k = 0; k < (EncPic.iPicHeight / 2); k++) {
+      memset (buf_.data() + k * EncPic.iPicWidth + iStartStrip, rand() % 256, (EncPic.iPicWidth));
+    }
+
+    int rv = encoder_->EncodeFrame (&EncPic, &info);
+    ASSERT_TRUE (rv == cmResultSuccess);
+
+    // init
+    for (iIdx = 0; iIdx < iSpatialLayerNum; iIdx++) {
+      aLen[iIdx] = 0;
+    }
+    for (int iLayer = 0; iLayer < info.iLayerNum; ++iLayer) {
+      iLayerLen = 0;
+      const SLayerBSInfo& layerInfo = info.sLayerInfo[iLayer];
+      for (int iNal = 0; iNal < layerInfo.iNalCount; ++iNal) {
+        iLayerLen += layerInfo.pNalLengthInByte[iNal];
+      }
+
+      iIdx = layerInfo.uiSpatialId;
+      EXPECT_TRUE (iIdx < iSpatialLayerNum) << "iIdx = " << iIdx << ", iSpatialLayerNum = " << iSpatialLayerNum;
+      memcpy ((pBsBuf[iIdx] + aLen[iIdx]), layerInfo.pBsBuf, iLayerLen * sizeof (unsigned char));
+      aLen[iIdx] += iLayerLen;
+    }
+
+    for (iIdx = 0; iIdx < iSpatialLayerNum; iIdx++) {
+      pData[0] = pData[1] = pData[2] = 0;
+      memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+
+#ifdef DEBUG_FILE_SAVE4
+      fwrite (pBsBuf[iIdx], aLen[iIdx], 1, fEnc[iIdx]);
+#endif
+      iResult = decoder[iIdx]->DecodeFrame2 (pBsBuf[iIdx], aLen[iIdx], pData, &dstBufInfo_);
+      EXPECT_TRUE (iResult == cmResultSuccess) << "iResult=" << iResult << ", LayerIdx=" << iIdx;
+
+      iResult = decoder[iIdx]->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_);
+      EXPECT_TRUE (iResult == cmResultSuccess) << "iResult=" << iResult << ", LayerIdx=" << iIdx;
+      EXPECT_EQ (dstBufInfo_.iBufferStatus, 1) << "LayerIdx=" << iIdx;
+    }
+  }
+
+  for (iIdx = 0; iIdx < iSpatialLayerNum; iIdx++) {
+    free (pBsBuf[iIdx]);
+
+    if (decoder[iIdx] != NULL) {
+      decoder[iIdx]->Uninitialize();
+      WelsDestroyDecoder (decoder[iIdx]);
+    }
+
+  }
+}
+