shithub: openh264

Download patch

ref: 095f0b663ecc2aea22fdf4b1b66f1f7f57bcdc45
parent: 623599024f8e4add2aa15ab980c5ee346468f2d0
author: huade <[email protected]>
date: Sun Jan 8 17:42:57 EST 2017

Multi-thread fix (RBC#1751): enable the reallocate feature for dynamic slice mode under multi-threading

--- a/codec/encoder/core/inc/deblocking.h
+++ b/codec/encoder/core/inc/deblocking.h
@@ -83,8 +83,8 @@
 
 void DeblockingFilterFrameAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc);
 
-void DeblockingFilterSliceAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, const int32_t kiSliceIdx);
-void DeblockingFilterSliceAvcbaseNull (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, const int32_t kiSliceIdx);
+void DeblockingFilterSliceAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice);
+void DeblockingFilterSliceAvcbaseNull (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice);
 }
 
 #endif
--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -87,6 +87,7 @@
 bool                            bThreadBsBufferUsage[MAX_THREADS_NUM];
 WELS_MUTEX                      mutexThreadBsBufferUsage;
 WELS_MUTEX                      mutexEvent;
+WELS_MUTEX                      mutexThreadSlcBuffReallocate;
 } SSliceThreading;
 
 #endif//MULTIPLE_THREADING_DEFINES_H__
--- a/codec/encoder/core/inc/svc_enc_frame.h
+++ b/codec/encoder/core/inc/svc_enc_frame.h
@@ -114,10 +114,10 @@
 SPicture*               pRefOri[MAX_REF_PIC_COUNT];
 
 int32_t                 iMaxSliceNum;
-int32_t*                pNumSliceCodedOfPartition;      // for dynamic slicing mode
-int32_t*                pLastCodedMbIdxOfPartition;     // for dynamic slicing mode
-int32_t*                pFirstMbIdxOfPartition;         // for dynamic slicing mode
-int32_t*                pEndMbIdxOfPartition;           // for dynamic slicing mode
+int32_t                 NumSliceCodedOfPartition[MAX_THREADS_NUM];      // for dynamic slicing mode
+int32_t                 LastCodedMbIdxOfPartition[MAX_THREADS_NUM];     // for dynamic slicing mode
+int32_t                 FirstMbIdxOfPartition[MAX_THREADS_NUM];         // for dynamic slicing mode
+int32_t                 EndMbIdxOfPartition[MAX_THREADS_NUM];           // for dynamic slicing mode
 int32_t*                pFirstMbIdxOfSlice;
 int32_t*                pCountMbNumInSlice;
 
@@ -126,7 +126,6 @@
 SFeatureSearchPreparation* pFeatureSearchPreparation;
 
 SDqLayer*               pRefLayer;              // pointer to referencing dq_layer of current layer to be decoded
-
 };
 
 ///////////////////////////////////////////////////////////////////////
--- a/codec/encoder/core/inc/svc_encode_slice.h
+++ b/codec/encoder/core/inc/svc_encode_slice.h
@@ -140,6 +140,9 @@
 
 int32_t ReallocSliceBuffer (sWelsEncCtx* pCtx);
 
+int32_t GetCurLayerNalCount(const SDqLayer* pCurDq, const int32_t kiCodedSliceNum);
+int32_t GetTotalCodedNalCount(SFrameBSInfo* pFbi);
+
 int32_t FrameBsRealloc (sWelsEncCtx* pCtx,
                         SFrameBSInfo* pFrameBsInfo,
                         SLayerBSInfo* pLayerBsInfo,
@@ -148,7 +151,7 @@
 int32_t SliceLayerInfoUpdate (sWelsEncCtx* pCtx,
                               SFrameBSInfo* pFrameBsInfo,
                               SLayerBSInfo* pLayerBsInfo,
-                              const int32_t kiPartitionNum);
+                              const SliceModeEnum kuiSliceMode);
 
 //slice encoding process
 int32_t WelsCodePSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice);
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -83,7 +83,7 @@
     int32_t iBeta);
 typedef void (*PDeblockingBSCalc) (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
                                    int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag);
-typedef void (*PDeblockingFilterSlice) (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, const int32_t kiSliceIdx);
+typedef void (*PDeblockingFilterSlice) (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice);
 
 typedef struct tagDeblockingFunc {
   PLumaDeblockingLT4Func    pfLumaDeblockingLT4Ver;
--- a/codec/encoder/core/src/deblocking.cpp
+++ b/codec/encoder/core/src/deblocking.cpp
@@ -690,9 +690,9 @@
   }
 }
 
-void DeblockingFilterSliceAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, const int32_t kiSliceIdx) {
+void DeblockingFilterSliceAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice) {
   SMB* pMbList                          = pCurDq->sMbDataP;
-  SSliceHeaderExt* sSliceHeaderExt      = &pCurDq->ppSliceInLayer[kiSliceIdx]->sSliceHeaderExt;
+  SSliceHeaderExt* sSliceHeaderExt      = &pSlice->sSliceHeaderExt;
   SMB* pCurrentMbBlock;
 
   const int32_t kiMbWidth               = pCurDq->iMbWidth;
@@ -738,14 +738,12 @@
   }
 }
 
-void DeblockingFilterSliceAvcbaseNull (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, const int32_t kiSliceIdx) {
+void DeblockingFilterSliceAvcbaseNull (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice) {
 }
 
 void PerformDeblockingFilter (sWelsEncCtx* pEnc) {
-  const int32_t kiCurDid                = pEnc->uiDependencyId;
-  SWelsSvcCodingParam* pSvcParam        = pEnc->pSvcParam;
-  SSpatialLayerConfig* pSpatialLayer    = &pSvcParam->sSpatialLayers[kiCurDid];
-  SDqLayer* pCurLayer                   = pEnc->pCurDqLayer;
+  SDqLayer* pCurLayer = pEnc->pCurDqLayer;
+  SSlice* pSlice      = NULL;
 
   if (pCurLayer->iLoopFilterDisableIdc == 0) {
     DeblockingFilterFrameAvcbase (pCurLayer, pEnc->pFuncList);
@@ -753,26 +751,13 @@
     int32_t iSliceCount = 0;
     int32_t iSliceIdx   = 0;
 
-    if (SM_SIZELIMITED_SLICE != pSpatialLayer->sSliceArgument.uiSliceMode) {
-      iSliceCount = GetCurrentSliceNum (pCurLayer);
-      do {
-        DeblockingFilterSliceAvcbase (pCurLayer, pEnc->pFuncList, iSliceIdx);
-        ++ iSliceIdx;
-      } while (iSliceIdx < iSliceCount);
-    } else { // for dynamic slicing mode
-      const int32_t kiNumPicPartition = pEnc->iActiveThreadsNum;
-      int32_t iPartitionIdx = 0;
-
-      while (iPartitionIdx < kiNumPicPartition) {
-        iSliceCount     = pCurLayer->pNumSliceCodedOfPartition[iPartitionIdx];
-        iSliceIdx       = iPartitionIdx;
-        do {
-          DeblockingFilterSliceAvcbase (pCurLayer, pEnc->pFuncList, iSliceIdx);
-          iSliceIdx += kiNumPicPartition;
-        } while (iSliceIdx < iSliceCount);
-        ++ iPartitionIdx;
-      }
-    }
+    iSliceCount = GetCurrentSliceNum (pCurLayer);
+    do {
+      pSlice = pCurLayer->ppSliceInLayer[iSliceIdx];
+      assert( NULL != pSlice);
+      DeblockingFilterSliceAvcbase (pCurLayer, pEnc->pFuncList, pSlice);
+      ++ iSliceIdx;
+    } while (iSliceIdx < iSliceCount);
   }
 }
 
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -927,7 +927,7 @@
   for (; iIdx < MAX_THREADS_NUM; iIdx ++) {
     FreeSliceBuffer (pDq->sSliceThreadInfo.pSliceInThread[iIdx],
                      pDq->sSliceThreadInfo.iMaxSliceNumInThread[iIdx],
-                     pMa, "pSliceInLayer");
+                     pMa, "pSliceInThread");
   }
 }
 
@@ -939,40 +939,20 @@
   FreeSliceInLayer (pDq, pMa);
 
   if (pDq->ppSliceInLayer) {
-    pMa->WelsFree (pDq->ppSliceInLayer, "pDq->ppSliceInLayer");
+    pMa->WelsFree (pDq->ppSliceInLayer, "ppSliceInLayer");
     pDq->ppSliceInLayer = NULL;
   }
 
   if (pDq->pFirstMbIdxOfSlice) {
-    pMa->WelsFree (pDq->pFirstMbIdxOfSlice, "pDq->pFirstMbIdxOfSlice");
+    pMa->WelsFree (pDq->pFirstMbIdxOfSlice, "pFirstMbIdxOfSlice");
     pDq->pFirstMbIdxOfSlice = NULL;
   }
 
   if (pDq->pCountMbNumInSlice) {
-    pMa->WelsFree (pDq->pCountMbNumInSlice, "pDq->pCountMbNumInSlice");
+    pMa->WelsFree (pDq->pCountMbNumInSlice, "pCountMbNumInSlice");
     pDq->pCountMbNumInSlice = NULL;
   }
 
-  if (pDq->pNumSliceCodedOfPartition) {
-    pMa->WelsFree (pDq->pNumSliceCodedOfPartition, "pNumSliceCodedOfPartition");
-    pDq->pNumSliceCodedOfPartition = NULL;
-  }
-
-  if (pDq->pLastCodedMbIdxOfPartition) {
-    pMa->WelsFree (pDq->pLastCodedMbIdxOfPartition, "pLastCodedMbIdxOfPartition");
-    pDq->pLastCodedMbIdxOfPartition = NULL;
-  }
-
-  if (pDq->pFirstMbIdxOfPartition) {
-    pMa->WelsFree (pDq->pFirstMbIdxOfPartition, "pFirstMbIdxOfPartition");
-    pDq->pFirstMbIdxOfPartition = NULL;
-  }
-
-  if (pDq->pEndMbIdxOfPartition) {
-    pMa->WelsFree (pDq->pEndMbIdxOfPartition, "pEndMbIdxOfPartition");
-    pDq->pEndMbIdxOfPartition = NULL;
-  }
-
   if (pDq->pFeatureSearchPreparation) {
     ReleaseFeatureSearchPreparation (pMa, pDq->pFeatureSearchPreparation->pFeatureOfBlock);
     pMa->WelsFree (pDq->pFeatureSearchPreparation, "pFeatureSearchPreparation");
@@ -1083,21 +1063,6 @@
     pDqLayer = (SDqLayer*)pMa->WelsMallocz (sizeof (SDqLayer), "pDqLayer");
     WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer), FreeDqLayer (pDqLayer, pMa))
 
-    // for dynamic slicing mode
-    if (SM_SIZELIMITED_SLICE == pDlayer->sSliceArgument.uiSliceMode) {
-      const int32_t iSize                       = pParam->iMultipleThreadIdc * sizeof (int32_t);
-
-      pDqLayer->pNumSliceCodedOfPartition       = (int32_t*)pMa->WelsMallocz (iSize, "pNumSliceCodedOfPartition");
-      pDqLayer->pLastCodedMbIdxOfPartition      = (int32_t*)pMa->WelsMallocz (iSize, "pLastCodedMbIdxOfPartition");
-      pDqLayer->pFirstMbIdxOfPartition          = (int32_t*)pMa->WelsMallocz (iSize, "pFirstMbIdxOfPartition");
-      pDqLayer->pEndMbIdxOfPartition            = (int32_t*)pMa->WelsMallocz (iSize, "pEndMbIdxOfPartition");
-      WELS_VERIFY_RETURN_PROC_IF (1,
-                                  (NULL == pDqLayer->pNumSliceCodedOfPartition  ||
-                                   NULL == pDqLayer->pLastCodedMbIdxOfPartition ||
-                                   NULL == pDqLayer->pFirstMbIdxOfPartition     ||
-                                   NULL == pDqLayer->pEndMbIdxOfPartition),
-                                  FreeDqLayer (pDqLayer, pMa))
-    }
     pDqLayer->bNeedAdjustingSlicing = false;
 
     pDqLayer->iMbWidth  = kiMbW;
@@ -2453,7 +2418,13 @@
   else if (iPartitionNum > AVERSLICENUM_CONSTRAINT)
     iPartitionNum = AVERSLICENUM_CONSTRAINT; // AVERSLICENUM_CONSTRAINT might be variable, however not fixed by MACRO
   iCountMbNumPerPartition /= iPartitionNum;
+  if(iCountMbNumPerPartition == 0 || iCountMbNumPerPartition == 1) {
+    iCountMbNumPerPartition = kiMbNumInFrame;
+    iPartitionNum           = 1;
+  }
+
   pSliceCtx->iSliceNumInFrame = iPartitionNum;
+
   i = 0;
   while (i < iPartitionNum) {
     if (i + 1 == iPartitionNum) {
@@ -2462,8 +2433,10 @@
       iCountMbNumInPartition = iCountMbNumPerPartition;
     }
 
-    pCurDq->pFirstMbIdxOfPartition[i] = iFirstMbIdx;
-    pCurDq->pEndMbIdxOfPartition[i]   = iFirstMbIdx + iCountMbNumInPartition - 1;
+    pCurDq->FirstMbIdxOfPartition[i]     = iFirstMbIdx;
+    pCurDq->EndMbIdxOfPartition[i]       = iFirstMbIdx + iCountMbNumInPartition - 1;
+    pCurDq->LastCodedMbIdxOfPartition[i] = 0;
+    pCurDq->NumSliceCodedOfPartition[i]  = 0;
 
     WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdx, i,
                                iCountMbNumInPartition, sizeof (uint16_t));
@@ -2473,6 +2446,14 @@
     iAssignableMbLeft -= iCountMbNumInPartition;
     ++ i;
   }
+
+  while(i<MAX_THREADS_NUM) {
+    pCurDq->FirstMbIdxOfPartition[i]     = 0;
+    pCurDq->EndMbIdxOfPartition[i]       = 0;
+    pCurDq->LastCodedMbIdxOfPartition[i] = 0;
+    pCurDq->NumSliceCodedOfPartition[i]  = 0;
+    ++ i;
+  }
 }
 
 void WelsInitCurrentDlayerMltslc (sWelsEncCtx* pCtx, int32_t iPartitionNum) {
@@ -2537,12 +2518,11 @@
   SSlice*   pBaseSlice          = pCurDq->ppSliceInLayer[0];
   const uint8_t kiCurDid        = pCtx->uiDependencyId;
   const bool kbUseSubsetSpsFlag = (!pParam->bSimulcastAVC) && (kiCurDid > BASE_DEPENDENCY_ID);
-  SSpatialLayerConfig* fDlp     = &pParam->sSpatialLayers[kiCurDid];
   SNalUnitHeaderExt* pNalHdExt  = &pCurDq->sLayerInfo.sNalHeaderExt;
   SNalUnitHeader* pNalHd        = &pNalHdExt->sNalUnitHeader;
   SDqIdc* pDqIdc                = &pCtx->pDqIdcMap[kiCurDid];
   int32_t iIdx                  = 0;
-  int32_t iSliceCount           = 0;
+  int32_t iSliceCount           = pCurDq->iMaxSliceNum;
   SSpatialLayerInternal* pParamInternal = &pParam->sDependencyLayers[kiCurDid];
   if (NULL == pCurDq || NULL == pBaseSlice)
     return;
@@ -2549,10 +2529,6 @@
 
   pCurDq->pDecPic = pDecPic;
 
-  if (fDlp->sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE) // need get extra slices for update
-    iSliceCount = GetInitialSliceNum (pCurDq->iMbWidth, pCurDq->iMbHeight, &fDlp->sSliceArgument);
-  else
-    iSliceCount = GetCurrentSliceNum (pCurDq);
   assert (iSliceCount > 0);
 
   int32_t iCurPpsId = pDqIdc->iPpsId;
@@ -2809,7 +2785,7 @@
  * \brief   prefetch reference picture after WelsBuildRefList
  */
 static inline void PrefetchReferencePicture (sWelsEncCtx* pCtx, const EVideoFrameType keFrameType) {
-  const int32_t kiSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer);
+  const int32_t kiSliceCount = pCtx->pCurDqLayer->iMaxSliceNum;
   int32_t iIdx = 0;
   uint8_t uiRefIdx = -1;
 
@@ -3494,6 +3470,7 @@
                                          pCtx->pSvcParam->sSpatialLayers[pCtx->pSvcParam->iSpatialLayerNum - 1].fFrameRate);
   for (int32_t iNalIdx = 0; iNalIdx < MAX_LAYER_NUM_OF_FRAME; iNalIdx++) {
     pFbi->sLayerInfo[iNalIdx].eFrameType = videoFrameTypeSkip;
+    pFbi->sLayerInfo[iNalIdx].iNalCount  = 0;
   }
   // perform csc/denoise/downsample/padding, generate spatial layers
   iSpatialNum = pCtx->pVpp->BuildSpatialPicList (pCtx, pSrcPic);
@@ -3666,7 +3643,6 @@
 
     //TODO Complexity Calculation here for screen content
     iLayerSize = 0;
-
     if (SM_SINGLE_SLICE == pParam->sSliceArgument.uiSliceMode) { // only one slice within a sQualityStat layer
       int32_t iSliceSize   = 0;
       int32_t iPayloadSize = 0;
@@ -3744,7 +3720,14 @@
         pLayerBsInfo->eFrameType    = eFrameType;
         pLayerBsInfo->iSubSeqId = GetSubSequenceId (pCtx, eFrameType);
 
-        //InitAllSlicesInThread(pCtx);
+        int32_t iRet = InitAllSlicesInThread(pCtx);
+        if (iRet) {
+          WelsLog (pLogCtx, WELS_LOG_ERROR,
+                "WelsEncoderEncodeExt(), multi-slice (mode %d) InitAllSlicesInThread() error!",
+                   pParam->sSliceArgument.uiSliceMode);
+          return ENC_RETURN_UNEXPECTED;
+        }
+
         pCtx->pTaskManage->ExecuteTasks();
         if (pCtx->iEncoderError) {
           WelsLog (pLogCtx, WELS_LOG_ERROR,
@@ -3753,8 +3736,14 @@
           return pCtx->iEncoderError;
         }
 
-        //TO DO: add update ppSliceInLayer module based on pSliceInThread[ThreadNum]
-        //SliceLayerInfoUpdate (pCtx, pFbi, pLayerBsInfo, 1);
+        iRet = SliceLayerInfoUpdate (pCtx, pFbi, pLayerBsInfo, pParam->sSliceArgument.uiSliceMode);
+        if (iRet) {
+          WelsLog (pLogCtx, WELS_LOG_ERROR,
+                   "WelsEncoderEncodeExt(), multi-slice (mode %d) SliceLayerInfoUpdate() error!",
+                   pParam->sSliceArgument.uiSliceMode);
+          return ENC_RETURN_UNEXPECTED;
+        }
+
         iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount);
       }
       // THREAD_FULLY_FIRE_MODE && SM_SIZELIMITED_SLICE
@@ -3797,6 +3786,13 @@
           ++ iIdx;
         }
 
+        int32_t iRet = InitAllSlicesInThread(pCtx);
+        if (iRet) {
+          WelsLog (pLogCtx, WELS_LOG_ERROR,
+                  "WelsEncoderEncodeExt(), multi-slice (mode %d) InitAllSlicesInThread() error!",
+                   pParam->sSliceArgument.uiSliceMode);
+          return ENC_RETURN_UNEXPECTED;
+        }
         pCtx->pTaskManage->ExecuteTasks();
 
         if (pCtx->iEncoderError) {
@@ -3805,10 +3801,17 @@
                    pParam->sSliceArgument.uiSliceMode, pCtx->iEncoderError);
           return pCtx->iEncoderError;
         }
-#endif     
-        //TO DO: add update ppSliceInLayer module based on pSliceInThread[ThreadNum]
-        // UpdateSliceInLayerInfo(); // reordering
-        iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt);
+#endif
+        iRet = SliceLayerInfoUpdate (pCtx, pFbi, pLayerBsInfo, pParam->sSliceArgument.uiSliceMode);
+        if (iRet) {
+          WelsLog (pLogCtx, WELS_LOG_ERROR,
+                   "WelsEncoderEncodeExt(), multi-slice (mode %d) SliceLayerInfoUpdate() error!",
+                   pParam->sSliceArgument.uiSliceMode);
+          return ENC_RETURN_UNEXPECTED;
+        }
+
+        iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer);
+        iLayerSize  = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount);
       } else { // for non-dynamic-slicing mode single threading branch..
         const bool bNeedPrefix = pCtx->bNeedPrefixNalFlag;
         int32_t iSliceIdx    = 0;
@@ -3833,7 +3836,6 @@
           pCurSlice = &pCtx->pCurDqLayer->sSliceThreadInfo.pSliceInThread[uiTheadIdx][iSliceIdx];
           assert (iSliceIdx == pCurSlice->iSliceIdx);
           pCtx->iEncoderError   = SetSliceBoundaryInfo(pCtx->pCurDqLayer, pCurSlice, iSliceIdx);
-          WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
 
           pCtx->iEncoderError = WelsCodeOneSlice (pCtx, pCurSlice, eNalType);
           WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
@@ -4492,14 +4494,8 @@
   const int32_t kiSliceIdxStep          = pCtx->iActiveThreadsNum;
   int32_t iReturn = ENC_RETURN_SUCCESS;
 
+  pStartSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = iFirstMbIdxInPartition;
 
-  //init
-  {
-    pStartSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = iFirstMbIdxInPartition;
-    pCurLayer->pNumSliceCodedOfPartition[kiPartitionId] = 1;
-  }
-  pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId] = 0;
-
   while (iAnyMbLeftInPartition > 0) {
     int32_t iSliceSize      = 0;
     int32_t iPayloadSize    = 0;
@@ -4530,7 +4526,7 @@
 
     WelsLoadNal (pCtx->pOut, keNalType, keNalRefIdc);
     pCurSlice = &pCtx->pCurDqLayer->sSliceThreadInfo.pSliceInThread[uiTheadIdx][iSliceIdx];
-    assert (iSliceIdx == pCurSlice->iSliceIdx);
+    pCurSlice->iSliceIdx = iSliceIdx;
 
     iReturn = WelsCodeOneSlice (pCtx, pCurSlice, keNalType);
     WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
@@ -4558,7 +4554,7 @@
 
     ++ iNalIdxInLayer;
     iSliceIdx += kiSliceStep; //if iSliceIdx is not continuous
-    iAnyMbLeftInPartition = iEndMbIdxInPartition - pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId];
+    iAnyMbLeftInPartition = iEndMbIdxInPartition - pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId];
   }
 
   *pLayerSize           = iPartitionBsSize;
--- a/codec/encoder/core/src/nal_encap.cpp
+++ b/codec/encoder/core/src/nal_encap.cpp
@@ -104,7 +104,6 @@
 
   /* count payload size of pRawNal NAL */
   pRawNal->iPayloadSize = kiEndPos - pRawNal->iStartPos;
-
   ++ (*pIdx);
 }
 
--- a/codec/encoder/core/src/ratectl.cpp
+++ b/codec/encoder/core/src/ratectl.cpp
@@ -547,7 +547,7 @@
   SRCTemporal* pTOverRc         = &pWelsSvcRc->pTemporalOverRc[pEncCtx->uiTemporalId];
 
   pWelsSvcRc->iCurrentBitsLevel = BITS_NORMAL;
-//allocate bits
+  //allocate bits
   if (pEncCtx->eSliceType == I_SLICE) {
     pWelsSvcRc->iTargetBits = pWelsSvcRc->iBitsPerFrame * IDR_BITRATE_RATIO;
   } else {
--- a/codec/encoder/core/src/ref_list_mgr_svc.cpp
+++ b/codec/encoder/core/src/ref_list_mgr_svc.cpp
@@ -493,7 +493,7 @@
 
 void WelsMarkPic (sWelsEncCtx* pCtx) {
   SLTRState* pLtr               = &pCtx->pLtr[pCtx->uiDependencyId];
-  const int32_t kiCountSliceNum = GetCurrentSliceNum (pCtx->pCurDqLayer);
+  const int32_t kiCountSliceNum = pCtx->pCurDqLayer->iMaxSliceNum;
 
   if (pCtx->pSvcParam->bEnableLongTermReference && pLtr->bLTRMarkEnable && pCtx->uiTemporalId == 0) {
     if (!pLtr->bReceivedT0LostFlag && pLtr->uiLtrMarkInterval > pCtx->pSvcParam->iLtrMarkPeriod
@@ -660,7 +660,7 @@
 
 void WelsUpdateSliceHeaderSyntax (sWelsEncCtx* pCtx,  const int32_t iAbsDiffPicNumMinus1,
                                   SSlice** ppSliceList, const int32_t uiFrameType) {
-  const int32_t kiCountSliceNum = GetCurrentSliceNum (pCtx->pCurDqLayer);
+  const int32_t kiCountSliceNum = pCtx->pCurDqLayer->iMaxSliceNum;
   SLTRState* pLtr               = &pCtx->pLtr[pCtx->uiDependencyId];
   int32_t iIdx = 0;
 
@@ -985,7 +985,7 @@
     }
   }
 
-  const int32_t iSliceNum = GetCurrentSliceNum (pCtx->pCurDqLayer);
+  const int32_t iSliceNum = pCtx->pCurDqLayer->iMaxSliceNum;
 
   WlesMarkMMCORefInfoScreen (pCtx, pLtr, pCtx->pCurDqLayer->ppSliceInLayer, iSliceNum);
 
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -99,6 +99,7 @@
   int32_t iSliceIdx             = 0;
   int32_t iAvI[MAX_SLICES_NUM];
 
+  assert(kiSliceCount <= MAX_SLICES_NUM);
   WelsEmms();
 
   while (iSliceIdx < kiSliceCount) {
@@ -354,9 +355,14 @@
     WELS_VERIFY_RETURN_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx]))
   }
   iReturn = WelsMutexInit (&pSmt->mutexThreadBsBufferUsage);
-  WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn))
+  WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))
+
   iReturn = WelsMutexInit (&pSmt->mutexEvent);
-  WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn))
+  WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx));
+
+  iReturn = WelsMutexInit (&pSmt->mutexThreadSlcBuffReallocate);
+  WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))
+
   iReturn = WelsMutexInit (& (*ppCtx)->mutexEncoderError);
   WELS_VERIFY_RETURN_IF (1, (WELS_THREAD_ERROR_OK != iReturn))
 
@@ -405,6 +411,7 @@
 
   WelsMutexDestroy (&pSmt->mutexSliceNumUpdate);
   WelsMutexDestroy (&pSmt->mutexThreadBsBufferUsage);
+  WelsMutexDestroy (&pSmt->mutexThreadSlcBuffReallocate);
   WelsMutexDestroy (& ((*ppCtx)->mutexEncoderError));
   WelsMutexDestroy (&pSmt->mutexEvent);
   if (pSmt->pThreadPEncCtx != NULL) {
@@ -436,75 +443,36 @@
 }
 
 int32_t AppendSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, const int32_t iSliceCount) {
-  SWelsSvcCodingParam* pCodingParam     = pCtx->pSvcParam;
-  SSpatialLayerConfig* pDlp             = &pCodingParam->sSpatialLayers[pCtx->uiDependencyId];
-  SSlice** ppSliceInlayer               = pCtx->pCurDqLayer->ppSliceInLayer;
-  SWelsSliceBs* pSliceBs                = NULL;
-  const bool kbIsDynamicSlicingMode     = (pDlp->sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE);
+  SSlice** ppSliceInlayer = pCtx->pCurDqLayer->ppSliceInLayer;
+  SWelsSliceBs* pSliceBs  = NULL;
+  int32_t iLayerSize      = 0;
+  int32_t iNalIdxBase     = pLbi->iNalCount;
+  int32_t iSliceIdx       = 0;
 
-  int32_t iLayerSize    = 0;
-  int32_t iNalIdxBase   = pLbi->iNalCount;
-  int32_t iSliceIdx     = 0;
+  iNalIdxBase  = pLbi->iNalCount = 0;
+  while (iSliceIdx < iSliceCount) {
+    pSliceBs    = &ppSliceInlayer[iSliceIdx]->sSliceBs;
+    if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) {
+    int32_t iNalIdx = 0;
+    const int32_t iCountNal = pSliceBs->iNalIndex;
 
-  if (!kbIsDynamicSlicingMode) {
-    iNalIdxBase   = pLbi->iNalCount = 0;
-    while (iSliceIdx < iSliceCount) {
-      pSliceBs    = &ppSliceInlayer[iSliceIdx]->sSliceBs;
-      if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) {
-        int32_t iNalIdx = 0;
-        const int32_t iCountNal = pSliceBs->iNalIndex;
-
 #if MT_DEBUG_BS_WR
-        assert (pSliceBs->bSliceCodedFlag);
+    assert (pSliceBs->bSliceCodedFlag);
 #endif//MT_DEBUG_BS_WR
 
-        memmove (pCtx->pFrameBs + pCtx->iPosBsBuffer, pSliceBs->pBs, pSliceBs->uiBsPos); // confirmed_safe_unsafe_usage
-        pCtx->iPosBsBuffer += pSliceBs->uiBsPos;
+    memmove (pCtx->pFrameBs + pCtx->iPosBsBuffer, pSliceBs->pBs, pSliceBs->uiBsPos); // confirmed_safe_unsafe_usage
+    pCtx->iPosBsBuffer += pSliceBs->uiBsPos;
 
-        iLayerSize += pSliceBs->uiBsPos;
+    iLayerSize += pSliceBs->uiBsPos;
 
-        while (iNalIdx < iCountNal) {
-          pLbi->pNalLengthInByte[iNalIdxBase + iNalIdx] = pSliceBs->iNalLen[iNalIdx];
-          ++ iNalIdx;
-        }
-        pLbi->iNalCount += iCountNal;
-        iNalIdxBase     += iCountNal;
-      }
-      ++ iSliceIdx;
+    while (iNalIdx < iCountNal) {
+        pLbi->pNalLengthInByte[iNalIdxBase + iNalIdx] = pSliceBs->iNalLen[iNalIdx];
+        ++ iNalIdx;
     }
-  } else { // for SM_SIZELIMITED_SLICE
-    const int32_t kiPartitionCnt        = iSliceCount;
-    int32_t iPartitionIdx               = 0;
-
-    // due partition_0 has been written to pFrameBsBuffer
-    // so iLayerSize need add it
-    while (iPartitionIdx < kiPartitionCnt) {
-      const int32_t kiCountSlicesCoded = pCtx->pCurDqLayer->pNumSliceCodedOfPartition[iPartitionIdx];
-      int32_t iIdx = 0;
-
-      iSliceIdx = iPartitionIdx;
-      while (iIdx < kiCountSlicesCoded) {
-        pSliceBs = &ppSliceInlayer[iSliceIdx]->sSliceBs;
-        if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) {
-          memmove (pCtx->pFrameBs + pCtx->iPosBsBuffer, pSliceBs->pBs, pSliceBs->uiBsPos); // confirmed_safe_unsafe_usage
-          pCtx->iPosBsBuffer += pSliceBs->uiBsPos;
-
-          const int32_t iCountNal = pSliceBs->iNalIndex;
-          iLayerSize += pSliceBs->uiBsPos;
-          int32_t iNalIdx = 0;
-          while (iNalIdx < iCountNal) {
-            pLbi->pNalLengthInByte[iNalIdxBase + iNalIdx] = pSliceBs->iNalLen[iNalIdx];
-            ++ iNalIdx;
-          }
-          pLbi->iNalCount += iCountNal;
-          iNalIdxBase     += iCountNal;
-        }
-
-        iSliceIdx += kiPartitionCnt;
-        ++ iIdx;
-      }
-      ++ iPartitionIdx;
+    pLbi->iNalCount += iCountNal;
+    iNalIdxBase     += iCountNal;
     }
+    ++ iSliceIdx;
   }
 
   return iLayerSize;
@@ -650,14 +618,6 @@
                                         iEventIdx);
         }
 
-        iReturn = SetSliceBoundaryInfo(pEncPEncCtx->pCurDqLayer, pSlice, iSliceIdx);
-        if (ENC_RETURN_SUCCESS != iReturn) {
-          uiThrdRet = iReturn;
-          WELS_THREAD_SIGNAL_AND_BREAK (pEncPEncCtx->pSliceThreading->pSliceCodedEvent,
-                                        pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent,
-                                        iEventIdx);
-        }
-
         assert (iSliceIdx == pSlice->iSliceIdx);
         iReturn = WelsCodeOneSlice (pEncPEncCtx, pSlice, eNalType);
         if (ENC_RETURN_SUCCESS != iReturn) {
@@ -677,7 +637,7 @@
                                         iEventIdx);
         }
 
-        pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx);
+        pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, pSlice);
 
         if (bDsaFlag) {
           pSlice->uiSliceConsumeTime = (uint32_t) (WelsTime() - iSliceStart);
@@ -710,16 +670,13 @@
       } else { // for SM_SIZELIMITED_SLICE parallelization
         const int32_t kiPartitionId             = iThreadIdx;
         const int32_t kiSliceIdxStep            = pEncPEncCtx->iActiveThreadsNum;
-        const int32_t kiFirstMbInPartition      = pCurDq->pFirstMbIdxOfPartition[kiPartitionId];
-        const int32_t kiEndMbIdxInPartition     = pCurDq->pEndMbIdxOfPartition[kiPartitionId];
+        const int32_t kiFirstMbInPartition      = pCurDq->FirstMbIdxOfPartition[kiPartitionId];
+        const int32_t kiEndMbIdxInPartition     = pCurDq->EndMbIdxOfPartition[kiPartitionId];
         int32_t iAnyMbLeftInPartition           = kiEndMbIdxInPartition - kiFirstMbInPartition + 1;
         SSpatialLayerInternal *pParamInternal   = &pCodingParam->sDependencyLayers[kiCurDid];
         iSliceIdx                               = pPrivateData->iSliceIndex;
         SSlice* pStartSlice                     = &pCurDq->sSliceThreadInfo.pSliceInThread[iThreadIdx][iSliceIdx];
         pStartSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = kiFirstMbInPartition;
-        pCurDq->pNumSliceCodedOfPartition[kiPartitionId]          = 1;
-        pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]         = 0;
-
         while (iAnyMbLeftInPartition > 0) {
           if (iSliceIdx >= pCurDq->iMaxSliceNum) {
             // TODO: need exception handler for not large enough of MAX_SLICES_NUM related memory usage
@@ -778,7 +735,7 @@
                                           iEventIdx);
           }
 
-          pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, iSliceIdx);
+          pEncPEncCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, pEncPEncCtx->pFuncList, pSlice);
 
 #if defined(SLICE_INFO_OUTPUT)
           fprintf (stderr,
@@ -791,12 +748,12 @@
 #endif//SLICE_INFO_OUTPUT
 
           MT_TRACE_LOG (&(pEncPEncCtx->sLogCtx), WELS_LOG_INFO,
-                        "[MT] CodingSliceThreadProc(), coding_idx %d, iPartitionId %d, iSliceIdx %d, iSliceSize %d, count_mb_slice %d, iEndMbInPartition %d, pCurDq->pLastCodedMbIdxOfPartition[%d] %d\n",
+                        "[MT] CodingSliceThreadProc(), coding_idx %d, iPartitionId %d, iSliceIdx %d, iSliceSize %d, count_mb_slice %d, iEndMbInPartition %d, pCurDq->LastCodedMbIdxOfPartition[%d] %d\n",
                         pEncPEncCtx->iCodingIndex, kiPartitionId, iSliceIdx, iSliceSize,
                         pSlice->iCountMbNumInSlice,
-                        kiEndMbIdxInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
+                        kiEndMbIdxInPartition, kiPartitionId, pCurDq->LastCodedMbIdxOfPartition[kiPartitionId]);
 
-          iAnyMbLeftInPartition = kiEndMbIdxInPartition - pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId];
+          iAnyMbLeftInPartition = kiEndMbIdxInPartition - pCurDq->LastCodedMbIdxOfPartition[kiPartitionId];
           iSliceIdx += kiSliceIdxStep;
         }
 
--- a/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/codec/encoder/core/src/svc_encode_slice.cpp
@@ -640,9 +640,9 @@
 
     if (DynSlcJudgeSliceBoundaryStepBack (pEncCtx, pSlice, pSliceCtx, pCurMb, &sDss)) { //islice
       pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
-      pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx -
-          1; // update pLastCodedMbIdxOfPartition, need to -1 due to stepping back
-      ++ pCurLayer->pNumSliceCodedOfPartition[kiPartitionId];
+      pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx -
+          1; // update LastCodedMbIdxOfPartition, need to -1 due to stepping back
+      ++ pCurLayer->NumSliceCodedOfPartition[kiPartitionId];
 
       break;
     }
@@ -661,9 +661,10 @@
     iNextMbIdx = WelsGetNextMbOfSlice (pCurLayer, iCurMbIdx);
     //whether all of MB in current pSlice encoded or not
     if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbCoded >= kiTotalNumMb) {
-      pSlice->iCountMbNumInSlice = iCurMbIdx - pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId];
-      pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId] =
-        iCurMbIdx; // update pLastCodedMbIdxOfPartition, finish coding, use iCurMbIdx directly
+      pSlice->iCountMbNumInSlice = iCurMbIdx - pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId];
+      pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx;
+      ++ pCurLayer->NumSliceCodedOfPartition[kiPartitionId];
+
       break;
     }
   }
@@ -754,10 +755,11 @@
 
 //Allocate slice's MB cache buffer
 int32_t AllocMbCacheAligned (SMbCache* pMbCache, CMemoryAlign* pMa) {
-  pMbCache->pCoeffLevel = (int16_t*)pMa->WelsMallocz (MB_COEFF_LIST_SIZE * sizeof (int16_t), "pMbCache->pCoeffLevel");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pCoeffLevel));
   pMbCache->pMemPredMb = (uint8_t*)pMa->WelsMallocz (2 * 256 * sizeof (uint8_t), "pMbCache->pMemPredMb");
   WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredMb));
+
+  pMbCache->pCoeffLevel = (int16_t*)pMa->WelsMallocz (MB_COEFF_LIST_SIZE * sizeof (int16_t), "pMbCache->pCoeffLevel");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pCoeffLevel));
   pMbCache->pSkipMb = (uint8_t*)pMa->WelsMallocz (384 * sizeof (uint8_t), "pMbCache->pSkipMb");
   WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pSkipMb));
   pMbCache->pMemPredBlk4 = (uint8_t*)pMa->WelsMallocz (2 * 16 * sizeof (uint8_t), "pMbCache->pMemPredBlk4");
@@ -895,7 +897,7 @@
 
   if (bIndependenceBsBuffer) {
     pSlice->pSliceBsa      = &pSlice->sSliceBs.sBsWrite;
-    pSlice->sSliceBs.pBs   = (uint8_t*)pMa->WelsMalloc (iMaxSliceBufferSize, "SliceBs");
+    pSlice->sSliceBs.pBs   = (uint8_t*)pMa->WelsMalloc (iMaxSliceBufferSize, "sSliceBs.pBs");
     if (NULL == pSlice->sSliceBs.pBs) {
       return ENC_RETURN_MEMALLOCERR;
     }
@@ -979,17 +981,22 @@
 }
 
 int32_t InitAllSlicesInThread (sWelsEncCtx* pCtx) {
-  SDqLayer* pDqLayer = pCtx->pCurDqLayer;
-  int32_t iSliceIdx  = 0;
+  SDqLayer* pCurDqLayer = pCtx->pCurDqLayer; // layer whose slice table and per-thread counters are reset
+  int32_t iSliceIdx     = 0;
+  int32_t iThreadIdx    = 0;
 
-  for( ; iSliceIdx < pDqLayer->iMaxSliceNum; iSliceIdx++) {
-    if(NULL == pDqLayer->ppSliceInLayer[iSliceIdx]) {
+  for( ; iSliceIdx < pCurDqLayer->iMaxSliceNum; iSliceIdx++) {
+    if(NULL == pCurDqLayer->ppSliceInLayer[iSliceIdx]) { // a missing slice entry is reported as ENC_RETURN_UNEXPECTED
       return ENC_RETURN_UNEXPECTED;
     }
 
-    pDqLayer->ppSliceInLayer[iSliceIdx]->iSliceIdx = -1;
+    pCurDqLayer->ppSliceInLayer[iSliceIdx]->iSliceIdx = -1; // -1: no slice index assigned yet
   }
 
+  for( ; iThreadIdx < pCtx->iActiveThreadsNum; iThreadIdx++ ) { // zero each active thread's coded-slice counter
+    pCurDqLayer->sSliceThreadInfo.iEncodedSliceNumInThread[iThreadIdx] = 0;
+  }
+
   return ENC_RETURN_SUCCESS;
 }
 
@@ -1000,17 +1007,13 @@
                               const int32_t kiSliceIdx) {
   SDqLayer* pDqLayer                  = pCtx->pCurDqLayer;
   const int32_t kiCodedNumInThread    = pDqLayer->sSliceThreadInfo.iEncodedSliceNumInThread[kiThreadIdx];
-  const int32_t kiMaxSliceNumInThread = pDqLayer->sSliceThreadInfo.iMaxSliceNumInThread[kiThreadIdx];
-  int32_t iRet                        = 0;
 
-  if (kiCodedNumInThread >= kiMaxSliceNumInThread) {
-    iRet = ReallocateSliceInThread(pCtx, pDqLayer, kiDlayerIdx, kiThreadIdx);
-    if (ENC_RETURN_SUCCESS != iRet) {
-      return iRet;
-    }
-  }
+  assert(kiCodedNumInThread < pDqLayer->sSliceThreadInfo.iMaxSliceNumInThread[kiThreadIdx] -1 );
 
-  pSlice = pDqLayer->sSliceThreadInfo.pSliceInThread [kiThreadIdx] + kiCodedNumInThread;
+  pSlice = &pDqLayer->sSliceThreadInfo.pSliceInThread [kiThreadIdx][kiCodedNumInThread];
+  pSlice->iSliceIdx   = kiSliceIdx;
+  pSlice->uiThreadIdx = kiThreadIdx;
+
   // Initialize slice bs buffer info
   pSlice->sSliceBs.uiBsPos   = 0;
   pSlice->sSliceBs.iNalIndex = 0;
@@ -1030,6 +1033,7 @@
   int32_t iIdx                        = 0;
   int32_t iRet                        = 0;
 
+  assert (iThreadNum > 0);
   iMaxSliceNumInThread = pDqLayer->iMaxSliceNum;
   while (iIdx < iThreadNum) {
     pSliceThreadInfo->iMaxSliceNumInThread[iIdx]     = iMaxSliceNumInThread;
@@ -1078,7 +1082,10 @@
     return ENC_RETURN_MEMALLOCERR;
   }
 
-  pDqLayer->iMaxSliceNum = iMaxSliceNum * pCtx->iActiveThreadsNum;
+  pDqLayer->iMaxSliceNum = 0;
+  for(iThreadIdx = 0; iThreadIdx < pCtx->iActiveThreadsNum; iThreadIdx++ ) {
+    pDqLayer->iMaxSliceNum += pDqLayer->sSliceThreadInfo.iMaxSliceNumInThread[iThreadIdx];
+  }
 
   pDqLayer->ppSliceInLayer = (SSlice**)pMa->WelsMallocz (sizeof (SSlice*) * pDqLayer->iMaxSliceNum, "ppSliceInLayer");
   if (NULL ==  pDqLayer->ppSliceInLayer) {
@@ -1120,14 +1127,14 @@
     return;
   }
 
-  SSliceHeaderExt* pBaseSHExt  = &pBaseSlice->sSliceHeaderExt;
-  SSliceHeaderExt* pSHExt      = &pSlice->sSliceHeaderExt;
+  SSliceHeaderExt* pBaseSHExt = &pBaseSlice->sSliceHeaderExt;
+  SSliceHeaderExt* pSHExt     = &pSlice->sSliceHeaderExt;
 
-  pSlice->bSliceHeaderExtFlag  = pBaseSlice->bSliceHeaderExtFlag;
-  pSHExt->sSliceHeader.iPpsId     = pBaseSHExt->sSliceHeader.iPpsId;
-  pSHExt->sSliceHeader.pPps       = pBaseSHExt->sSliceHeader.pPps;
-  pSHExt->sSliceHeader.iSpsId     = pBaseSHExt->sSliceHeader.iSpsId;
-  pSHExt->sSliceHeader.pSps       = pBaseSHExt->sSliceHeader.pSps;
+  pSlice->bSliceHeaderExtFlag = pBaseSlice->bSliceHeaderExtFlag;
+  pSHExt->sSliceHeader.iPpsId = pBaseSHExt->sSliceHeader.iPpsId;
+  pSHExt->sSliceHeader.pPps   = pBaseSHExt->sSliceHeader.pPps;
+  pSHExt->sSliceHeader.iSpsId = pBaseSHExt->sSliceHeader.iSpsId;
+  pSHExt->sSliceHeader.pSps   = pBaseSHExt->sSliceHeader.pSps;
 }
 
 void InitSliceRefInfoWithBase (SSlice* pSlice, SSlice* pBaseSlice, const uint8_t kuiRefCount) {
@@ -1181,7 +1188,7 @@
     return ENC_RETURN_INVALIDINPUT;
   }
 
-  pNewSliceList = (SSlice*)pMA->WelsMallocz (sizeof (SSlice) * kiMaxSliceNumNew, "Slice");
+  pNewSliceList = (SSlice*)pMA->WelsMallocz (sizeof (SSlice) * kiMaxSliceNumNew, "pSliceInThread");
   if (NULL == pNewSliceList) {
     WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::ReallocateSliceList: pNewSliceList is NULL");
     return ENC_RETURN_MEMALLOCERR;
@@ -1188,21 +1195,34 @@
   }
 
   memcpy (pNewSliceList, pSliceList, sizeof (SSlice) * kiMaxSliceNumOld);
-  iSliceIdx   = kiMaxSliceNumOld;
-  pBaseSlice  = &pSliceList[0];
+
+  //update Bs writer
+  for (iSliceIdx = 0; iSliceIdx < kiMaxSliceNumOld; iSliceIdx++) {
+    pSlice = pNewSliceList + iSliceIdx;
+    if (NULL == pSlice) {
+      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceInThread");
+      return ENC_RETURN_MEMALLOCERR;
+    }
+
+    if (bIndependenceBsBuffer) {
+      pSlice->pSliceBsa  = &pSlice->sSliceBs.sBsWrite;
+    }
+  }
+
+  pBaseSlice = &pSliceList[0];
   if (NULL == pBaseSlice) {
     FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "ReallocateSliceList()::InitSliceBsBuffer()");
     return ENC_RETURN_MEMALLOCERR;
   }
 
-  for (; iSliceIdx < kiMaxSliceNumNew; iSliceIdx++) {
+  for (iSliceIdx = kiMaxSliceNumOld; iSliceIdx < kiMaxSliceNumNew; iSliceIdx++) {
     pSlice = pNewSliceList + iSliceIdx;
     if (NULL == pSlice) {
-      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "ReallocateSliceList()::InitSliceBsBuffer()");
+      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceInThread");
       return ENC_RETURN_MEMALLOCERR;
     }
 
-    pSlice->iSliceIdx = iSliceIdx;
+    pSlice->iSliceIdx = -1;
     pSlice->uiThreadIdx = 0;
     pSlice->iCountMbNumInSlice = 0;
     pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = 0;
@@ -1213,13 +1233,13 @@
                               iMaxSliceBufferSize,
                               pMA);
     if (ENC_RETURN_SUCCESS != iRet) {
-      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "ReallocateSliceList()::InitSliceBsBuffer()");
+      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceInThread");
       return iRet;
     }
 
     iRet = AllocateSliceMBBuffer (pSlice, pMA);
     if (ENC_RETURN_SUCCESS != iRet) {
-      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "ReallocateSliceList()::InitSliceBsBuffer()");
+      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceInThread");
       return iRet;
     }
 
@@ -1228,12 +1248,12 @@
 
     iRet = InitSliceRC (pSlice, pCtx->iGlobalQp);
     if (ENC_RETURN_SUCCESS != iRet) {
-      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "ReallocateSliceList()::InitSliceBsBuffer()");
+      FreeSliceBuffer(pNewSliceList, kiMaxSliceNumNew, pMA, "pSliceInThread");
       return iRet;
     }
   }
 
-  pMA->WelsFree (pSliceList, "Slice");
+  pMA->WelsFree (pSliceList, "pSliceInThread");
   pSliceList = pNewSliceList;
 
   return ENC_RETURN_SUCCESS;
@@ -1290,6 +1310,8 @@
     return iRet;
   }
 
+  pDqLayer->sSliceThreadInfo.iMaxSliceNumInThread[kiThreadIndex] = iMaxSliceNumUpdate;
+
   return ENC_RETURN_SUCCESS;
 }
 
@@ -1299,11 +1321,11 @@
   CMemoryAlign* pMA            = pCtx->pMemAlign;
   SDqLayer* pCurLayer          = pCtx->pCurDqLayer;
   SSlice** ppSlice             = NULL;
-  int32_t* pFirstMbIdxofSlice  = NULL;
+  int32_t* pFirstMbIdxOfSlice  = NULL;
   int32_t* pCountMbNumInSlice  = NULL;
 
   // update for ppsliceInlayer
-  ppSlice = (SSlice**)pMA->WelsMallocz (sizeof (SSlice*) * kiMaxSliceNumNew, "ppSlice");
+  ppSlice = (SSlice**)pMA->WelsMallocz (sizeof (SSlice*) * kiMaxSliceNumNew, "ppSliceInLayer");
   if (NULL == ppSlice) {
     WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::ExtendLayerBuffer: ppSlice is NULL");
     return ENC_RETURN_MEMALLOCERR;
@@ -1312,15 +1334,15 @@
   pCurLayer->ppSliceInLayer = ppSlice;
 
   // update for pFirstMbIdxInSlice
-  pFirstMbIdxofSlice = (int32_t*)pMA->WelsMallocz (sizeof (int32_t*) * kiMaxSliceNumNew, "pFirstMbIdxofSlice");
-  if (NULL == pFirstMbIdxofSlice) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::ExtendLayerBuffer: pFirstMbIdxofSlice is NULL");
+  pFirstMbIdxOfSlice = (int32_t*)pMA->WelsMallocz (sizeof (int32_t*) * kiMaxSliceNumNew, "pFirstMbIdxOfSlice");
+  if (NULL == pFirstMbIdxOfSlice) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::ExtendLayerBuffer: pFirstMbIdxOfSlice is NULL");
     return ENC_RETURN_MEMALLOCERR;
   }
-  memset (pFirstMbIdxofSlice, 0, sizeof(int32_t) * kiMaxSliceNumNew);
-  memcpy (pFirstMbIdxofSlice, pCurLayer->pFirstMbIdxOfSlice, sizeof (int32_t) * kiMaxSliceNumOld);
-  pMA->WelsFree (pCurLayer->pFirstMbIdxOfSlice, "pFirstMbIdxofSlice");
-  pCurLayer->pFirstMbIdxOfSlice = pFirstMbIdxofSlice;
+  memset (pFirstMbIdxOfSlice, 0, sizeof(int32_t) * kiMaxSliceNumNew);
+  memcpy (pFirstMbIdxOfSlice, pCurLayer->pFirstMbIdxOfSlice, sizeof (int32_t) * kiMaxSliceNumOld);
+  pMA->WelsFree (pCurLayer->pFirstMbIdxOfSlice, "pFirstMbIdxOfSlice");
+  pCurLayer->pFirstMbIdxOfSlice = pFirstMbIdxOfSlice;
 
   // update for pCountMbNumInSlice
   pCountMbNumInSlice = (int32_t*)pMA->WelsMallocz (sizeof (int32_t*) * kiMaxSliceNumNew, "pCountMbNumInSlice");
@@ -1405,10 +1427,10 @@
   return ENC_RETURN_SUCCESS;
 }
 
-
-int32_t ReOrderSliceInLayer (SDqLayer* pCurLayer,
-                             const int32_t kiThreadNum,
-                             const int32_t kiPartitionNum) {
+int32_t ReOrderSliceInLayer (sWelsEncCtx* pCtx,
+                             const SliceModeEnum kuiSliceMode,
+                             const int32_t kiThreadNum) {
+  SDqLayer* pCurLayer       = pCtx->pCurDqLayer;
   SSlice* pSliceInThread    = NULL;
   int32_t iThreadIdx        = 0;
   int32_t iPartitionIdx     = 0;
@@ -1419,12 +1441,19 @@
   int32_t iActualSliceIdx   = 0;
   int32_t iNonUsedBufferNum = 0;
   int32_t iUsedSliceNum     = 0;
+
+  int32_t iPartitionNum     = 0;
   int32_t aiPartitionOffset[MAX_THREADS_NUM] = {0};
 
-  //for non-dynamic slice mode, kiPartitionNum = 1, iPartitionOffset = 0
-  for(iPartitionIdx = 0; iPartitionIdx < kiPartitionNum; iPartitionIdx++) {
+  //for non-dynamic slice mode, iPartitionNum = 1, iPartitionOffset = 0
+  iPartitionNum = (SM_SIZELIMITED_SLICE == kuiSliceMode) ? kiThreadNum : 1;
+  for(iPartitionIdx = 0; iPartitionIdx < iPartitionNum; iPartitionIdx++) {
     aiPartitionOffset[iPartitionIdx] = iEncodeSliceNum;
-    iEncodeSliceNum                 += pCurLayer->pNumSliceCodedOfPartition[iPartitionIdx];
+    if ( SM_SIZELIMITED_SLICE == kuiSliceMode) {
+      iEncodeSliceNum  += pCurLayer->NumSliceCodedOfPartition[iPartitionIdx];
+    } else {
+      iEncodeSliceNum = pCurLayer->sSliceEncCtx.iSliceNumInFrame;
+    }
   }
 
   if( iEncodeSliceNum != pCurLayer->sSliceEncCtx.iSliceNumInFrame) {
@@ -1443,8 +1472,9 @@
       }
 
       if( -1 != pSliceInThread->iSliceIdx) {
-        iPartitionID    = pSliceInThread->iSliceIdx % kiPartitionNum;
-        iActualSliceIdx = aiPartitionOffset[iPartitionID] + pSliceInThread->iSliceIdx / kiPartitionNum;
+        iPartitionID    = pSliceInThread->iSliceIdx % iPartitionNum;
+        iActualSliceIdx = aiPartitionOffset[iPartitionID] + pSliceInThread->iSliceIdx / iPartitionNum;
+        pSliceInThread->iSliceIdx                  = iActualSliceIdx;
         pCurLayer->ppSliceInLayer[iActualSliceIdx] = pSliceInThread;
         iUsedSliceNum ++;
       } else {
@@ -1466,6 +1496,29 @@
   return ENC_RETURN_SUCCESS;
 }
 
+int32_t GetCurLayerNalCount(const SDqLayer* pCurDq, const int32_t kiCodedSliceNum) { // sums iNalIndex over the first kiCodedSliceNum slices of the layer
+  int32_t iTotalNalCount  = 0;
+  int32_t iSliceIdx       = 0;
+  const SSlice* pSlice    = NULL;
+  for ( ; iSliceIdx < kiCodedSliceNum; iSliceIdx++ ) {
+    pSlice = pCurDq->ppSliceInLayer[iSliceIdx]; // test the slice pointer itself: the address of its sSliceBs member is never NULL
+    if (pSlice != NULL && pSlice->sSliceBs.uiBsPos > 0) { // only slices that have written bitstream bytes are counted
+      iTotalNalCount += pSlice->sSliceBs.iNalIndex;
+    }
+  }
+
+  return iTotalNalCount;
+}
+
+int32_t GetTotalCodedNalCount(SFrameBSInfo* pFbi) { // adds up iNalCount over all MAX_LAYER_NUM_OF_FRAME entries of pFbi->sLayerInfo
+  int32_t iNalSum = 0;
+  for (int32_t iLayerIdx = 0; iLayerIdx < MAX_LAYER_NUM_OF_FRAME; ++iLayerIdx) {
+    iNalSum += pFbi->sLayerInfo[iLayerIdx].iNalCount;
+  }
+
+  return iNalSum;
+}
+
 int32_t FrameBsRealloc (sWelsEncCtx* pCtx,
                         SFrameBSInfo* pFrameBsInfo,
                         SLayerBSInfo* pLayerBsInfo,
@@ -1508,33 +1561,27 @@
 int32_t SliceLayerInfoUpdate (sWelsEncCtx* pCtx,
                               SFrameBSInfo* pFrameBsInfo,
                               SLayerBSInfo* pLayerBsInfo,
-                              const int32_t kiPartitionNum) {
-  SDqLayer* pCurLayer     = pCtx->pCurDqLayer;
-  int32_t iMaxSliceNum    = 0;
-  int32_t iThreadIdx      = 0;
-  int32_t iRet            = 0;
-  int32_t iThreadNum      = 1; //TODO: should be equal to pCurLayer->iMaxSliceNum;
+                              const SliceModeEnum kuiSliceMode) { // kuiSliceMode is passed through to ReOrderSliceInLayer
+  int32_t iMaxSliceNum   = 0; // sum of iMaxSliceNumInThread over the active threads
+  int32_t iCodedSliceNum = 0; // value returned by GetCurrentSliceNum for the current layer
+  int32_t iCodedNalCount = 0; // NAL total over all layers of pFrameBsInfo
+  int32_t iRet           = 0;
 
-  for ( ; iThreadIdx < iThreadNum; iThreadIdx++) {
-    iMaxSliceNum += pCurLayer->sSliceThreadInfo.iMaxSliceNumInThread[iThreadIdx];
+  for ( int32_t iThreadIdx = 0; iThreadIdx < pCtx->iActiveThreadsNum; iThreadIdx++) {
+    iMaxSliceNum += pCtx->pCurDqLayer->sSliceThreadInfo.iMaxSliceNumInThread[iThreadIdx];
   }
 
   //reallocate ppSliceInLayer if total encoded slice num exceed max slice num
-  if (iMaxSliceNum > pCurLayer->iMaxSliceNum) {
-    iRet = FrameBsRealloc (pCtx, pFrameBsInfo, pLayerBsInfo, pCtx->pCurDqLayer->iMaxSliceNum);
-    if(ENC_RETURN_SUCCESS != iRet) {
-      return iRet;
-    }
-
+  if (iMaxSliceNum > pCtx->pCurDqLayer->iMaxSliceNum) { // per-thread capacity has outgrown the layer-level tables
     iRet = ExtendLayerBuffer(pCtx, pCtx->pCurDqLayer->iMaxSliceNum, iMaxSliceNum);
     if (ENC_RETURN_SUCCESS != iRet) {
       return iRet;
     }
-    pCurLayer->iMaxSliceNum = iMaxSliceNum;
+    pCtx->pCurDqLayer->iMaxSliceNum = iMaxSliceNum; // record the new layer capacity only after the extension succeeded
   }
 
   //update ppSliceInLayer based on pSliceInThread, reordering based on slice index
-  iRet = ReOrderSliceInLayer (pCurLayer, iThreadNum, kiPartitionNum);
+  iRet = ReOrderSliceInLayer (pCtx, kuiSliceMode, pCtx->iActiveThreadsNum);
   if (ENC_RETURN_SUCCESS != iRet) {
     WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
              "CWelsH264SVCEncoder::SliceLayerInfoUpdate: ReOrderSliceInLayer failed");
@@ -1541,6 +1588,18 @@
     return iRet;
   }
 
+  // Extend the NAL list buffer when the frame's coded NAL total exceeds pCtx->pOut->iCountNals
+  iCodedSliceNum          = GetCurrentSliceNum (pCtx->pCurDqLayer);
+  pLayerBsInfo->iNalCount = GetCurLayerNalCount(pCtx->pCurDqLayer, iCodedSliceNum); // stored before the frame-wide total is computed
+  iCodedNalCount          = GetTotalCodedNalCount(pFrameBsInfo);
+
+  if( iCodedNalCount > pCtx->pOut->iCountNals) {
+    iRet = FrameBsRealloc (pCtx, pFrameBsInfo, pLayerBsInfo, pCtx->pCurDqLayer->iMaxSliceNum);
+    if(ENC_RETURN_SUCCESS != iRet) {
+      return iRet;
+    }
+  }
+
   return ENC_RETURN_SUCCESS;
 }
 
@@ -1602,7 +1661,8 @@
 void AddSliceBoundary (sWelsEncCtx* pEncCtx, SSlice* pCurSlice, SSliceCtx* pSliceCtx, SMB* pCurMb,
                        int32_t iFirstMbIdxOfNextSlice, const int32_t kiLastMbIdxInPartition) {
   SDqLayer*     pCurLayer       = pEncCtx->pCurDqLayer;
-  SSlice*       pSliceInThread  = pCurLayer->sSliceThreadInfo.pSliceInThread[0];
+  SSlice*       pSliceInThread  = pCurLayer->sSliceThreadInfo.pSliceInThread[pCurSlice->uiThreadIdx];
+  int32_t       iCodedSliceNum  = pCurLayer->sSliceThreadInfo.iEncodedSliceNumInThread[pCurSlice->uiThreadIdx];
   int32_t       iCurMbIdx       = pCurMb->iMbXY;
   uint16_t      iCurSliceIdc    = pSliceCtx->pOverallMbMap[ iCurMbIdx ];
   const int32_t kiSliceIdxStep  = pEncCtx->iActiveThreadsNum;
@@ -1615,7 +1675,11 @@
   pCurSlice->sSliceHeaderExt.uiNumMbsInSlice = 1 + iCurMbIdx - pCurSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
 
   //pNextSlice pointer/initialization
-  pNextSlice = &pSliceInThread[ iNextSliceIdc ];
+  if(pEncCtx->iActiveThreadsNum > 1) {
+    pNextSlice = &pSliceInThread[ iCodedSliceNum + 1 ];
+  } else {
+    pNextSlice = &pSliceInThread[ iNextSliceIdc ];
+  }
 
 #if _DEBUG
   assert (NULL != pNextSlice);
@@ -1643,7 +1707,7 @@
   int32_t iPosBitOffset = 0;
   const int32_t  kiActiveThreadsNum    = pEncCtx->iActiveThreadsNum;
   const int32_t  kiPartitaionId        = pCurSlice->iSliceIdx % kiActiveThreadsNum;
-  const int32_t  kiEndMbIdxOfPartition = pEncCtx->pCurDqLayer->pEndMbIdxOfPartition[kiPartitaionId];
+  const int32_t  kiEndMbIdxOfPartition = pEncCtx->pCurDqLayer->EndMbIdxOfPartition[kiPartitaionId];
   const int32_t  kiMaxSliceNum         = pEncCtx->pCurDqLayer->sSliceThreadInfo.iMaxSliceNumInThread[pCurSlice->uiThreadIdx];
   const bool    kbCurMbNotFirstMbOfCurSlice  = ((iCurMbIdx > 0) && (pSliceCtx->pOverallMbMap[iCurMbIdx] ==
       pSliceCtx->pOverallMbMap[iCurMbIdx - 1]));
@@ -1676,37 +1740,14 @@
     //      as iMaxSliceNum is always equal to iMaxSliceNumConstraint in origin design
     //      and will also extend when reallocated,
     //  tmp change is:  iMaxSliceNumConstraint is alway set to be MAXSLICENUM, will not change even reallocate
-    const bool kbSliceNumNotExceedConstraint = pSliceCtx->iSliceNumInFrame < kiMaxSliceNum;
-    const bool kbSliceIdxNotExceedConstraint = ((int) pCurSlice->iSliceIdx + kiActiveThreadsNum) < kiMaxSliceNum;
-    const bool kbSliceNumReachConstraint     = (pSliceCtx->iSliceNumInFrame == kiMaxSliceNum);
+    AddSliceBoundary (pEncCtx, pCurSlice, pSliceCtx, pCurMb, iCurMbIdx, kiEndMbIdxOfPartition);
+    ++ pSliceCtx->iSliceNumInFrame;
 
-    //DYNAMIC_SLICING_ONE_THREAD: judge jump_avoiding_pack_exceed
-    if (kbSliceNumNotExceedConstraint && kbSliceIdxNotExceedConstraint) {//able to add new pSlice
-
-      AddSliceBoundary (pEncCtx, pCurSlice, pSliceCtx, pCurMb, iCurMbIdx, kiEndMbIdxOfPartition);
-
-      ++ pSliceCtx->iSliceNumInFrame;
-
-      if (pEncCtx->pSvcParam->iMultipleThreadIdc > 1) {
-        WelsMutexUnlock (&pEncCtx->pSliceThreading->mutexSliceNumUpdate);
-      }
-
-      return true;
-    }
     if (pEncCtx->pSvcParam->iMultipleThreadIdc > 1) {
       WelsMutexUnlock (&pEncCtx->pSliceThreading->mutexSliceNumUpdate);
     }
 
-    if ((kbSliceNumReachConstraint || !kbSliceIdxNotExceedConstraint)
-        && kbCurMbNotLastMbOfCurPartition
-        && JUMPPACKETSIZE_JUDGE (uiLen, iCurMbIdx,
-                                 pSliceCtx->uiSliceSizeConstraint - ((kiEndMbIdxOfPartition - iCurMbIdx) <<
-                                     (pCurSlice->uiAssumeLog2BytePerMb) //assume each MB consumes these byte under largest QP
-                                                                    ))
-       ) {
-      // to minimize the impact under the risk of exceeding the size constraint when pSlice num reaches constraint
-      pCurSlice->bDynamicSlicingSliceSizeCtrlFlag = true;
-    }
+    return true;
   }
 
   return false;
@@ -1903,9 +1944,9 @@
     sDss.iCurrentPos = pEncCtx->pFuncList->pfGetBsPosition (pSlice);
     if (DynSlcJudgeSliceBoundaryStepBack (pEncCtx, pSlice, pSliceCtx, pCurMb, &sDss)) {
       pSlice->iMbSkipRun = pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
-      pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx -
-          1; // update pLastCodedMbIdxOfPartition, need to -1 due to stepping back
-      ++ pCurLayer->pNumSliceCodedOfPartition[kiPartitionId];
+      pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx -
+          1; // update LastCodedMbIdxOfPartition, need to -1 due to stepping back
+      ++ pCurLayer->NumSliceCodedOfPartition[kiPartitionId];
 
       break;
     }
@@ -1926,8 +1967,9 @@
     iNextMbIdx = WelsGetNextMbOfSlice (pCurLayer, iCurMbIdx);
     //whether all of MB in current pSlice encoded or not
     if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbCoded >= kiTotalNumMb) {
-      pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId] =
-        iCurMbIdx; // update pLastCodedMbIdxOfPartition, finish coding, use pCurMb_idx directly
+      pCurLayer->LastCodedMbIdxOfPartition[kiPartitionId] = iCurMbIdx;
+      ++ pCurLayer->NumSliceCodedOfPartition[kiPartitionId];
+
       break;
     }
   }
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -112,13 +112,12 @@
     return ENC_RETURN_UNEXPECTED;
   }
 
-  //m_pCtx->iEncoderError = InitOneSliceInThread (m_pCtx, m_pSlice, m_iThreadIdx, m_pCtx->uiDependencyId, m_iSliceIdx);
-  //WELS_VERIFY_RETURN_IFNEQ (m_pCtx->iEncoderError, ENC_RETURN_SUCCESS)
-  m_pSlice = &m_pCtx->pCurDqLayer->sSliceThreadInfo.pSliceInThread[0][m_iSliceIdx];
+  int32_t iReturn = InitOneSliceInThread (m_pCtx, m_pSlice, m_iThreadIdx, m_pCtx->uiDependencyId, m_iSliceIdx);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
   m_pSliceBs = &m_pSlice->sSliceBs;
 
-  m_pSliceBs->uiBsPos       = 0;
-  m_pSliceBs->iNalIndex     = 0;
+  iReturn   = SetSliceBoundaryInfo(m_pCtx->pCurDqLayer, m_pSlice, m_iSliceIdx);
+  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
 
   m_pCtx->iEncoderError   = SetSliceBoundaryInfo(m_pCtx->pCurDqLayer, m_pSlice, m_iSliceIdx);
   WELS_VERIFY_RETURN_IFNEQ (m_pCtx->iEncoderError, ENC_RETURN_SUCCESS)
@@ -150,6 +149,7 @@
 }
 
 WelsErrorType CWelsSliceEncodingTask::ExecuteTask() {
+
 #if MT_DEBUG_BS_WR
   m_pSliceBs->bSliceCodedFlag = false;
 #endif//MT_DEBUG_BS_WR
@@ -176,7 +176,6 @@
 
   m_iSliceSize = 0;
   iReturn      = WriteSliceBs (m_pCtx, m_pSliceBs, m_iSliceIdx, m_iSliceSize);
-
   if (ENC_RETURN_SUCCESS != iReturn) {
     WelsLog (&m_pCtx->sLogCtx, WELS_LOG_WARNING,
              "[MT] CWelsSliceEncodingTask ExecuteTask(), WriteSliceBs not successful: coding_idx %d, um_iSliceIdx %d",
@@ -185,7 +184,7 @@
     return iReturn;
   }
 
-  m_pCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (m_pCtx->pCurDqLayer, m_pCtx->pFuncList, m_iSliceIdx);
+  m_pCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (m_pCtx->pCurDqLayer, m_pCtx->pFuncList, m_pSlice);
 
   WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DETAIL,
            "@pSlice=%-6d sliceType:%c idc:%d size:%-6d",  m_iSliceIdx,
@@ -196,9 +195,7 @@
 #if MT_DEBUG_BS_WR
   m_pSliceBs->bSliceCodedFlag = true;
 #endif//MT_DEBUG_BS_WR
-
-  //m_pCtx->pCurDqLayer->sSliceThreadInfo.iEncodedSliceNumInThread[m_iThreadIdx] ++;
-
+    m_pCtx->pCurDqLayer->sSliceThreadInfo.iEncodedSliceNumInThread[m_iThreadIdx] ++;
   return ENC_RETURN_SUCCESS;
 }
 
@@ -238,38 +235,42 @@
 WelsErrorType CWelsConstrainedSizeSlicingEncodingTask::ExecuteTask() {
 
   SDqLayer* pCurDq                        = m_pCtx->pCurDqLayer;
-  SSliceCtx* pSliceCtx                    = &pCurDq->sSliceEncCtx;
   const int32_t kiSliceIdxStep            = m_pCtx->iActiveThreadsNum;
   SSpatialLayerInternal* pParamInternal   = &m_pCtx->pSvcParam->sDependencyLayers[m_pCtx->uiDependencyId];
   const int32_t kiPartitionId             = m_iSliceIdx % kiSliceIdxStep;
-  const int32_t kiFirstMbInPartition      = pCurDq->pFirstMbIdxOfPartition[kiPartitionId];
-  const int32_t kiEndMbIdxInPartition     = pCurDq->pEndMbIdxOfPartition[kiPartitionId];
-  m_pSlice                                = &pCurDq->sSliceThreadInfo.pSliceInThread[0][m_iSliceIdx];
+  const int32_t kiFirstMbInPartition      = pCurDq->FirstMbIdxOfPartition[kiPartitionId];
+  const int32_t kiEndMbIdxInPartition     = pCurDq->EndMbIdxOfPartition[kiPartitionId];
+  const int32_t kiCodedSliceNumByThread   = pCurDq->sSliceThreadInfo.iEncodedSliceNumInThread[m_iThreadIdx];
+  m_pSlice                                = &pCurDq->sSliceThreadInfo.pSliceInThread[m_iThreadIdx][kiCodedSliceNumByThread];
   m_pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice  = kiFirstMbInPartition;
-  pCurDq->pNumSliceCodedOfPartition[kiPartitionId]        = 1;
-  pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]       = 0;
+  int32_t iReturn      = 0;
+  bool bNeedReallocate = false;
 
   //deal with partition: TODO: here SSliceThreadPrivateData is just for parition info and actually has little relationship with threadbuffer, and iThreadIndex is not used in threadpool model, need renaming after removing old logic to avoid confusion
+  int32_t iDiffMbIdx = kiEndMbIdxInPartition - kiFirstMbInPartition;
+  if( 0 == iDiffMbIdx) {
+    m_pSlice->iSliceIdx = -1;
+    return ENC_RETURN_SUCCESS;
+  }
 
-  int32_t iAnyMbLeftInPartition           = kiEndMbIdxInPartition - kiFirstMbInPartition + 1;
+  int32_t iAnyMbLeftInPartition = iDiffMbIdx + 1;
   int32_t iLocalSliceIdx = m_iSliceIdx;
   while (iAnyMbLeftInPartition > 0) {
-    //TODO: will reallocate
-    if (iLocalSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
-      WelsLog (&m_pCtx->sLogCtx, WELS_LOG_WARNING,
-               "[MT] CWelsConstrainedSizeSlicingEncodingTask ExecuteTask() coding_idx %d, uiLocalSliceIdx %d, pSliceCtx->iMaxSliceNumConstraint %d",
-               pParamInternal->iCodingIndex,
-               iLocalSliceIdx, pSliceCtx->iMaxSliceNumConstraint);
-      return ENC_RETURN_KNOWN_ISSUE;
-    }
+      bNeedReallocate = (pCurDq->sSliceThreadInfo.iEncodedSliceNumInThread[m_iThreadIdx]
+                         >=  pCurDq->sSliceThreadInfo.iMaxSliceNumInThread[m_iThreadIdx] -1) ? true : false;
+      if (bNeedReallocate) {
+          WelsMutexLock (&m_pCtx->pSliceThreading->mutexThreadSlcBuffReallocate);
+          //for memory statistic variable
+          iReturn = ReallocateSliceInThread(m_pCtx, pCurDq, m_pCtx->uiDependencyId, m_iThreadIdx);
+          WelsMutexUnlock (&m_pCtx->pSliceThreading->mutexThreadSlcBuffReallocate);
+          if (ENC_RETURN_SUCCESS != iReturn) {
+              return iReturn;
+          }
+      }
 
-    m_pSlice = &pCurDq->sSliceThreadInfo.pSliceInThread[0][iLocalSliceIdx];
+    iReturn = InitOneSliceInThread (m_pCtx, m_pSlice, m_iThreadIdx, m_pCtx->uiDependencyId, iLocalSliceIdx);
+    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
     m_pSliceBs = &m_pSlice->sSliceBs;
-
-    m_pSliceBs->uiBsPos     = 0;
-    m_pSliceBs->iNalIndex   = 0;
-
-    SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, m_pSlice);
     InitBits (&m_pSliceBs->sBsWrite, m_pSliceBs->pBsBuffer, m_pSliceBs->uiSize);
 
     if (m_bNeedPrefix) {
@@ -302,7 +303,7 @@
       return iReturn;
     }
 
-    m_pCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, m_pCtx->pFuncList, iLocalSliceIdx);
+    m_pCtx->pFuncList->pfDeblocking.pfDeblockingFilterSlice (pCurDq, m_pCtx->pFuncList, m_pSlice);
 
     WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DETAIL,
              "@pSlice=%-6d sliceType:%c idc:%d size:%-6d\n",
@@ -313,12 +314,14 @@
             );
 
     WelsLog (&m_pCtx->sLogCtx, WELS_LOG_DEBUG,
-             "[MT] CWelsConstrainedSizeSlicingEncodingTask(), coding_idx %d, iPartitionId %d, m_iThreadIdx %d, iLocalSliceIdx %d, m_iSliceSize %d, ParamValidationExt(), invalid uiMaxNalSizeiEndMbInPartition %d, pCurDq->pLastCodedMbIdxOfPartition[%d] %d\n",
+             "[MT] CWelsConstrainedSizeSlicingEncodingTask(), coding_idx %d, iPartitionId %d, m_iThreadIdx %d, iLocalSliceIdx %d, m_iSliceSize %d, ParamValidationExt(), invalid uiMaxNalSizeiEndMbInPartition %d, pCurDq->LastCodedMbIdxOfPartition[%d] %d\n",
              pParamInternal->iCodingIndex, kiPartitionId, m_iThreadIdx, iLocalSliceIdx, m_iSliceSize,
-             kiEndMbIdxInPartition, kiPartitionId, pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId]);
+             kiEndMbIdxInPartition, kiPartitionId, pCurDq->LastCodedMbIdxOfPartition[kiPartitionId]);
 
-    iAnyMbLeftInPartition = kiEndMbIdxInPartition - pCurDq->pLastCodedMbIdxOfPartition[kiPartitionId];
+    iAnyMbLeftInPartition = kiEndMbIdxInPartition - pCurDq->LastCodedMbIdxOfPartition[kiPartitionId];
     iLocalSliceIdx += kiSliceIdxStep;
+    m_pCtx->pCurDqLayer->sSliceThreadInfo.iEncodedSliceNumInThread[m_iThreadIdx] ++;
+
   }
 
   return ENC_RETURN_SUCCESS;
--- a/test/api/encoder_test.cpp
+++ b/test/api/encoder_test.cpp
@@ -130,7 +130,7 @@
   },
   {
     "res/Cisco_Absolute_Power_1280x720_30fps.yuv",
-    {"5f4580523b761bebfb02b7a780df2a696eaddd33"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, false
+    {"d6a42dfac92eb8e9a29f59b2da73e6af41f6d803"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, false
   },
   {
     "res/Cisco_Absolute_Power_1280x720_30fps.yuv",
@@ -139,6 +139,7 @@
     { "fe7c5fa35c5c8f1eac0408b93d356a6d0bb7e5bd", "91ba01ef0067b0d211a6fbe5e2f96e82ce319a41" },
     CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SINGLE_SLICE, false, 4, false, false, false
   },
+
   // the following values may be adjusted for times since we start tuning the strategy
   {
     "res/CiscoVT2people_320x192_12fps.yuv",
@@ -154,25 +155,28 @@
   },
   {
     "res/Cisco_Absolute_Power_1280x720_30fps.yuv",
-    {"c2c53f088acce547851d9adc81042376adb3a50f"}, SCREEN_CONTENT_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, false
+    {"d625df1540b074ee4b13034c728b9a02f8bb1371"}, SCREEN_CONTENT_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, false
   },
   //for different strategy
   {
     "res/Cisco_Absolute_Power_1280x720_30fps.yuv",
-    {"4398b0d99cdb06197441b42ee495a003aa1f25f0"}, SCREEN_CONTENT_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, true, true, false
+    {"d1c2ee11318521a911718d883d720e9e83bf533f"}, SCREEN_CONTENT_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, true, true, false
   },
   {
     "res/CiscoVT2people_320x192_12fps.yuv",
     {"ab23d0ae245283b3f12db766df155ced7b4a610e"}, CAMERA_VIDEO_REAL_TIME, 320, 192, 12.0f, SM_SINGLE_SLICE, false, 1, false, false, true //turn on cabac
   },
+
   {
     "res/Cisco_Absolute_Power_1280x720_30fps.yuv",
-    {"7e59555ff5832e65e776ced13b35d054cf93a32c"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, true
+    {"a511da916ba97967e18540953d2c0b1287b50aa3"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_SIZELIMITED_SLICE, false, 1, false, false, true
   },
+
   {
     "res/Cisco_Absolute_Power_1280x720_30fps.yuv",
     {"c82965b5119341b288a7c39135ce44521a6f23e1"}, CAMERA_VIDEO_REAL_TIME, 1280, 720, 30.0f, SM_FIXEDSLCNUM_SLICE, false, 1, false, false, true
   },
+
 };
 
 INSTANTIATE_TEST_CASE_P (EncodeFile, EncoderOutputTest,