shithub: openh264

Download patch

ref: d868289147497f0806e1b2ebeff715018c374d42
parent: 9a81539c5b758d745ddb9960ef714013cfaeb56f
author: xiaotianshicomcast <[email protected]>
date: Wed Dec 19 10:04:22 EST 2018

Add clean CAVLC B-Frame decoder support without whitespace change and general fix.

--- a/codec/decoder/core/inc/decode_slice.h
+++ b/codec/decoder/core/inc/decode_slice.h
@@ -42,6 +42,10 @@
 
 int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx);
 int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+
+int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx); // parses one coded (non-skipped) B-slice MB
+int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); // per-MB entry: handles mb_skip_run, sets uiEosFlag at slice end
+
 typedef int32_t (*PWelsDecMbFunc) (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
 
 int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
--- a/codec/decoder/core/inc/parse_mb_syn_cavlc.h
+++ b/codec/decoder/core/inc/parse_mb_syn_cavlc.h
@@ -131,6 +131,7 @@
  */
 int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
                         PBitStringAux pBs);
-
+int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A],
+                         int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs); // inter info parser called by WelsActualDecodeMbCavlcBSlice
 } // namespace WelsDec
 #endif//WELS_PARSE_MB_SYN_CAVLC_H__
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -1525,6 +1525,8 @@
   } else {
     if (P_SLICE == pSliceHeader->eSliceType) {
       pDecMbFunc = WelsDecodeMbCavlcPSlice;
+    } else if (B_SLICE == pSliceHeader->eSliceType) {
+      pDecMbFunc = WelsDecodeMbCavlcBSlice;
     } else { //I_SLICE
       pDecMbFunc = WelsDecodeMbCavlcISlice;
     }
@@ -2339,6 +2341,448 @@
              (int64_t) iUsedBits, pBs->iBits);
     return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
   }
+  return ERR_NONE;
+}
+
+// Decodes one macroblock of a CAVLC-coded B slice.
+// Reads mb_skip_run when no run is pending (iMbSkipRun == -1); a skipped MB is marked MB_TYPE_SKIP | MB_TYPE_DIRECT
+// and gets its motion from spatial or temporal direct prediction, any other MB goes to WelsActualDecodeMbCavlcBSlice().
+// uiEosFlag is set to 1 when all bits but the stop bit are consumed and no skip run remains (slice boundary).
+// Returns ERR_NONE on success, otherwise the error code of the failing parse/prediction step.
+int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0];
+  PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1];
+  intX_t iUsedBits;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t iBaseModeFlag, i;
+  int32_t iRet = 0; //error code of the coded-MB parser; non-zero aborts decoding
+  uint32_t uiCode;
+
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  if (-1 == pSlice->iMbSkipRun) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
+    pSlice->iMbSkipRun = uiCode;
+    if (-1 == pSlice->iMbSkipRun) { //parsed value collides with the "no run pending" marker
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
+    }
+  }
+  if (pSlice->iMbSkipRun--) {
+    int16_t iMv[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+    int8_t  ref[LIST_A] = { 0 };
+
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
+    ST32A4 (&pNzc[0], 0);
+    ST32A4 (&pNzc[4], 0);
+    ST32A4 (&pNzc[8], 0);
+    ST32A4 (&pNzc[12], 0);
+    ST32A4 (&pNzc[16], 0);
+    ST32A4 (&pNzc[20], 0);
+
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    memset (pCurLayer->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
+    memset (pCurLayer->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
+    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && ppRefPicL0[0]->bIsComplete)
+                            || ! (ppRefPicL1[0] && ppRefPicL1[0]->bIsComplete);
+    //predict iMv
+    if (pSliceHeader->iDirectSpatialMvPredFlag) {
+      //predict direct spatial mv
+      SubMbType subMbType;
+      int32_t ret = PredMvBDirectSpatial (pCtx, iMv, ref, subMbType);
+      if (ret != ERR_NONE) {
+        return ret;
+      }
+    } else {
+      //temporal direct mode
+      ComputeColocated (pCtx);
+      int32_t ret = PredBDirectTemporal (pCtx, iMv, ref);
+      if (ret != ERR_NONE) {
+        return ret;
+      }
+    }
+
+    //reset rS
+    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
+        (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
+      pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+      for (i = 0; i < 2; i++) {
+        pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                         pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+      }
+    }
+
+    pCurLayer->pCbp[iMbXy] = 0;
+  } else {
+    if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+      iBaseModeFlag = uiCode;
+    } else {
+      iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
+    }
+    if (!iBaseModeFlag) {
+      iRet = WelsActualDecodeMbCavlcBSlice (pCtx);
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+               iBaseModeFlag);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+    }
+    if (iRet) { //occur error when parsing, MUST STOP decoding
+      return iRet;
+    }
+  }
+  // check whether there is left bits to read next time in case multiple slices
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+  // sub 1, for stop bit
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+    uiEosFlag = 1;
+  }
+  if (iUsedBits > (pBs->iBits -
+                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcBSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t)iUsedBits, pBs->iBits);
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
+  }
+  return ERR_NONE;
+}
+
+// Parses one non-skipped macroblock of a CAVLC B slice from the layer's bit stream: mb_type, then either
+// inter info (ParseInterBInfo), raw I_PCM samples, or intra prediction modes, followed by coded_block_pattern,
+// the optional transform_size_8x8_flag, mb_qp_delta and the luma/chroma CAVLC residual blocks.
+// Per-MB results (type, cbp, QPs, nzc, scaled coefficients) are stored in pCtx->pCurDqLayer at iMbXyIndex.
+// Returns ERR_NONE on success or an error code on invalid syntax / unsupported inter-layer prediction.
+int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx) {
+  SVlcTable* pVlcTable = &pCtx->sVlcTable;
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBs = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+
+  SWelsNeighAvail sNeighAvail;
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t i;
+  int32_t iRet = ERR_NONE;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  int32_t iMbResProperty;
+
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
+  uiMbType = uiCode;
+  if (uiMbType < 23) { //inter MB type
+    int16_t iMotionVector[LIST_A][30][MV_A];
+    int8_t  iRefIndex[LIST_A][30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
+
+    if ((iRet = ParseInterBInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
+      return iRet;//abnormal
+    }
+
+    if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
+      pCurLayer->pResidualPredFlag[iMbXy] = uiCode;
+    } else {
+      pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+    }
+
+    if (pCurLayer->pResidualPredFlag[iMbXy] == 0) {
+      pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+    }
+  } else { //intra MB type
+    uiMbType -= 23;
+    if (uiMbType > 25)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+    if (25 == uiMbType) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!");
+      int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
+      int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+
+      int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
+      int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
+
+      uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
+      uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
+      uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+
+      uint8_t* pTmpBsBuf;
+      int32_t iCopySizeY = (sizeof (uint8_t) << 4);
+      int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
+
+      int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
+
+      pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+      //step 1: locating bit-stream pointer [must align into integer byte]
+      pBs->pCurBuf -= iIndex;
+
+      //step 2: copy pixel from bit-stream into fdec [reconstruction]
+      pTmpBsBuf = pBs->pCurBuf;
+      if (!pCtx->pParam->bParseOnly) {
+        for (i = 0; i < 16; i++) { //luma
+          memcpy (pDecY, pTmpBsBuf, iCopySizeY);
+          pDecY += iDecStrideL;
+          pTmpBsBuf += 16;
+        }
+
+        for (i = 0; i < 8; i++) { //cb
+          memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
+          pDecU += iDecStrideC;
+          pTmpBsBuf += 8;
+        }
+        for (i = 0; i < 8; i++) { //cr
+          memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
+          pDecV += iDecStrideC;
+          pTmpBsBuf += 8;
+        }
+      }
+
+      pBs->pCurBuf += 384;
+
+      //step 3: update QP and pNonZeroCount
+      pCurLayer->pLumaQp[iMbXy] = 0;
+      pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
+      //Rec. 9.2.1 for PCM, nzc=16
+      ST32A4 (&pNzc[0], 0x10101010);
+      ST32A4 (&pNzc[4], 0x10101010);
+      ST32A4 (&pNzc[8], 0x10101010);
+      ST32A4 (&pNzc[12], 0x10101010);
+      ST32A4 (&pNzc[16], 0x10101010);
+      ST32A4 (&pNzc[20], 0x10101010);
+      WELS_READ_VERIFY (InitReadBits (pBs, 0));
+      return ERR_NONE;
+    } else {
+      if (0 == uiMbType) {
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        if (pCtx->pPps->bTransform8x8ModeFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+          pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+          if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+            uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+          }
+        }
+        if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+        } else {
+          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+        }
+      } else { //I_PCM exclude, we can ignore it
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+        uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+        uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+        if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) != ERR_NONE) {
+          return iRet;
+        }
+      }
+    }
+  }
+
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
+    uiCbp = uiCode;
+    {
+      if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+      if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+      if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) {
+        uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
+      } else //inter
+        uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
+    }
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+
+    // NOTE(review): this condition was flagged as needing modification for B pictures - confirm it covers B MB types
+    bool bNeedParseTransformSize8x8Flag =
+      (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
+        || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+       && (uiCbpL > 0)
+       && (pCtx->pPps->bTransform8x8ModeFlag));
+
+    if (bNeedParseTransformSize8x8Flag) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+    }
+  }
+
+  ST32A4 (&pNzc[0], 0);
+  ST32A4 (&pNzc[4], 0);
+  ST32A4 (&pNzc[8], 0);
+  ST32A4 (&pNzc[12], 0);
+  ST32A4 (&pNzc[16], 0);
+  ST32A4 (&pNzc[20], 0);
+  if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    int32_t iQpDelta, iId8x8, iId4x4;
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
+    iQpDelta = iCode;
+
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
+                                       51)];
+    }
+
+    BsStartCavlc (pBs);
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
+                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+        return iRet;//abnormal
+      }
+      //step2: Luma AC
+      if (uiCbpL) {
+        for (i = 0; i < 16; i++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
+                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
+          if (uiCbpL & (1 << iId8x8)) {
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
+                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;
+              }
+              iIndex++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else { // Normal T4x4
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+          if (uiCbpL & (1 << iId8x8)) {
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                  g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;//abnormal
+              }
+              iIndex++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpC || 2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
+                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+          return iRet;//abnormal
+        }
+      }
+    }
+    //step2: AC
+    if (2 == uiCbpC) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+
+        int32_t iIndex = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
   return ERR_NONE;
 }
 
--- a/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
+++ b/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
@@ -434,6 +434,13 @@
   int32_t iLeftTopXy  = 0;
   int32_t iRightTopXy = 0;
 
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  int32_t listCount = 1;
+  if (pSliceHeader->eSliceType == B_SLICE) {
+    listCount = 2;
+  }
+
   //stuff non_zero_coeff_count from pNeighAvail(left and top)
   WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
 
@@ -450,86 +457,88 @@
     iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
   }
 
-  //stuff mv_cache and iRefIdxArray from left and top (inter)
-  if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
-    ST32 (iMvArray[0][ 6], LD32 (pCurLayer->pMv[0][iLeftXy][ 3]));
-    ST32 (iMvArray[0][12], LD32 (pCurLayer->pMv[0][iLeftXy][ 7]));
-    ST32 (iMvArray[0][18], LD32 (pCurLayer->pMv[0][iLeftXy][11]));
-    ST32 (iMvArray[0][24], LD32 (pCurLayer->pMv[0][iLeftXy][15]));
-    iRefIdxArray[0][ 6] = pCurLayer->pRefIndex[0][iLeftXy][ 3];
-    iRefIdxArray[0][12] = pCurLayer->pRefIndex[0][iLeftXy][ 7];
-    iRefIdxArray[0][18] = pCurLayer->pRefIndex[0][iLeftXy][11];
-    iRefIdxArray[0][24] = pCurLayer->pRefIndex[0][iLeftXy][15];
-  } else {
-    ST32 (iMvArray[0][ 6], 0);
-    ST32 (iMvArray[0][12], 0);
-    ST32 (iMvArray[0][18], 0);
-    ST32 (iMvArray[0][24], 0);
+  for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) {
+    //stuff mv_cache and iRefIdxArray from left and top (inter)
+    if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
+      ST32 (iMvArray[listIdx][6], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3]));
+      ST32 (iMvArray[listIdx][12], LD32 (pCurLayer->pMv[listIdx][iLeftXy][7]));
+      ST32 (iMvArray[listIdx][18], LD32 (pCurLayer->pMv[listIdx][iLeftXy][11]));
+      ST32 (iMvArray[listIdx][24], LD32 (pCurLayer->pMv[listIdx][iLeftXy][15]));
+      iRefIdxArray[listIdx][6] = pCurLayer->pRefIndex[listIdx][iLeftXy][3];
+      iRefIdxArray[listIdx][12] = pCurLayer->pRefIndex[listIdx][iLeftXy][7];
+      iRefIdxArray[listIdx][18] = pCurLayer->pRefIndex[listIdx][iLeftXy][11];
+      iRefIdxArray[listIdx][24] = pCurLayer->pRefIndex[listIdx][iLeftXy][15];
+    } else {
+      ST32 (iMvArray[listIdx][6], 0);
+      ST32 (iMvArray[listIdx][12], 0);
+      ST32 (iMvArray[listIdx][18], 0);
+      ST32 (iMvArray[listIdx][24], 0);
 
-    if (0 == pNeighAvail->iLeftAvail) { //not available
-      iRefIdxArray[0][ 6] =
-        iRefIdxArray[0][12] =
-          iRefIdxArray[0][18] =
-            iRefIdxArray[0][24] = REF_NOT_AVAIL;
-    } else { //available but is intra mb type
-      iRefIdxArray[0][ 6] =
-        iRefIdxArray[0][12] =
-          iRefIdxArray[0][18] =
-            iRefIdxArray[0][24] = REF_NOT_IN_LIST;
+      if (0 == pNeighAvail->iLeftAvail) { //not available
+        iRefIdxArray[listIdx][6] =
+          iRefIdxArray[listIdx][12] =
+            iRefIdxArray[listIdx][18] =
+              iRefIdxArray[listIdx][24] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRefIdxArray[listIdx][6] =
+          iRefIdxArray[listIdx][12] =
+            iRefIdxArray[listIdx][18] =
+              iRefIdxArray[listIdx][24] = REF_NOT_IN_LIST;
+      }
     }
-  }
-  if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
-    ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
-    iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15];
-  } else {
-    ST32 (iMvArray[0][0], 0);
-    if (0 == pNeighAvail->iLeftTopAvail) { //not available
-      iRefIdxArray[0][0] = REF_NOT_AVAIL;
-    } else { //available but is intra mb type
-      iRefIdxArray[0][0] = REF_NOT_IN_LIST;
+    if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
+      ST32 (iMvArray[listIdx][0], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15]));
+      iRefIdxArray[listIdx][0] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15];
+    } else {
+      ST32 (iMvArray[listIdx][0], 0);
+      if (0 == pNeighAvail->iLeftTopAvail) { //not available
+        iRefIdxArray[listIdx][0] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRefIdxArray[listIdx][0] = REF_NOT_IN_LIST;
+      }
     }
-  }
-  if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
-    ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
-    ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
-    ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12]));
-  } else {
-    ST64 (iMvArray[0][1], 0);
-    ST64 (iMvArray[0][3], 0);
-    if (0 == pNeighAvail->iTopAvail) { //not available
-      iRefIdxArray[0][1] =
-        iRefIdxArray[0][2] =
-          iRefIdxArray[0][3] =
-            iRefIdxArray[0][4] = REF_NOT_AVAIL;
-    } else { //available but is intra mb type
-      iRefIdxArray[0][1] =
-        iRefIdxArray[0][2] =
-          iRefIdxArray[0][3] =
-            iRefIdxArray[0][4] = REF_NOT_IN_LIST;
+    if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
+      ST64 (iMvArray[listIdx][1], LD64 (pCurLayer->pMv[listIdx][iTopXy][12]));
+      ST64 (iMvArray[listIdx][3], LD64 (pCurLayer->pMv[listIdx][iTopXy][14]));
+      ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurLayer->pRefIndex[listIdx][iTopXy][12]));
+    } else {
+      ST64 (iMvArray[listIdx][1], 0);
+      ST64 (iMvArray[listIdx][3], 0);
+      if (0 == pNeighAvail->iTopAvail) { //not available
+        iRefIdxArray[listIdx][1] =
+          iRefIdxArray[listIdx][2] =
+            iRefIdxArray[listIdx][3] =
+              iRefIdxArray[listIdx][4] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRefIdxArray[listIdx][1] =
+          iRefIdxArray[listIdx][2] =
+            iRefIdxArray[listIdx][3] =
+              iRefIdxArray[listIdx][4] = REF_NOT_IN_LIST;
+      }
     }
-  }
-  if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
-    ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
-    iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
-  } else {
-    ST32 (iMvArray[0][5], 0);
-    if (0 == pNeighAvail->iRightTopAvail) { //not available
-      iRefIdxArray[0][5] = REF_NOT_AVAIL;
-    } else { //available but is intra mb type
-      iRefIdxArray[0][5] = REF_NOT_IN_LIST;
+    if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
+      ST32 (iMvArray[listIdx][5], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12]));
+      iRefIdxArray[listIdx][5] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12];
+    } else {
+      ST32 (iMvArray[listIdx][5], 0);
+      if (0 == pNeighAvail->iRightTopAvail) { //not available
+        iRefIdxArray[listIdx][5] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRefIdxArray[listIdx][5] = REF_NOT_IN_LIST;
+      }
     }
+    //right-top 4*4 block unavailable
+    ST32 (iMvArray[listIdx][9], 0);
+    ST32 (iMvArray[listIdx][21], 0);
+    ST32 (iMvArray[listIdx][11], 0);
+    ST32 (iMvArray[listIdx][17], 0);
+    ST32 (iMvArray[listIdx][23], 0);
+    iRefIdxArray[listIdx][9] =
+      iRefIdxArray[listIdx][21] =
+        iRefIdxArray[listIdx][11] =
+          iRefIdxArray[listIdx][17] =
+            iRefIdxArray[listIdx][23] = REF_NOT_AVAIL;
   }
-  //right-top 4*4 block unavailable
-  ST32 (iMvArray[0][ 9], 0);
-  ST32 (iMvArray[0][21], 0);
-  ST32 (iMvArray[0][11], 0);
-  ST32 (iMvArray[0][17], 0);
-  ST32 (iMvArray[0][23], 0);
-  iRefIdxArray[0][ 9] =
-    iRefIdxArray[0][21] =
-      iRefIdxArray[0][11] =
-        iRefIdxArray[0][17] =
-          iRefIdxArray[0][23] = REF_NOT_AVAIL;
 }
 
 int32_t PredIntra4x4Mode (int8_t* pIntraPredMode, int32_t iIdx4) {
@@ -1313,5 +1322,512 @@
 
   return ERR_NONE;
 }
+// Parse the CAVLC inter-prediction syntax of the current B-slice MB (sub_mb_type, ref_idx_lX, mvd_lX), deriving direct
+// partitions instead of reading them, and store the result in iMvArray / iRefIdxArray and in pCurDqLayer.
+// Returns ERR_NONE on success, else the error from a failed read/prediction helper or an ERR_LEVEL_MB_DATA code.
+int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A],
+                         int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs) {
+  PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppRefPic[2] = { pCtx->sRefPic.pRefList[LIST_0], pCtx->sRefPic.pRefList[LIST_1] };
+  int8_t ref_idx_list[LIST_A][4];
+  int8_t iRef[2] = { 0, 0 };
+  int32_t iRefCount[2];
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  uint8_t iMotionPredFlag[LIST_A][4];
+  int16_t iMv[2];
+  uint32_t uiCode;
+  int32_t iCode;
+  int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv;
+  int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv;
+  memset (ref_idx_list, -1, LIST_A * 4);
+  memset (iMotionPredFlag, (pSlice->sSliceHeaderExt.bDefaultMotionPredFlag ? 1 : 0), LIST_A * 4);
+  iRefCount[0] = pSliceHeader->uiRefCount[0];
+  iRefCount[1] = pSliceHeader->uiRefCount[1];
 
+  MbType mbType = pCurDqLayer->pMbType[iMbXy];
+  if (IS_DIRECT (mbType)) {
+    int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+    if (pSliceHeader->iDirectSpatialMvPredFlag) {
+      //predict direct spatial mv
+      SubMbType subMbType;
+      int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, subMbType);
+      if (ret != ERR_NONE) {
+        return ret;
+      }
+    } else {
+      //temporal direct 16x16 mode
+      ComputeColocated (pCtx);
+      int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef);
+      if (ret != ERR_NONE) {
+        return ret;
+      }
+    }
+  } else if (IS_INTER_16x16 (mbType)) {
+    if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
+      for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+        if (IS_DIR (mbType, 0, listIdx)) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ]
+          iMotionPredFlag[listIdx][0] = uiCode > 0;
+        }
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (IS_DIR (mbType, 0, listIdx)) {
+        if (iMotionPredFlag[listIdx][0] == 0) {
+          WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0/l1[ mbPartIdx ]
+          ref_idx_list[listIdx][0] = uiCode;
+          // Security check: ref_idx should be in range 0 to num_ref_idx_lX_active_minus1, inclusive
+          // ref to standard section 7.4.5.1. iRefCount[listIdx] is 1 + num_ref_idx_lX_active_minus1.
+          if ((ref_idx_list[listIdx][0] < 0) || (ref_idx_list[listIdx][0] >= iRefCount[listIdx])
+              || (ppRefPic[listIdx][ref_idx_list[listIdx][0]] == NULL)) { //error ref_idx
+            pCtx->bMbRefConcealed = true;
+            if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+              ref_idx_list[listIdx][0] = 0;
+              pCtx->iErrorCode |= dsBitstreamError;
+            } else {
+              return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+            }
+          }
+          pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][ref_idx_list[listIdx][0]]
+                                  && ppRefPic[listIdx][ref_idx_list[listIdx][0]]->bIsComplete);
+        } else {
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
+          return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+        }
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (IS_DIR (mbType, 0, listIdx)) {
+        PredMv (iMvArray, iRefIdxArray, listIdx, 0, 4, ref_idx_list[listIdx][0], iMv);
+        WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0/l1[ mbPartIdx ][ 0 ][ 0 ]
+        iMv[0] += iCode;
+        WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0/l1[ mbPartIdx ][ 0 ][ 1 ]
+        iMv[1] += iCode;
+        WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
+      } else {
+        * (uint32_t*)iMv = 0; //list not used by this partition: store a zero mv
+      }
+      UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref_idx_list[listIdx][0], iMv);
+    }
+  } else if (IS_INTER_16x8 (mbType)) {
+    if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
+      for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+        for (int32_t i = 0; i < 2; ++i) {
+          if (IS_DIR (mbType, i, listIdx)) {
+            WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ]
+            iMotionPredFlag[listIdx][i] = uiCode > 0;
+          }
+        }
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 2; ++i) {
+        if (IS_DIR (mbType, i, listIdx)) {
+          if (iMotionPredFlag[listIdx][i] == 0) {
+            WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0/l1[ mbPartIdx ]
+            int32_t iRefIdx = uiCode;
+            // Security check: iRefIdx should be in range 0 to num_ref_idx_lX_active_minus1, inclusive
+            // ref to standard section 7.4.5.1. iRefCount[listIdx] is 1 + num_ref_idx_lX_active_minus1.
+            if ((iRefIdx < 0) || (iRefIdx >= iRefCount[listIdx]) || (ppRefPic[listIdx][iRefIdx] == NULL)) { //error ref_idx
+              pCtx->bMbRefConcealed = true;
+              if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+                iRefIdx = 0;
+                pCtx->iErrorCode |= dsBitstreamError;
+              } else {
+                return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+              }
+            }
+            ref_idx_list[listIdx][i] = iRefIdx;
+            pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx]
+                                    && ppRefPic[listIdx][iRefIdx]->bIsComplete);
+          } else {
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
+            return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+          }
+        }
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 2; i++) {
+        int iPartIdx = i << 3;
+        int32_t iRefIdx = ref_idx_list[listIdx][i];
+        if (IS_DIR (mbType, i, listIdx)) {
+          PredInter16x8Mv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv);
+
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0/l1[ mbPartIdx ][ 0 ][ 0 ]
+          iMv[0] += iCode;
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0/l1[ mbPartIdx ][ 0 ][ 1 ]
+          iMv[1] += iCode;
+
+          WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
+        } else {
+          * (uint32_t*)iMv = 0;
+        }
+        UpdateP16x8MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv);
+      }
+    }
+  } else if (IS_INTER_8x16 (mbType)) {
+    if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
+      for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+        for (int32_t i = 0; i < 2; ++i) {
+          if (IS_DIR (mbType, i, listIdx)) {
+            WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ]
+            iMotionPredFlag[listIdx][i] = uiCode > 0;
+          }
+        }
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 2; ++i) {
+        if (IS_DIR (mbType, i, listIdx)) {
+          if (iMotionPredFlag[listIdx][i] == 0) {
+            WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0/l1[ mbPartIdx ]
+            int32_t iRefIdx = uiCode;
+            // Security check: iRefIdx should be in range 0 to num_ref_idx_lX_active_minus1, inclusive
+            // ref to standard section 7.4.5.1. iRefCount[listIdx] is 1 + num_ref_idx_lX_active_minus1.
+            if ((iRefIdx < 0) || (iRefIdx >= iRefCount[listIdx]) || (ppRefPic[listIdx][iRefIdx] == NULL)) { //error ref_idx
+              pCtx->bMbRefConcealed = true;
+              if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+                iRefIdx = 0;
+                pCtx->iErrorCode |= dsBitstreamError;
+              } else {
+                return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+              }
+            }
+            ref_idx_list[listIdx][i] = iRefIdx;
+            pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx]
+                                    && ppRefPic[listIdx][iRefIdx]->bIsComplete);
+          } else {
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
+            return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+          }
+        }
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 2; i++) {
+        int iPartIdx = i << 2;
+        int32_t iRefIdx = ref_idx_list[listIdx][i];
+        if (IS_DIR (mbType, i, listIdx)) {
+          PredInter8x16Mv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv);
+
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0/l1[ mbPartIdx ][ 0 ][ 0 ]
+          iMv[0] += iCode;
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0/l1[ mbPartIdx ][ 0 ][ 1 ]
+          iMv[1] += iCode;
+          WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
+        } else {
+          * (uint32_t*)iMv = 0;
+        }
+        UpdateP8x16MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv);
+      }
+    }
+  } else if (IS_Inter_8x8 (mbType)) {
+    int8_t pSubPartCount[4], pPartW[4];
+    uint32_t uiSubMbType;
+    //sub_mb_type, partition
+    int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+    bool has_direct_called = false; //direct prediction is run once and reused by every direct sub-MB
+    SubMbType directSubMbType = 0;
+
+    //uiSubMbType, partition
+    for (int32_t i = 0; i < 4; i++) {
+      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //sub_mb_type[ mbPartIdx ]
+      uiSubMbType = uiCode;
+      if (uiSubMbType >= 13) { //invalid uiSubMbType: B-slice sub_mb_type is 0..12 (H.264 Table 7-18)
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE);
+      }
+      pSubPartCount[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartCount;
+      pPartW[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartWidth;
+
+      // Need modification when B picture add in, reference to 7.3.5
+      if (pSubPartCount[i] > 1)
+        pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = false;
+
+      if (IS_DIRECT (g_ksInterBSubMbTypeInfo[uiSubMbType].iType)) {
+        if (!has_direct_called) {
+          if (pSliceHeader->iDirectSpatialMvPredFlag) {
+            int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, directSubMbType);
+            if (ret != ERR_NONE) {
+              return ret;
+            }
+
+          } else {
+            //temporal direct mode
+            ComputeColocated (pCtx);
+            int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef);
+            if (ret != ERR_NONE) {
+              return ret;
+            }
+          }
+          has_direct_called = true;
+        }
+        pCurDqLayer->pSubMbType[iMbXy][i] = directSubMbType;
+        if (IS_SUB_4x4 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+          pSubPartCount[i] = 4;
+          pPartW[i] = 1;
+        }
+      } else {
+        pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType;
+      }
+    }
+    if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
+      for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+        for (int32_t i = 0; i < 4; i++) {
+          bool is_dir = IS_DIR (pCurDqLayer->pSubMbType[iMbXy][i], 0, listIdx) > 0;
+          if (is_dir) {
+            WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ]
+            iMotionPredFlag[listIdx][i] = uiCode > 0;
+          }
+        }
+      }
+    }
+    for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
+      int16_t iIdx8 = i << 2;
+      if (IS_DIRECT (pCurDqLayer->pSubMbType[iMbXy][i])) {
+        int8_t iPartCount = pSubPartCount[i];
+        int16_t iPartIdx, iBlockW = pPartW[i];
+        uint8_t iScan4Idx, iCacheIdx, iColocIdx;
+        iCacheIdx = g_kuiCache30ScanIdx[iIdx8];
+
+        if (!pSliceHeader->iDirectSpatialMvPredFlag) {
+          iRef[LIST_1] = 0;
+          if (pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
+            iRef[LIST_0] = 0;
+          } else {
+            if (pCurDqLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) {
+              iRef[LIST_0] = pCurDqLayer->iColocRefIndex[LIST_0][iIdx8];
+            } else {
+              iRef[LIST_0] = pCurDqLayer->iColocRefIndex[LIST_1][iIdx8];
+            }
+          }
+        }
+        for (int32_t j = 0; j < iPartCount; j++) {
+          iPartIdx = iIdx8 + j * iBlockW;
+          iColocIdx = g_kuiScan4[iPartIdx];
+          iScan4Idx = g_kuiScan4[iPartIdx];
+          iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+
+          if (pSliceHeader->iDirectSpatialMvPredFlag) {
+            int16_t pMV[4] = { 0 };
+            if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+              * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
+              ST32 ((pMV + 2), LD32 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
+              ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+              ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+              ST64 (iMvArray[LIST_0][iCacheIdx], LD64 (pMV));
+              ST64 (iMvArray[LIST_0][iCacheIdx + 6], LD64 (pMV));
+              * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
+              ST32 ((pMV + 2), LD32 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
+              ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+              ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+              ST64 (iMvArray[LIST_1][iCacheIdx], LD64 (pMV));
+              ST64 (iMvArray[LIST_1][iCacheIdx + 6], LD64 (pMV));
+            } else { //SUB_4x4
+              * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
+              ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
+              ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+              ST32 (iMvArray[LIST_0][iCacheIdx], LD32 (pMV));
+              * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
+              ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
+              ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+              ST32 (iMvArray[LIST_1][iCacheIdx], LD32 (pMV));
+            }
+
+            if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) {
+              bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef;
+              uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
+                                       (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
+                                           && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
+              const int16_t (*mvColoc)[2] = pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 ? pCurDqLayer->iColocMv[LIST_0] :
+                                            pCurDqLayer->iColocMv[LIST_1];
+              const int16_t* mv = mvColoc[iColocIdx];
+              if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+                if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
+                  if (iRef[LIST_0] == 0) {
+                    ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
+                    ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
+                    ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+                    ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+                    ST64 (iMvArray[LIST_0][iCacheIdx], 0);
+                    ST64 (iMvArray[LIST_0][iCacheIdx + 6], 0);
+                  }
+
+                  if (iRef[LIST_1] == 0) {
+                    ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
+                    ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
+                    ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+                    ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+                    ST64 (iMvArray[LIST_1][iCacheIdx], 0);
+                    ST64 (iMvArray[LIST_1][iCacheIdx + 6], 0);
+                  }
+                }
+              } else {
+                if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
+                  if (iRef[LIST_0] == 0) {
+                    ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
+                    ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+                    ST32 (iMvArray[LIST_0][iCacheIdx], 0);
+                  }
+                  if (iRef[LIST_1] == 0) {
+                    ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
+                    ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+                    ST32 (iMvArray[LIST_1][iCacheIdx], 0);
+                  }
+                }
+              }
+            }
+          } else {
+            int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
+            int16_t* mv = mvColoc[iColocIdx];
+            int16_t pMV[4] = { 0 };
+            int16_t iMvp[LIST_A][2];
+            if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+              iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
+              iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
+              ST32 (pMV, LD32 (iMvp[LIST_0]));
+              ST32 ((pMV + 2), LD32 (iMvp[LIST_0]));
+              ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
+              ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+              ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+              iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0]; //temporal direct: mvL1 = mvL0 - mvCol
+              iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
+              ST32 (pMV, LD32 (iMvp[LIST_1]));
+              ST32 ((pMV + 2), LD32 (iMvp[LIST_1]));
+              ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
+              ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+              ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+            } else { //SUB_4x4
+              iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
+              iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
+              ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (iMvp[LIST_0]));
+              ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+              iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0]; //temporal direct: mvL1 = mvL0 - mvCol
+              iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
+              ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (iMvp[LIST_1]));
+              ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+            }
+          }
+        }
+      }
+    }
+    //ref no-direct
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 4; i++) {
+        int16_t iIdx8 = i << 2;
+        uint8_t uiScan4Idx = g_kuiScan4[iIdx8];
+        int32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i];
+        int8_t iref = REF_NOT_IN_LIST;
+        if (IS_DIRECT (subMbType)) {
+          if (pSliceHeader->iDirectSpatialMvPredFlag) {
+            iref = iRef[listIdx];
+          } else {
+            iref = 0;
+            if (listIdx == LIST_0) {
+              if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
+                if (pCurDqLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) {
+                  iref = pCurDqLayer->iColocRefIndex[LIST_0][iIdx8];
+                } else {
+                  iref = pCurDqLayer->iColocRefIndex[LIST_1][iIdx8];
+                }
+              }
+            }
+          }
+        } else {
+          if (IS_DIR (subMbType, 0, listIdx)) {
+            if (iMotionPredFlag[listIdx][i] == 0) {
+              WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0/l1[ mbPartIdx ]
+              iref = uiCode;
+              if ((iref < 0) || (iref >= iRefCount[listIdx]) || (ppRefPic[listIdx][iref] == NULL)) { //error ref_idx
+                pCtx->bMbRefConcealed = true;
+                if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+                  iref = 0;
+                  pCtx->iErrorCode |= dsBitstreamError;
+                } else {
+                  return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+                }
+              }
+              pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iref]
+                                      && ppRefPic[listIdx][iref]->bIsComplete);
+            } else {
+              WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
+              return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+            }
+          }
+        }
+        pCurDqLayer->pRefIndex[listIdx][iMbXy][uiScan4Idx] = pCurDqLayer->pRefIndex[listIdx][iMbXy][uiScan4Idx + 1] =
+              pCurDqLayer->pRefIndex[listIdx][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pRefIndex[listIdx][iMbXy][uiScan4Idx + 5] = iref;
+        ref_idx_list[listIdx][i] = iref;
+      }
+    }
+    //mv
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 4; i++) {
+        int8_t iPartCount = pSubPartCount[i];
+        int16_t iPartIdx, iBlockW = pPartW[i];
+        uint8_t uiScan4Idx, uiCacheIdx;
+
+        uiCacheIdx = g_kuiCache30ScanIdx[i << 2];
+
+        int8_t iref = ref_idx_list[listIdx][i];
+        iRefIdxArray[listIdx][uiCacheIdx] = iRefIdxArray[listIdx][uiCacheIdx + 1] =
+                                              iRefIdxArray[listIdx][uiCacheIdx + 6] = iRefIdxArray[listIdx][uiCacheIdx + 7] = iref;
+
+        uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i];
+        if (IS_DIRECT (subMbType)) {
+          continue; //direct sub-MB mvs were already stored by the direct 8x8 loop above
+        }
+        bool is_dir = IS_DIR (subMbType, 0, listIdx) > 0;
+        for (int32_t j = 0; j < iPartCount; j++) {
+          iPartIdx = (i << 2) + j * iBlockW;
+          uiScan4Idx = g_kuiScan4[iPartIdx];
+          uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+          if (is_dir) {
+            PredMv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iBlockW, iref, iMv);
+
+            WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0/l1[ mbPartIdx ][ subMbPartIdx ][ 0 ]
+            iMv[0] += iCode;
+            WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0/l1[ mbPartIdx ][ subMbPartIdx ][ 1 ]
+            iMv[1] += iCode;
+            WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
+          } else {
+            * (uint32_t*)iMv = 0;
+          }
+          if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv));
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv));
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 5], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx + 7], LD32 (iMv));
+          } else if (IS_SUB_8x4 (subMbType)) {
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv));
+          } else if (IS_SUB_4x8 (subMbType)) {
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv));
+          } else { //SUB_MB_TYPE_4x4 == uiSubMbType
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
+            ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
+          }
+        }
+      }
+    }
+  }
+  return ERR_NONE;
+}
 } // namespace WelsDec
--- a/codec/decoder/plus/src/welsDecoderExt.cpp
+++ b/codec/decoder/plus/src/welsDecoderExt.cpp
@@ -471,7 +471,7 @@
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER == eOptID) {
-    if (m_pDecContext->pSps && m_pDecContext->pSps->uiProfileIdc != 66 && m_pDecContext->pPps->bEntropyCodingModeFlag) {
+    if (m_pDecContext->pSps && m_pDecContext->pSps->uiProfileIdc != 66) {
       * ((int*)pOption) = m_iNumOfPicts > 0 ? m_iNumOfPicts : 0;
     } else {
       * ((int*)pOption) = 0;
@@ -743,8 +743,7 @@
 }
 
 DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo) {
-  if (pDstInfo->iBufferStatus == 1 && m_pDecContext->pSps->uiProfileIdc != 66
-      && m_pDecContext->pPps->bEntropyCodingModeFlag) {
+  if (pDstInfo->iBufferStatus == 1 && m_pDecContext->pSps->uiProfileIdc != 66) {
     if (m_pDecContext->pSliceHeader->iPicOrderCntLsb == 0) {
       if (m_iNumOfPicts > 0) {
         m_iLastGOPRemainPicts = m_iNumOfPicts;
binary files /dev/null b/res/Cisco_Adobe_PDF_sample_a_1024x768_CAVLC_Bframe_9.264 differ
--- a/test/api/decoder_test.cpp
+++ b/test/api/decoder_test.cpp
@@ -132,6 +132,8 @@
   {"res/test_vd_1d.264", "5827d2338b79ff82cd091c707823e466197281d3"},
   {"res/test_vd_rc.264", "eea02e97bfec89d0418593a8abaaf55d02eaa1ca"},
   {"res/Cisco_Men_whisper_640x320_CABAC_Bframe_9.264", "88b8864a69cee7656202bc54d2ffa8b7b6f1f6c5"},
+  {"res/Cisco_Men_whisper_640x320_CAVLC_Bframe_9.264", "270a500d2f91c9e2c8ffabc03f62e0dc0b3a24ed"},
+  {"res/Cisco_Adobe_PDF_sample_a_1024x768_CAVLC_Bframe_9.264", "d3b2b986178ce3eafa806cd984543d0da830f408"},
 };
 
 INSTANTIATE_TEST_CASE_P (DecodeFile, DecoderOutputTest,
--- a/test/api/encode_options_test.cpp
+++ b/test/api/encode_options_test.cpp
@@ -1033,6 +1033,13 @@
       EXPECT_TRUE (iResult == cmResultSuccess) << "iResult=" << iResult << "LayerIdx=" << iIdx;
       iResult = decoder[iIdx]->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_);
       EXPECT_TRUE (iResult == cmResultSuccess) << "iResult=" << iResult << "LayerIdx=" << iIdx;
+      if (dstBufInfo_.iBufferStatus == 0) {
+        int32_t num_of_frames_in_buffer = 0;
+        decoder[iIdx]->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
+        for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
+          decoder[iIdx]->FlushFrame (pData, &dstBufInfo_);
+        }
+      }
       EXPECT_EQ (dstBufInfo_.iBufferStatus, 1) << "LayerIdx=" << iIdx;
     }
   }
@@ -2299,7 +2306,7 @@
 
     while (fileStream.read (buf_.data(), frameSize) == frameSize) {
 
-      if ( (iStepIdx < 3) && (iFrameNum == ((iTotalFrame / 3) * (iStepIdx + 1)))) {
+      if ((iStepIdx < 3) && (iFrameNum == ((iTotalFrame / 3) * (iStepIdx + 1)))) {
         sParam.iTemporalLayerNum = originalTemporalLayerNum * iSteps[iStepIdx];
         sParam.iTargetBitrate = sParam.sSpatialLayers[0].iSpatialBitrate = originalBR * iSteps[iStepIdx];
         sParam.fMaxFrameRate = sParam.sSpatialLayers[0].fFrameRate = originalFR * pow (2.0f, iSteps[iStepIdx]);
@@ -2314,9 +2321,11 @@
 
       if (bSetOption) {
         if ((iStepIdx == 1) || (iStepIdx == 3)) {
-          EXPECT_TRUE (info.eFrameType == videoFrameTypeIDR) << "iStepIdx=" << iStepIdx << "iFrameNum=" << iFrameNum << "iTotalFrame=" << iTotalFrame;
+          EXPECT_TRUE (info.eFrameType == videoFrameTypeIDR) << "iStepIdx=" << iStepIdx << "iFrameNum=" << iFrameNum <<
+              "iTotalFrame=" << iTotalFrame;
         } else {
-          EXPECT_TRUE (info.eFrameType != videoFrameTypeIDR) << "iStepIdx=" << iStepIdx << "iFrameNum=" << iFrameNum << "iTotalFrame=" << iTotalFrame;
+          EXPECT_TRUE (info.eFrameType != videoFrameTypeIDR) << "iStepIdx=" << iStepIdx << "iFrameNum=" << iFrameNum <<
+              "iTotalFrame=" << iTotalFrame;
         }
 
         bSetOption = false;
--- a/test/decoder/DecUT_ParseSyntax.cpp
+++ b/test/decoder/DecUT_ParseSyntax.cpp
@@ -412,7 +412,7 @@
   int32_t iRet = ERR_NONE;
   Init();
   ASSERT_EQ (iRet, ERR_NONE);
-  ASSERT_TRUE (DecodeBs ("res/Cisco_Men_whisper_640x320_CAVLC_Bframe_9.264", ErrorDec));
+  ASSERT_TRUE (DecodeBs ("res/Cisco_Men_whisper_640x320_CAVLC_Bframe_9.264", CorrectDec));
   Uninit();
 }