ref: 4bc0b8ad188cf373e3224e9471963c116816fea6
parent: c66c305a634c9f63e12d4b3846645b3d7dc81b00
parent: ec09d67a5fd82c7c2e391a7490f6271dc5570c96
author: huili2 <[email protected]>
date: Thu Oct 31 10:06:18 EDT 2019
Merge pull request #3190 from xiaotianshi2/thread_commit_3_updated commit-3 (updated) of multi-threaded decoding support.
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -277,6 +277,7 @@
PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
int32_t iPrevFrameNum;// frame number of previous frame well decoded for non-truncated mode yet
bool bLastHasMmco5;
+ uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps
} SWelsLastDecPicInfo, *PWelsLastDecPicInfo;
typedef struct tagPictInfo {
@@ -538,6 +539,8 @@
PPicture pDec;
SWelsDecEvent sImageReady;
SWelsDecEvent sSliceDecodeStart;
+ SWelsDecEvent sSliceDecodeFinsh;
+ int32_t iPicBuffIdx; //picBuff Index
} SWelsDecoderThreadCTX, *PWelsDecoderThreadCTX;
static inline void ResetActiveSPSForEachLayer (PWelsDecoderContext pCtx) {
--- a/codec/decoder/core/inc/pic_queue.h
+++ b/codec/decoder/core/inc/pic_queue.h
@@ -54,6 +54,8 @@
PPicture PrefetchPic (PPicBuff pPicBuff); // To get current node applicable
PPicture PrefetchPicForThread (PPicBuff pPicBuff); // To get current node applicable in the case of threaded mode
+PPicture PrefetchLastPicForThread (PPicBuff pPicBuff,
+ const int32_t& iLast); // To get last node applicable in the case of threaded mode
} // namespace WelsDec
--- a/codec/decoder/core/inc/picture.h
+++ b/codec/decoder/core/inc/picture.h
@@ -89,11 +89,13 @@
uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps
int32_t iPicBuffIdx;
EWelsSliceType eSliceType;
+ bool bIsUngroupedMultiSlice; //multi-slice picture in which each slice group contains one slice.
bool bNewSeqBegin;
int32_t iMbEcedNum;
int32_t iMbEcedPropNum;
int32_t iMbNum;
+ bool* pMbCorrectlyDecodedFlag;
uint32_t* pMbType; // mb type used for direct mode
int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; // used for direct mode
int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM]; //used for direct mode
--- a/codec/decoder/core/src/decoder.cpp
+++ b/codec/decoder/core/src/decoder.cpp
@@ -393,6 +393,7 @@
sLastDecPicInfo.pPreviousDecodedPictureInDpb = NULL;
sLastDecPicInfo.iPrevFrameNum = -1;
sLastDecPicInfo.bLastHasMmco5 = false;
+ sLastDecPicInfo.uiDecodingTimeStamp = 0;
}
/*!
@@ -437,6 +438,9 @@
iNumRefFrames = MAX_REF_PIC_COUNT + 2;
} else {
iNumRefFrames = pCtx->pSps->iNumRefFrames + 2;
+ if (pCtx->pThreadCtx != NULL) {
+ iNumRefFrames = MAX_REF_PIC_COUNT + 1;
+ }
}
#ifdef LONG_TERM_REF
@@ -478,7 +482,9 @@
&& kiPicHeight == pCtx->iImgHeightInPixel) && (!bNeedChangePicQueue)) // have same scaled buffer
// sync update pRefList
- WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free
+ if (pCtx->pThreadCtx == NULL) {
+ WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free
+ }
if (pCtx->bHaveGotMemory && (kiPicWidth == pCtx->iImgWidthInPixel && kiPicHeight == pCtx->iImgHeightInPixel)
&& pCtx->pPicBuff != NULL && pCtx->pPicBuff->iCapacity != iPicQueueSize) {
@@ -554,6 +560,17 @@
if (NULL != pPicBuff && NULL != *pPicBuff) {
DestroyPicBuff (pCtx, pPicBuff, pMa);
}
+ if (pCtx->pThreadCtx != NULL) {
+ //clear pPicBuff in every thread context to prevent double destruction of the PPicBuff
+ PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pThreadCtx);
+ int32_t threadCount = pThreadCtx->sThreadInfo.uiThrMaxNum;
+ int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
+ for (int32_t i = 0; i < threadCount; ++i) {
+ if (pThreadCtx[i - id].pCtx != NULL) {
+ pThreadCtx[i - id].pCtx->pPicBuff = NULL;
+ }
+ }
+ }
if (pCtx->pTempDec) {
FreePicture (pCtx->pTempDec, pCtx->pMemAlign);
@@ -796,7 +813,11 @@
}
CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) {
- ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
+ if (pCtx->pThreadCtx == NULL) {
+ ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
+ } else {
+ pCtx->pAccessUnitList->uiAvailUnitsNum = 1;
+ }
}
}
DecodeFinishUpdate (pCtx);
@@ -852,9 +873,15 @@
if (IS_PARAM_SETS_NALS (pCtx->sCurNalHead.eNalUnitType)) {
iRet = ParseNonVclNal (pCtx, pNalPayload, iDstIdx - iConsumedBytes, pSrcNal - 3, iSrcIdx + 3);
}
- CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
+ if (pCtx->pThreadCtx == NULL) {
+ CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
+ }
if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) {
- ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
+ if (pCtx->pThreadCtx == NULL) {
+ ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
+ } else {
+ pCtx->pAccessUnitList->uiAvailUnitsNum = 1;
+ }
}
}
DecodeFinishUpdate (pCtx);
--- a/codec/decoder/core/src/decoder_core.cpp
+++ b/codec/decoder/core/src/decoder_core.cpp
@@ -194,8 +194,9 @@
"DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ",
pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight);
bFrameCompleteFlag = false; //return later after output buffer is done
- if (pCtx->bInstantDecFlag) //no-delay decoding, wait for new slice
+ if (pCtx->bInstantDecFlag) { //no-delay decoding, wait for new slice
return ERR_INFO_MB_NUM_INADEQUATE;
+ }
} else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag
&& (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done
pCtx->pDec->bIsComplete = true;
@@ -220,9 +221,26 @@
ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
pDstInfo->iBufferStatus = 1;
-
- bool bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
- || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
+ if (pCtx->pThreadCtx != NULL && pPic->bIsComplete == false) {
+ pPic->bIsComplete = true;
+ }
+ if (pCtx->pThreadCtx != NULL) {
+ uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
+ for (uint32_t i = 0; i < uiMbHeight; ++i) {
+ SET_EVENT (&pCtx->pDec->pReadyEvent[i]);
+ }
+ }
+ bool bOutResChange = false;
+ if (pCtx->pThreadCtx == NULL || pCtx->pLastThreadCtx == NULL) {
+ bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
+ || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
+ } else {
+ if (pCtx->pLastThreadCtx != NULL) {
+ PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx);
+ bOutResChange = (pLastThreadCtx->pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
+ || (pLastThreadCtx->pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
+ }
+ }
pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth;
pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight;
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no buffer output if EC is disabled and frame incomplete
@@ -846,8 +864,9 @@
* Parse slice header of bitstream in avc for storing data structure
*/
int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) {
- PNalUnit const kpCurNal = pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
- 1];
+ PNalUnit const kpCurNal =
+ pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
+ 1];
PNalUnitHeaderExt pNalHeaderExt = NULL;
PSliceHeader pSliceHead = NULL;
@@ -1462,7 +1481,6 @@
int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) {
int32_t i = 0;
-
WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0))
pCtx->sMb.iMbWidth = (kiMaxWidth + 15) >> 4;
pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4;
@@ -1508,7 +1526,8 @@
sizeof (
bool),
"pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
- pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
+ pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ bool),
"pCtx->sMb.pTransformSize8x8Flag[]");
pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int8_t) * 2,
@@ -1519,9 +1538,11 @@
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
pCtx->sMb.pCbfDc[i] = (uint16_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t),
"pCtx->sMb.pCbfDc[]");
- pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
+ pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ int8_t) * 24,
"pCtx->sMb.pNzc[]");
- pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
+ pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ int8_t) * 24,
"pCtx->sMb.pNzcRs[]");
pCtx->sMb.pScaledTCoeff[i] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
pCtx->sMb.iMbHeight *
@@ -1539,20 +1560,24 @@
"pCtx->sMb.pChromaPredMode[]");
pCtx->sMb.pCbp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
"pCtx->sMb.pCbp[]");
- pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
+ pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
+ pCtx->sMb.iMbHeight *
sizeof (
uint32_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]");
pCtx->sMb.pSliceIdc[i] = (int32_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t),
"pCtx->sMb.pSliceIdc[]"); // using int32_t for slice_idc, 4/21/2010
- pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
+ pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ int8_t),
"pCtx->sMb.pResidualPredFlag[]");
- pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
- int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]");
+ pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
+ sizeof (
+ int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]");
pCtx->sMb.pMbCorrectlyDecodedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
bool),
"pCtx->sMb.pMbCorrectlyDecodedFlag[]");
- pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
+ pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ bool),
"pCtx->pMbRefConcealedFlag[]");
// check memory block valid due above allocated..
@@ -1599,6 +1624,8 @@
return ERR_NONE;
}
+
+
void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
int32_t i = 0;
CMemoryAlign* pMa = pCtx->pMemAlign;
@@ -2307,39 +2334,18 @@
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
- int32_t iErr;
- PAccessUnit pCurAu = pCtx->pAccessUnitList;
- pCtx->bAuReadyFlag = false;
- pCtx->pLastDecPicInfo->bLastHasMmco5 = false;
- bool bTmpNewSeqBegin = CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx);
- pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || bTmpNewSeqBegin;
- iErr = WelsDecodeAccessUnitStart (pCtx);
- GetVclNalTemporalId (pCtx);
-
- if (ERR_NONE != iErr) {
- ForceResetCurrentAccessUnit (pCtx->pAccessUnitList);
- if (!pCtx->pParam->bParseOnly)
- pDstInfo->iBufferStatus = 0;
- pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || pCtx->bNextNewSeqBegin;
- pCtx->bNextNewSeqBegin = false; // reset it
- if (pCtx->bNewSeqBegin)
- ResetActiveSPSForEachLayer (pCtx);
- return iErr;
- }
-
- pCtx->pSps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
- pCtx->pPps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps;
-
- //try to allocate or relocate DPB memory only when new sequence is coming.
- if (pCtx->bNewSeqBegin) {
- WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL
- iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
-
+ int32_t iErr = ERR_NONE;
+ if (pCtx->pThreadCtx == NULL) {
+ iErr = InitConstructAccessUnit (pCtx, pDstInfo);
if (ERR_NONE != iErr) {
- WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed, the error is %d", iErr);
return iErr;
}
}
+ if (pCtx->pCabacDecEngine == NULL) {
+ pCtx->pCabacDecEngine = (SWelsCabacDecEngine*)pCtx->pMemAlign->WelsMallocz (sizeof (SWelsCabacDecEngine),
+ "pCtx->pCabacDecEngine");
+ WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, (NULL == pCtx->pCabacDecEngine))
+ }
iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo);
@@ -2412,6 +2418,9 @@
int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) {
int32_t iRet = ERR_NONE;
+ if (pCtx->pThreadCtx != NULL && pCtx->bNewSeqBegin) {
+ WelsResetRefPic (pCtx);
+ }
if (pCtx->eSliceType == B_SLICE) {
iRet = WelsInitBSliceRefList (pCtx, iPoc);
CreateImplicitWeightTable (pCtx);
@@ -2466,13 +2475,26 @@
* Decode current access unit when current AU is completed.
*/
int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
- int32_t iRefCount[LIST_A];
- PNalUnit pNalCur = NULL;
+ PNalUnit pNalCur = pCtx->pNalCur = NULL;
PAccessUnit pCurAu = pCtx->pAccessUnitList;
int32_t iIdx = pCurAu->uiStartPos;
int32_t iEndIdx = pCurAu->uiEndPos;
+ //get current thread ctx
+ PWelsDecoderThreadCTX pThreadCtx = NULL;
+ if (pCtx->pThreadCtx != NULL) {
+ pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx;
+ }
+ //get last thread ctx
+ PWelsDecoderThreadCTX pLastThreadCtx = NULL;
+ if (pCtx->pLastThreadCtx != NULL) {
+ pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx);
+ if (pLastThreadCtx->pDec == NULL) {
+ pLastThreadCtx->pDec = PrefetchLastPicForThread (pCtx->pPicBuff,
+ pLastThreadCtx->iPicBuffIdx);
+ }
+ }
int32_t iPpsId = 0;
int32_t iRet = ERR_NONE;
@@ -2487,7 +2509,7 @@
true; // Another fresh slice comingup for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008
//update pCurDqLayer at the starting of AU decoding
- if (pCtx->bInitialDqLayersMem) {
+ if (pCtx->bInitialDqLayersMem || pCtx->pCurDqLayer == NULL) {
pCtx->pCurDqLayer = pCtx->pDqLayersList[0];
}
@@ -2500,8 +2522,71 @@
PSliceHeaderExt pShExt = NULL;
PSliceHeader pSh = NULL;
+ if (pLastThreadCtx != NULL) {
+ pSh = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
+ if (pSh->iFirstMbInSlice == 0) {
+ if (pLastThreadCtx->pCtx->pDec != NULL && pLastThreadCtx->pCtx->pDec->bIsUngroupedMultiSlice) {
+ WAIT_EVENT (&pLastThreadCtx->sSliceDecodeFinsh, WELS_DEC_THREAD_WAIT_INFINITE);
+ }
+ pCtx->pDec = NULL;
+ pCtx->iTotalNumMbRec = 0;
+ } else if (pLastThreadCtx->pCtx->pDec != NULL) {
+ if (pSh->iFrameNum == pLastThreadCtx->pCtx->pDec->iFrameNum
+ && pSh->iPicOrderCntLsb == pLastThreadCtx->pCtx->pDec->iFramePoc) {
+ WAIT_EVENT (&pLastThreadCtx->sSliceDecodeFinsh, WELS_DEC_THREAD_WAIT_INFINITE);
+ pCtx->pDec = pLastThreadCtx->pCtx->pDec;
+ pCtx->pDec->bIsUngroupedMultiSlice = true;
+ pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic;
+ pCtx->iTotalNumMbRec = pLastThreadCtx->pCtx->iTotalNumMbRec;
+ }
+ }
+ }
+ bool isNewFrame = true;
+ if (pThreadCtx != NULL) {
+ isNewFrame = pCtx->pDec == NULL;
+ }
if (pCtx->pDec == NULL) {
+ if (pLastThreadCtx != NULL) {
+ pLastThreadCtx->pDec->bUsedAsRef = pLastThreadCtx->pCtx->uiNalRefIdc > 0;
+ if (pLastThreadCtx->pDec->bUsedAsRef) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ uint32_t i = 0;
+ while (i < MAX_DPB_COUNT && pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]) {
+ pLastThreadCtx->pDec->pRefPic[listIdx][i] = pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i];
+ pLastThreadCtx->pDec->pRefPic[listIdx][i]->bAvailableFlag = false;
+ ++i;
+ }
+ }
+ pLastThreadCtx->pCtx->sTmpRefPic = pLastThreadCtx->pCtx->sRefPic;
+ WelsMarkAsRef (pLastThreadCtx->pCtx, pLastThreadCtx->pDec);
+ pCtx->sRefPic = pLastThreadCtx->pCtx->sTmpRefPic;
+ } else {
+ pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic;
+ }
+ //printf ("last uiDecodingTimeStamp = %d\n", pLastThreadCtx->pCtx->uiDecodingTimeStamp);
+ for (int32_t i = 0; i < pCtx->sRefPic.uiRefCount[LIST_0]; ++i) {
+ if (pCtx->sRefPic.pRefList[LIST_0][i] != NULL) {
+ pCtx->sRefPic.pRefList[LIST_0][i]->bAvailableFlag = false;
+ }
+ }
+ for (int32_t i = 0; i < pCtx->sRefPic.uiRefCount[LIST_1]; ++i) {
+ if (pCtx->sRefPic.pRefList[LIST_1][i] != NULL) {
+ pCtx->sRefPic.pRefList[LIST_1][i]->bAvailableFlag = false;
+ }
+ }
+ }
pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
+ if (pThreadCtx != NULL) {
+ if (pCtx->pDec != NULL) {
+ pCtx->pDec->bAvailableFlag = false;
+ pCtx->pDec->bIsUngroupedMultiSlice = false;
+ pThreadCtx->pDec = pCtx->pDec;
+ uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
+ for (uint32_t i = 0; i < uiMbHeight; ++i) {
+ RESET_EVENT (&pCtx->pDec->pReadyEvent[i]);
+ }
+ }
+ }
if (pCtx->iTotalNumMbRec != 0)
pCtx->iTotalNumMbRec = 0;
@@ -2519,6 +2604,10 @@
}
pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp;
pCtx->pDec->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
+ if (pThreadCtx != NULL) {
+ pThreadCtx->iPicBuffIdx = pCtx->pDec->iPicBuffIdx;
+ pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag = pCtx->pDec->pMbCorrectlyDecodedFlag;
+ }
if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode
for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i)
@@ -2556,6 +2645,7 @@
pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2
pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag;
pCtx->pDec->eSliceType = pSh->eSliceType;
+
memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage
pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag;
pLayerInfo.sSliceInLayer.eSliceType = pSh->eSliceType;
@@ -2587,11 +2677,9 @@
bFreshSliceAvailable = (iCurrIdD != iLastIdD
|| iCurrIdQ != iLastIdQ); // do not need condition of (first_mb == 0) due multiple slices might be disorder
+
WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps);
- if (iCurrIdQ == BASE_QUALITY_ID) {
- ST64 (iRefCount, LD64 (pLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiRefCount));
- }
if ((iLastIdD < 0) || //case 1: first layer
(iLastIdD == iCurrIdD)) { //case 2: same uiDId
@@ -2601,13 +2689,23 @@
const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag
|| (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR);
// Subclause 8.2.5.2 Decoding process for gaps in frame_num
+ int32_t iPrevFrameNum = pCtx->pLastDecPicInfo->iPrevFrameNum;
+ if (pLastThreadCtx != NULL) {
+ if (pCtx->bNewSeqBegin) {
+ iPrevFrameNum = 0;
+ } else if (pLastThreadCtx->pDec != NULL) {
+ iPrevFrameNum = pLastThreadCtx->pDec->iFrameNum;
+ } else {
+ iPrevFrameNum = pCtx->bNewSeqBegin ? 0 : pLastThreadCtx->pCtx->iFrameNum;
+ }
+ }
if (!kbIdrFlag &&
- pSh->iFrameNum != pCtx->pLastDecPicInfo->iPrevFrameNum &&
- pSh->iFrameNum != ((pCtx->pLastDecPicInfo->iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) -
+ pSh->iFrameNum != iPrevFrameNum &&
+ pSh->iFrameNum != ((iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) -
1))) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
"referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d",
- pCtx->pLastDecPicInfo->iPrevFrameNum,
+ iPrevFrameNum,
pSh->iFrameNum);
bAllRefComplete = false;
@@ -2623,7 +2721,7 @@
}
}
- if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID) {
+ if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID && isNewFrame) {
iRet = InitRefPicList (pCtx, pCtx->uiNalRefIdc, pSh->iPicOrderCntLsb);
if (iRet) {
pCtx->bRPLRError = true;
@@ -2643,7 +2741,13 @@
if (pSh->eSliceType == B_SLICE && !pSh->iDirectSpatialMvPredFlag)
ComputeColocatedTemporalScaling (pCtx);
- iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
+ if (pThreadCtx != NULL) {
+ memset (&pCtx->lastReadyHeightOffset[0][0], -1, LIST_A * MAX_REF_PIC_COUNT * sizeof (int16_t));
+ SET_EVENT (&pThreadCtx->sSliceDecodeStart);
+ iRet = WelsDecodeAndConstructSlice (pCtx);
+ } else {
+ iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
+ }
//Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case
if (iRet != ERR_NONE) {
@@ -2659,7 +2763,7 @@
}
}
- if (bReconstructSlice) {
+ if (pThreadCtx == NULL && bReconstructSlice) {
if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) {
pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false
return iRet;
@@ -2666,10 +2770,12 @@
}
}
if (bAllRefComplete && pCtx->eSliceType != I_SLICE) {
- if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
- bAllRefComplete &= CheckRefPicturesComplete (pCtx);
- } else {
- bAllRefComplete = false;
+ if (pCtx->pThreadCtx == NULL) {
+ if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
+ bAllRefComplete &= CheckRefPicturesComplete (pCtx);
+ } else {
+ bAllRefComplete = false;
+ }
}
}
}
@@ -2721,34 +2827,49 @@
}
}
+ if (pThreadCtx != NULL && pCtx->uiDecodingTimeStamp > 1 && pCtx->pLastDecPicInfo->uiDecodingTimeStamp > 0) {
+ while (pCtx->uiDecodingTimeStamp > pCtx->pLastDecPicInfo->uiDecodingTimeStamp + 1) {
+ WelsSleep (1);
+ }
+ }
+ if (pThreadCtx != NULL) {
+ pCtx->pLastDecPicInfo->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
+ }
iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
- if (iRet)
+ if (iRet) {
+ if (pThreadCtx != NULL) {
+ SET_EVENT (&pThreadCtx->sSliceDecodeFinsh);
+ }
return iRet;
+ }
pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
- pCtx->bUsedAsRef = false;
- if (pCtx->uiNalRefIdc > 0) {
- pCtx->bUsedAsRef = true;
- for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
- uint32_t i = 0;
- while (i < MAX_DPB_COUNT && pCtx->sRefPic.pRefList[listIdx][i]) {
- pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
- ++i;
+ pCtx->bUsedAsRef = pCtx->uiNalRefIdc > 0;
+ if (pCtx->pThreadCtx == NULL) {
+ if (pCtx->bUsedAsRef) {
+ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+ uint32_t i = 0;
+ while (i < MAX_DPB_COUNT && pCtx->sRefPic.pRefList[listIdx][i]) {
+ pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
+ ++i;
+ }
}
- }
- iRet = WelsMarkAsRef (pCtx);
- if (iRet != ERR_NONE) {
- if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
- pCtx->iErrorCode |= dsBitstreamError;
- if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
- pCtx->pDec = NULL;
- return iRet;
+ iRet = WelsMarkAsRef (pCtx);
+ if (iRet != ERR_NONE) {
+ if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
+ pCtx->iErrorCode |= dsBitstreamError;
+ if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+ pCtx->pDec = NULL;
+ return iRet;
+ }
}
+ if (!pCtx->pParam->bParseOnly)
+ ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
+ pCtx->pDec->iLinesize,
+ pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
}
- if (!pCtx->pParam->bParseOnly)
- ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
- pCtx->pDec->iLinesize,
- pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
+ } else {
+ SET_EVENT (&pThreadCtx->sImageReady);
}
pCtx->pDec = NULL; //after frame decoding, always set to NULL
}
@@ -2758,8 +2879,27 @@
pCtx->pLastDecPicInfo->iPrevFrameNum = pSh->iFrameNum;
if (pCtx->pLastDecPicInfo->bLastHasMmco5)
pCtx->pLastDecPicInfo->iPrevFrameNum = 0;
+ if (pThreadCtx != NULL) {
+ int32_t threadCount = pThreadCtx->sThreadInfo.uiThrMaxNum;
+ int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
+ for (int32_t i = 0; i < threadCount; ++i) {
+ if (pThreadCtx[i - id].pCtx != NULL) {
+ unsigned long long uiTimeStamp = pThreadCtx[i - id].pCtx->uiTimeStamp;
+ if (uiTimeStamp > 0 && pThreadCtx[i - id].pCtx->sSpsPpsCtx.iSeqId > pCtx->sSpsPpsCtx.iSeqId) {
+ CopySpsPps (pThreadCtx[i - id].pCtx, pCtx);
+ if (pCtx->pPicBuff != pThreadCtx[i - id].pCtx->pPicBuff) {
+ pCtx->pPicBuff = pThreadCtx[i - id].pCtx->pPicBuff;
+ }
+ InitialDqLayersContext (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
+ break;
+ }
+ }
+ }
+ }
}
-
+ if (pThreadCtx != NULL) {
+ SET_EVENT (&pThreadCtx->sSliceDecodeFinsh);
+ }
return ERR_NONE;
}
@@ -2875,6 +3015,7 @@
if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb()
return false;
}
+
return bAllRefComplete;
}
} // namespace WelsDec
--- a/codec/decoder/core/src/manage_dec_ref.cpp
+++ b/codec/decoder/core/src/manage_dec_ref.cpp
@@ -150,7 +150,7 @@
&& pCtx->eSliceType != SI_SLICE)) {
if (pCtx->pParam->eEcActiveIdc !=
ERROR_CON_DISABLE) { //IDR lost!, recover it for future decoding with data all set to 0
- PPicture pRef = pCtx->pThreadCtx != NULL ? PrefetchPicForThread (pCtx->pPicBuff) : PrefetchPic (pCtx->pPicBuff);
+ PPicture pRef = PrefetchPic (pCtx->pPicBuff);
if (pRef != NULL) {
// IDR lost, set new
pRef->bIsComplete = false; // Set complete flag to false for lost IDR ref picture
--- a/codec/decoder/core/src/pic_queue.cpp
+++ b/codec/decoder/core/src/pic_queue.cpp
@@ -111,8 +111,8 @@
uint32_t uiMbWidth = (kiPicWidth + 15) >> 4;
uint32_t uiMbHeight = (kiPicHeight + 15) >> 4;
uint32_t uiMbCount = uiMbWidth * uiMbHeight;
- pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t),
- "pPic->pMbType");
+ pPic->pMbCorrectlyDecodedFlag = (bool*)pMa->WelsMallocz (uiMbCount * sizeof (bool), "pPic->pMbCorrectlyDecodedFlag");
+ pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t), "pPic->pMbType");
pPic->pMv[LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]");
pPic->pMv[LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
@@ -140,6 +140,11 @@
pPic->pBuffer[0] = NULL;
}
+ if (pPic->pMbCorrectlyDecodedFlag) {
+ pMa->WelsFree (pPic->pMbCorrectlyDecodedFlag, "pPic->pMbCorrectlyDecodedFlag");
+ pPic->pMbCorrectlyDecodedFlag = NULL;
+ }
+
if (pPic->pMbType) {
pMa->WelsFree (pPic->pMbType, "pPic->pMbType");
pPic->pMbType = NULL;
@@ -213,6 +218,18 @@
pPic->iPicBuffIdx = pPicBuf->iCurrentIdx;
if (++pPicBuf->iCurrentIdx >= pPicBuf->iCapacity) {
pPicBuf->iCurrentIdx = 0;
+ }
+ return pPic;
+}
+
+PPicture PrefetchLastPicForThread (PPicBuff pPicBuf, const int32_t& iLastPicBuffIdx) { // returns ppPic[iLastPicBuffIdx], or NULL when it cannot be looked up; pPicBuf is only read here
+ PPicture pPic = NULL; // stays NULL unless the index check below passes
+
+ if (pPicBuf->iCapacity == 0) { // empty picture buffer: nothing to look up
+ return NULL;
+ }
+ if (iLastPicBuffIdx >= 0 && iLastPicBuffIdx < pPicBuf->iCapacity) { // index ppPic only when the slot lies in [0, iCapacity)
+ pPic = pPicBuf->ppPic[iLastPicBuffIdx];
}
return pPic;
}
--- a/codec/decoder/core/src/wels_decoder_thread.cpp
+++ b/codec/decoder/core/src/wels_decoder_thread.cpp
@@ -57,6 +57,12 @@
#define HW_NCPU_NAME "hw.ncpu"
#endif
#endif
+#ifdef ANDROID_NDK
+#include <cpu-features.h>
+#endif
+#ifdef __ANDROID__
+#include <android/api-level.h>
+#endif
#include "wels_decoder_thread.h"
#include <stdio.h>