ref: 8ed4e83e93124ffdbf900f6a816a191fea5df9cd
parent: b896faa47cd847e15dcd37ad30790402dfdd573d
parent: 54c677cdd7717cbdef490ef3c64a89da7d1cc28a
author: ruil2 <[email protected]>
date: Tue Nov 4 10:12:38 EST 2014
Merge pull request #1490 from sijchen/after_review [Encoder] Fixing for large number of slices
--- a/codec/common/inc/macros.h
+++ b/codec/common/inc/macros.h
@@ -42,6 +42,7 @@
#include <math.h>
#include <assert.h>
+#include <string.h>
#include "typedefs.h"
@@ -273,8 +274,36 @@
#endif
inline bool CheckInRangeCloseOpen (const int16_t kiCurrent, const int16_t kiMin, const int16_t kiMax) {
-return ((kiCurrent >= kiMin) && (kiCurrent < kiMax));
+ return ((kiCurrent >= kiMin) && (kiCurrent < kiMax));
}
+static inline void WelsSetMemUint32_c (uint32_t* pDst, uint32_t iValue, int32_t iSizeOfData) {
+ for (int i = 0; i < iSizeOfData; i++) {
+ pDst[i] = iValue;
+ }
+}
+
+static inline void WelsSetMemUint16_c (uint16_t* pDst, uint16_t iValue, int32_t iSizeOfData) {
+ for (int i = 0; i < iSizeOfData; i++) {
+ pDst[i] = iValue;
+ }
+}
+
+inline void WelsSetMemMultiplebytes_c (void* pDst, uint32_t iValue, int32_t iSizeOfData, int32_t iDataLengthOfData) {
+ assert (4 == iDataLengthOfData || 2 == iDataLengthOfData || 1 == iDataLengthOfData);
+
+ // TODO: consider add assembly for these functions
+ if (0 != iValue) {
+ if (4 == iDataLengthOfData) {
+ WelsSetMemUint32_c (static_cast<uint32_t*> (pDst), static_cast<uint32_t> (iValue), iSizeOfData);
+ } else if (2 == iDataLengthOfData) {
+ WelsSetMemUint16_c (static_cast<uint16_t*> (pDst), static_cast<uint16_t> (iValue), iSizeOfData);
+ } else {
+ memset (static_cast<uint8_t*> (pDst), static_cast<uint8_t> (iValue), iSizeOfData);
+ }
+ } else {
+ memset (static_cast<uint8_t*> (pDst), 0, iSizeOfData * iDataLengthOfData);
+ }
+}
#endif//WELS_MACRO_UTILIZATIONS_H__
--- a/codec/encoder/core/inc/svc_enc_macroblock.h
+++ b/codec/encoder/core/inc/svc_enc_macroblock.h
@@ -68,12 +68,12 @@
uint8_t uiLumaQp; // uiLumaQp: pPps->iInitialQp + sSliceHeader->delta_qp + mb->dquant.
uint8_t uiChromaQp;
-uint8_t uiSliceIdc; // AVC: pFirstMbInSlice?; SVC: (pFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId);
+uint16_t uiSliceIdc; // 2^16=65536 > MaxFS(36864) of level 5.1; AVC: pFirstMbInSlice?; SVC: (pFirstMbInSlice << 7) | ((uiDependencyId << 4) | uiQualityId);
uint32_t uiChromPredMode;
int32_t iLumaDQp;
SMVUnitXY sMvd[4];
int32_t iCbpDc;
-uint8_t reserved_filling_bytes[1]; // filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit
+//uint8_t reserved_filling_bytes[1]; // not deleting this line for further changes of this structure. filling bytes reserved to make structure aligned with 4 bytes, higher cache hit on less structure size by 2 cache lines( 2 * 64 bytes) once hit
} SMB, *PMb;
}
--- a/codec/encoder/core/inc/svc_enc_slice_segment.h
+++ b/codec/encoder/core/inc/svc_enc_slice_segment.h
@@ -80,7 +80,7 @@
int16_t iMbHeight; /* height of picture size in mb */
int16_t iSliceNumInFrame; /* count number of slices in frame; */
int32_t iMbNumInFrame; /* count number of MBs in frame */
-uint8_t* pOverallMbMap; /* overall MB map in frame, store virtual slice idc; */
+uint16_t* pOverallMbMap; /* overall MB map in frame, store virtual slice idc; */
int16_t* pFirstMbInSlice; /* first MB address top-left based in every slice respectively; */
int32_t* pCountMbNumInSlice; /* count number of MBs in every slice respectively; */
uint32_t uiSliceSizeConstraint;/*in byte*/
@@ -140,7 +140,7 @@
*
* \return uiSliceIdc - successful; (uint8_t)(-1) - failed;
*/
-uint8_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY);
+uint16_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY);
/*!
* \brief Get first mb in slice/slice_group: uiSliceIdc (apply in Single/multiple slices and FMO)
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -597,7 +597,7 @@
bool bLeftTop;
bool bRightTop;
int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
- uint8_t uiSliceIdc;
+ uint16_t uiSliceIdc;
pList[iIdx].iMbX = pEnc->pStrideTab->pMbIndexX[kiDlayerId][iIdx];
pList[iIdx].iMbY = pEnc->pStrideTab->pMbIndexY[kiDlayerId][iIdx];
@@ -2272,7 +2272,7 @@
bool bTop;
bool bLeftTop;
bool bRightTop;
- int32_t uiSliceIdc;
+ uint16_t uiSliceIdc;
int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
uiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY);
@@ -2347,7 +2347,8 @@
}
pSliceCtx->pFirstMbInSlice[i] = iFirstMbIdx;
- memset (pSliceCtx->pOverallMbMap + iFirstMbIdx, (uint8_t)i, pSliceCtx->pCountMbNumInSlice[i]*sizeof (uint8_t));
+ WelsSetMemMultiplebytes_c(pSliceCtx->pOverallMbMap + iFirstMbIdx, i,
+ pSliceCtx->pCountMbNumInSlice[i], sizeof(uint16_t));
// for next partition(or pSlice)
iFirstMbIdx += pSliceCtx->pCountMbNumInSlice[i];
@@ -3924,15 +3925,19 @@
iMaxSliceNum *= SLICE_NUM_EXPAND_COEF;
SWelsNalRaw* pNalList = (SWelsNalRaw*)pMA->WelsMalloc (iCountNals * sizeof (SWelsNalRaw), "pOut->sNalList");
- if (NULL == pNalList)
+ if (NULL == pNalList) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pNalList is NULL");
return ENC_RETURN_MEMALLOCERR;
+ }
memcpy (pNalList, pCtx->pOut->sNalList, sizeof (SWelsNalRaw) * pCtx->pOut->iCountNals);
pMA->WelsFree (pCtx->pOut->sNalList, "pOut->sNalList");
pCtx->pOut->sNalList = pNalList;
int32_t* pNalLen = (int32_t*)pMA->WelsMalloc (iCountNals * sizeof (int32_t), "pOut->pNalLen");
- if (NULL == pNalLen)
+ if (NULL == pNalLen) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pNalLen is NULL");
return ENC_RETURN_MEMALLOCERR;
+ }
memcpy (pNalLen, pCtx->pOut->pNalLen, sizeof (int32_t) * pCtx->pOut->iCountNals);
pMA->WelsFree (pCtx->pOut->pNalLen, "pOut->pNalLen");
pCtx->pOut->pNalLen = pNalLen;
@@ -3948,8 +3953,10 @@
}
SSlice* pSlice = (SSlice*)pMA->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "Slice");
- if (NULL == pSlice)
+ if (NULL == pSlice) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pSlice is NULL");
return ENC_RETURN_MEMALLOCERR;
+ }
memcpy (pSlice, pCurLayer->sLayerInfo.pSliceInLayer, sizeof (SSlice) * iMaxSliceNumOld);
int32_t uiSliceIdx;
uiSliceIdx = iMaxSliceNumOld;
@@ -3963,8 +3970,11 @@
pSliceIdx->pSliceBsa = &pCtx->pSliceBs[uiSliceIdx].sBsWrite;
else
pSliceIdx->pSliceBsa = &pCtx->pOut->sBsWrite;
- if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA))
+ if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA)) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc MbCache not successful at slice_idx=%d (max-slice=%d)",
+ uiSliceIdx, iMaxSliceNum);
return ENC_RETURN_MEMALLOCERR;
+ }
pSliceIdx->bSliceHeaderExtFlag = pBaseSlice->bSliceHeaderExtFlag;
pSHExt->sSliceHeader.iPpsId = pBaseSHExt->sSliceHeader.iPpsId;
@@ -3983,8 +3993,10 @@
pCurLayer->sLayerInfo.pSliceInLayer = pSlice;
int16_t* pFirstMbInSlice = (int16_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (int16_t), "pSliceSeg->pFirstMbInSlice");
- if (NULL == pFirstMbInSlice)
+ if (NULL == pFirstMbInSlice) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pFirstMbInSlice is NULL");
return ENC_RETURN_MEMALLOCERR;
+ }
memset (pFirstMbInSlice, 0, sizeof (int16_t) * iMaxSliceNum);
memcpy (pFirstMbInSlice, pCurLayer->pSliceEncCtx->pFirstMbInSlice, sizeof (int16_t) * iMaxSliceNumOld);
pMA->WelsFree (pCurLayer->pSliceEncCtx->pFirstMbInSlice, "pSliceSeg->pFirstMbInSlice");
@@ -3992,8 +4004,10 @@
int32_t* pCountMbNumInSlice = (int32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (int32_t),
"pSliceSeg->pCountMbNumInSlice");
- if (NULL == pCountMbNumInSlice)
+ if (NULL == pCountMbNumInSlice) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc pCountMbNumInSlice not successful");
return ENC_RETURN_MEMALLOCERR;
+ }
memcpy (pCountMbNumInSlice, pCurLayer->pSliceEncCtx->pCountMbNumInSlice, sizeof (int32_t) * iMaxSliceNumOld);
uiSliceIdx = iMaxSliceNumOld;
while (uiSliceIdx < iMaxSliceNum) {
@@ -4004,8 +4018,10 @@
pCurLayer->pSliceEncCtx->pCountMbNumInSlice = pCountMbNumInSlice;
SRCSlicing* pSlcingOverRc = (SRCSlicing*)pMA->WelsMalloc (iMaxSliceNum * sizeof (SRCSlicing), "SlicingOverRC");
- if (NULL == pSlcingOverRc)
+ if (NULL == pSlcingOverRc) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSlcingOverRc not successful");
return ENC_RETURN_MEMALLOCERR;
+ }
memcpy (pSlcingOverRc, pCtx->pWelsSvcRc->pSlicingOverRc, sizeof (SRCSlicing) * iMaxSliceNumOld);
uiSliceIdx = iMaxSliceNumOld;
SRCSlicing* pSORC = &pSlcingOverRc[uiSliceIdx];
@@ -4069,9 +4085,13 @@
if (iSliceIdx >= (pSliceCtx->iMaxSliceNumConstraint - kiSliceIdxStep)) { // insufficient memory in pSliceInLayer[]
if (pCtx->iActiveThreadsNum == 1) {
- if (DynSliceRealloc (pCtx, pFrameBSInfo, pLayerBsInfo)) //only single thread support re-alloc now
+ //only single thread support re-alloc now
+ if (DynSliceRealloc (pCtx, pFrameBSInfo, pLayerBsInfo)) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::WelsCodeOnePicPartition: DynSliceRealloc not successful");
return ENC_RETURN_MEMALLOCERR;
+ }
} else if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
+ WelsLog (&(pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::WelsCodeOnePicPartition: iSliceIdx(%d) over iMaxSliceNumConstraint(%d)", iSliceIdx, pSliceCtx->iMaxSliceNumConstraint);
return ENC_RETURN_MEMALLOCERR;
}
}
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -73,7 +73,7 @@
void UpdateMbListNeighborParallel (SSliceCtx* pSliceCtx,
SMB* pMbList,
const int32_t uiSliceIdc) {
- const uint8_t* kpMbMap = pSliceCtx->pOverallMbMap;
+ const uint16_t* kpMbMap = pSliceCtx->pOverallMbMap;
const int32_t kiMbWidth = pSliceCtx->iMbWidth;
int32_t iIdx = pSliceCtx->pFirstMbInSlice[uiSliceIdc];
const int32_t kiEndMbInSlice = iIdx + pSliceCtx->pCountMbNumInSlice[uiSliceIdc] - 1;
--- a/codec/encoder/core/src/svc_enc_slice_segment.cpp
+++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp
@@ -78,7 +78,8 @@
const int16_t kiFirstMb = uiSliceIdx * kiMbWidth;
pSliceSeg->pCountMbNumInSlice[uiSliceIdx] = kiMbWidth;
pSliceSeg->pFirstMbInSlice[uiSliceIdx] = kiFirstMb;
- memset (pSliceSeg->pOverallMbMap + kiFirstMb, (uint8_t)uiSliceIdx, kiMbWidth * sizeof (uint8_t));
+ WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap + kiFirstMb, uiSliceIdx,
+ kiMbWidth, sizeof(uint16_t));
++ uiSliceIdx;
}
@@ -89,7 +90,7 @@
const int32_t* kpSlicesAssignList = (int32_t*) & (kpMso->sSliceArgument.uiSliceMbNum[0]);
const int32_t kiCountNumMbInFrame = pSliceSeg->iMbNumInFrame;
const int32_t kiCountSliceNumInFrame = pSliceSeg->iSliceNumInFrame;
- int32_t iSliceIdx = 0;
+ uint16_t iSliceIdx = 0;
int16_t iMbIdx = 0;
do {
@@ -392,7 +393,7 @@
}
if (SM_SINGLE_SLICE == uiSliceMode) {
- pSliceSeg->pOverallMbMap = (uint8_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint8_t), "pSliceSeg->pOverallMbMap");
+ pSliceSeg->pOverallMbMap = (uint16_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap)
pSliceSeg->iSliceNumInFrame = 1;
@@ -419,11 +420,12 @@
&& uiSliceMode != SM_DYN_SLICE && uiSliceMode != SM_AUTO_SLICE)
return 1;
- pSliceSeg->pOverallMbMap = (uint8_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint8_t), "pSliceSeg->pOverallMbMap");
+ pSliceSeg->pOverallMbMap = (uint16_t*)pMa->WelsMalloc (kiCountMbNum * sizeof (uint16_t), "pSliceSeg->pOverallMbMap");
WELS_VERIFY_RETURN_IF (1, NULL == pSliceSeg->pOverallMbMap)
- memset (pSliceSeg->pOverallMbMap, 0, kiCountMbNum * sizeof (uint8_t));
+ WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap, 0, kiCountMbNum, sizeof(uint16_t));
+
//SM_DYN_SLICE: init, set pSliceSeg->iSliceNumInFrame = 1;
pSliceSeg->iSliceNumInFrame = GetInitialSliceNum (kiMbWidth, kiMbHeight, pMso);
@@ -550,10 +552,10 @@
*
* \return uiSliceIdc - successful; -1 - failed;
*/
-uint8_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY) {
+uint16_t WelsMbToSliceIdc (SSliceCtx* pSliceCtx, const int16_t kiMbXY) {
if (NULL != pSliceCtx && kiMbXY < pSliceCtx->iMbNumInFrame && kiMbXY >= 0)
return pSliceCtx->pOverallMbMap[ kiMbXY ];
- return (uint8_t) (-1);
+ return (uint16_t) (-1);
}
/*!
@@ -688,7 +690,8 @@
pSliceCtx->pFirstMbInSlice[iSliceIdx] = iFirstMbIdx;
pSliceCtx->pCountMbNumInSlice[iSliceIdx] = kiSliceRun;
- memset (pSliceCtx->pOverallMbMap + iFirstMbIdx, (uint8_t)iSliceIdx, kiSliceRun * sizeof (uint8_t));
+ WelsSetMemMultiplebytes_c(pSliceCtx->pOverallMbMap + iFirstMbIdx, iSliceIdx,
+ kiSliceRun, sizeof(uint16_t));
iFirstMbIdx += kiSliceRun;
--- a/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/codec/encoder/core/src/svc_encode_slice.cpp
@@ -776,7 +776,7 @@
bool bLeftTop;
bool bRightTop;
int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
- const uint8_t kuiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY);
+ const uint16_t kuiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY);
pMb->uiSliceIdc = kuiSliceIdc;
iLeftXY = kiMbXY - 1;
@@ -814,9 +814,9 @@
int32_t iFirstMbIdxOfNextSlice, const int32_t kiLastMbIdxInPartition) {
SDqLayer* pCurLayer = pEncCtx->pCurDqLayer;
int32_t iCurMbIdx = pCurMb->iMbXY;
- int32_t iCurSliceIdc = pSliceCtx->pOverallMbMap[ iCurMbIdx ];
+ uint16_t iCurSliceIdc = pSliceCtx->pOverallMbMap[ iCurMbIdx ];
const int32_t kiSliceIdxStep = pEncCtx->iActiveThreadsNum;
- int32_t iNextSliceIdc = iCurSliceIdc + kiSliceIdxStep;
+ uint16_t iNextSliceIdc = iCurSliceIdc + kiSliceIdxStep;
SSlice* pNextSlice = NULL;
SMB* pMbList = pCurLayer->sMbDataP;
@@ -839,10 +839,9 @@
sizeof (SSliceHeaderExt)); // confirmed_safe_unsafe_usage
pSliceCtx->pFirstMbInSlice[iNextSliceIdc] = iFirstMbIdxOfNextSlice;
+ WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdxOfNextSlice, iNextSliceIdc,
+ (kiLastMbIdxInPartition - iFirstMbIdxOfNextSlice + 1), sizeof(uint16_t));
- memset (pSliceCtx->pOverallMbMap + iFirstMbIdxOfNextSlice, (uint8_t)iNextSliceIdc,
- (kiLastMbIdxInPartition - iFirstMbIdxOfNextSlice + 1)*sizeof (uint8_t));
-
//DYNAMIC_SLICING_ONE_THREAD: update pMbList slice_neighbor_info
UpdateMbNeighbourInfoForNextSlice (pSliceCtx, pMbList, iFirstMbIdxOfNextSlice, kiLastMbIdxInPartition);
}
@@ -874,6 +873,10 @@
if ((kbCurMbNotFirstMbOfCurSlice
&& JUMPPACKETSIZE_JUDGE (uiLen, iCurMbIdx, pSliceCtx->uiSliceSizeConstraint)) /*jump_avoiding_pack_exceed*/
&& kbCurMbNotLastMbOfCurPartition) { //decide to add new pSlice
+
+ WelsLog (&pEncCtx->sLogCtx, WELS_LOG_DETAIL,
+ "DynSlcJudgeSliceBoundaryStepBack: AddSliceBoundary: iCurMbIdx=%d, uiLen=%d, uiSliceIdx=%d", iCurMbIdx, uiLen,
+ pCurSlice->uiSliceIdx);
if (pEncCtx->pSvcParam->iMultipleThreadIdc > 1) {
WelsMutexLock (&pEncCtx->pSliceThreading->mutexSliceNumUpdate);
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -718,7 +718,7 @@
pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8;
pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
pScreenBlockFeatureStorage->iActualListSize = kiListSize;
- memset (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL * sizeof (uint32_t));
+ WelsSetMemMultiplebytes_c (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL, sizeof(uint32_t));
pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false;
return ENC_RETURN_SUCCESS;