ref: 801da26d1d11b393533b2f262ff397ea9fa4f1bd
parent: d0a81355b0b2677f38c49d1b55adce584f3f5449
author: Martin Storsjö <[email protected]>
date: Mon Mar 3 17:45:23 EST 2014
Use WelsMultipleEventsWaitSingleBlocking with a master event for waiting on finished threads. This allows using the same code path on both Unix and Windows for distributing new slices to be coded to the worker threads. This also improves the performance on Unix: instead of waiting for all the current threads to finish their current slice before handing out a new slice to each of them (where the threads that finish first just wait instead of immediately getting a new slice to work on), we now use the same logic as on Windows. In one setup, it improves the encoding performance from ~920 fps to ~950 fps, and in another setup it goes from ~390 fps to ~660 fps. (These tests were done with the SM_ROWMB_SLICE mode, which heavily exercises the code for distributing new slices to the worker threads.) The extra WelsEventSignal call on Windows, where it isn't strictly necessary, doesn't incur any measurable slowdown, so it is kept without any extra ifdefs to keep the code more readable and unified.
--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -94,6 +94,7 @@
char eventNamespace[100];
WELS_THREAD_HANDLE pThreadHandles[MAX_THREADS_NUM];// thread handles, [iThreadIdx]
WELS_EVENT pSliceCodedEvent[MAX_THREADS_NUM];// events for slice coded state, [iThreadIdx]
+WELS_EVENT pSliceCodedMasterEvent; // events for signalling that some event in pSliceCodedEvent has been signalled
WELS_EVENT pReadySliceCodingEvent[MAX_THREADS_NUM]; // events for slice coding ready, [iThreadIdx]
WELS_EVENT pUpdateMbListEvent[MAX_THREADS_NUM]; // signal to update mb list neighbor for various slices
WELS_EVENT pFinUpdateMbListEvent[MAX_THREADS_NUM]; // signal to indicate finish updating mb list
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -3225,7 +3225,7 @@
return ENC_RETURN_UNEXPECTED;
}
- WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
+ WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0], &pCtx->pSliceThreading->pSliceCodedMasterEvent);
// all slices are finished coding here
@@ -3266,12 +3266,12 @@
while (1) {
if (iIndexOfSliceToBeCoded >= iSliceCount && iNumThreadsRunning <= 0)
break;
-#ifdef _WIN32
WELS_THREAD_ERROR_CODE lwait = 0;
int32_t iEventId = -1;
lwait = WelsMultipleEventsWaitSingleBlocking (iNumThreadsScheduled,
- &pCtx->pSliceThreading->pSliceCodedEvent[0]);
+ &pCtx->pSliceThreading->pSliceCodedEvent[0],
+ &pCtx->pSliceThreading->pSliceCodedMasterEvent);
iEventId = (int32_t) (lwait - WELS_THREAD_ERROR_WAIT_OBJECT_0);
if (iEventId >= 0 && iEventId < iNumThreadsScheduled) {
if (iIndexOfSliceToBeCoded < iSliceCount) {
@@ -3285,29 +3285,6 @@
-- iNumThreadsRunning;
}
}
-#else
- // TODO for pthread platforms
- // alternate implementation using blocking due non-blocking with timeout mode not support at wels thread lib, tune back if available
- WelsMultipleEventsWaitAllBlocking (iNumThreadsRunning, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
- WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
- if (iIndexOfSliceToBeCoded < iSliceCount) {
- int32_t iThreadIdx = 0;
- // pick up succeeding slices for threading if left
- while (iThreadIdx < iNumThreadsScheduled) {
- if (iIndexOfSliceToBeCoded >= iSliceCount)
- break;
- pCtx->pSliceThreading->pThreadPEncCtx[iThreadIdx].iSliceIndex = iIndexOfSliceToBeCoded;
- WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iThreadIdx]);
-
- ++ iIndexOfSliceToBeCoded;
- ++ iThreadIdx;
- }
- // update iNumThreadsRunning
- iNumThreadsRunning = iThreadIdx;
- } else {
- iNumThreadsRunning = 0;
- }
-#endif//_WIN32
}//while(1)
// all slices are finished coding here
@@ -3329,7 +3306,7 @@
return ENC_RETURN_UNEXPECTED;
}
- WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
+ WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0], &pCtx->pSliceThreading->pSliceCodedMasterEvent);
WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt);
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -351,10 +351,11 @@
MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "encpEncCtx= 0x%p\n", (void*) (*ppCtx));
+ char name[SEM_NAME_MAX] = {0};
+ WELS_THREAD_ERROR_CODE err = 0;
+
iIdx = 0;
while (iIdx < iThreadNum) {
- char name[SEM_NAME_MAX] = {0};
- WELS_THREAD_ERROR_CODE err = 0;
pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx = (void*) (*ppCtx);
pSmt->pThreadPEncCtx[iIdx].iSliceIndex = iIdx;
pSmt->pThreadPEncCtx[iIdx].iThreadIndex = iIdx;
@@ -386,6 +387,10 @@
++ iIdx;
}
+ WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
+ err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
+ MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d\n", name, err, errno);
+
(*ppCtx)->pSliceBs = (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSliceBs), FreeMemorySvc (ppCtx))
@@ -444,8 +449,8 @@
if (NULL == pSmt)
return;
+ char ename[SEM_NAME_MAX] = {0};
while (iIdx < iThreadNum) {
- char ename[SEM_NAME_MAX] = {0};
// length of semaphore name should be system constrained at least on mac 10.7
#ifdef _WIN32
if (pSmt->pThreadHandles != NULL && pSmt->pThreadHandles[iIdx] != NULL)
@@ -467,6 +472,8 @@
++ iIdx;
}
+ WelsSnprintf (ename, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
+ WelsEventClose (&pSmt->pSliceCodedMasterEvent, ename);
WelsMutexDestroy (&pSmt->mutexSliceNumUpdate);
WelsMutexDestroy (&((*ppCtx)->mutexEncoderError));
@@ -864,6 +871,8 @@
WelsEventSignal (
&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
+ WelsEventSignal (
+ &pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
} else { // for SM_DYN_SLICE parallelization
SSliceCtx* pSliceCtx = pCurDq->pSliceEncCtx;
const int32_t kiPartitionId = iThreadIdx;
@@ -967,6 +976,7 @@
break;
WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]); // mean finished coding current pSlice
+ WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
}
}
#ifdef _WIN32