shithub: openh264

Download patch

ref: 4863c6602aae600f6985f3069401341bacfbc6c2
parent: 4bc0b8ad188cf373e3224e9471963c116816fea6
author: xiaotian.shi <[email protected]>
date: Thu Oct 31 07:29:35 EDT 2019

commit-4 of multi-thread decoding support.

Add various internal thread functions.
Create function ParseAccessUnit and thread function ConstructAccessUnit, under the condition that each input bitstream segment contains exactly one access unit (i.e. one slice or one frame).

--- a/codec/api/svc/codec_app_def.h
+++ b/codec/api/svc/codec_app_def.h
@@ -167,8 +167,8 @@
   DECODER_OPTION_LEVEL,                 ///< get current AU level info,only is used in GetOption
   DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
   DECODER_OPTION_IS_REF_PIC,             ///< feedback current frame is ref pic or not
-  DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER  ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
-
+  DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER,  ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
+  DECODER_OPTION_NUM_OF_THREADS,         ///< number of decoding threads. The maximum thread count is equal or less than lesser of (cpu core counts and 16).
 } DECODER_OPTION;
 
 /**
--- a/codec/console/dec/src/h264dec.cpp
+++ b/codec/console/dec/src/h264dec.cpp
@@ -52,7 +52,6 @@
 #include "measure_time.h"
 #include "d3d9_utils.h"
 
-
 using namespace std;
 
 #if defined (WINDOWS_PHONE)
@@ -69,6 +68,106 @@
 #endif
 //using namespace WelsDec;
 
+// Scans the bytes at pBuf + bufPos for start codes (00 00 01 / 00 00 00 01) and returns the number of bytes that
+// belong to the next segment: everything up to, but excluding, the start code of the second slice NAL (type 1 or
+// 5), or of the first SPS (type 7) of the scan when a slice precedes it. If no such boundary is found, the number
+// of remaining bytes (iFileSize - bufPos) is returned; that is also the result when fewer than 4 bytes remain, in
+// which case the two output arguments are left untouched.
+// pSpsBuf        [out] address of the byte right after the start code of the last SPS met, NULL if none was met.
+// sps_byte_count [out] distance from pSpsBuf to the start code of the PPS (type 8), set only when that PPS is the
+//                      first one met and exactly one SPS was met before it; 0 otherwise.
+int32_t readPicture (uint8_t* pBuf, const int32_t& iFileSize, const int32_t& bufPos, uint8_t*& pSpsBuf,
+                     int32_t& sps_byte_count) {
+  const int32_t kiRemaining = iFileSize - bufPos;
+  if (kiRemaining < 4) {
+    return kiRemaining;
+  }
+  pSpsBuf = NULL;
+  sps_byte_count = 0;
+  uint8_t* const pStart = pBuf + bufPos;
+  bool bSliceSeen = false; // a slice NAL (type 1 or 5) has already been met
+  int32_t iSpsSeen = 0, iPpsSeen = 0;
+  int32_t iOffset = 0;
+  while (iOffset < kiRemaining - 4) { // keeps pCur[0..4] inside the remaining bytes
+    uint8_t* pCur = pStart + iOffset;
+    int32_t iStartCodeLen = 0; // 0 means "no start code at pCur"
+    if (pCur[0] == 0 && pCur[1] == 0) {
+      if (pCur[2] == 1) {
+        iStartCodeLen = 3;
+      } else if (pCur[2] == 0 && pCur[3] == 1) {
+        iStartCodeLen = 4;
+      }
+    }
+    if (iStartCodeLen == 0) {
+      ++iOffset;
+      continue;
+    }
+    switch (pCur[iStartCodeLen] & 0x1F) { // nal_unit_type
+    case 1: // non-IDR slice
+    case 5: // IDR slice
+      if (bSliceSeen) {
+        return iOffset;
+      }
+      bSliceSeen = true;
+      break;
+    case 7: // SPS
+      pSpsBuf = pCur + iStartCodeLen;
+      if (++iSpsSeen == 1 && bSliceSeen) {
+        return iOffset;
+      }
+      break;
+    case 8: // PPS
+      if (++iPpsSeen == 1 && iSpsSeen == 1) {
+        sps_byte_count = int32_t (pCur - pSpsBuf);
+      }
+      break;
+    }
+    iOffset += 4; // step over the start code (and, for the 3-byte form, the NAL header byte as well)
+  }
+  return kiRemaining;
+}
+
+// Drains the frames still pending inside the decoder (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER of them) by
+// calling FlushFrame() once per frame and handing every ready picture to CUtils::Process() together with pYuvFile.
+// iTotal accumulates the time spent flushing; iFrameCount grows by one per ready picture; iWidth/iHeight receive
+// the size of the last ready picture. When pOptionFile is set, (iFrameCount, iWidth, iHeight) is appended to it
+// whenever both dimensions differ from iLastWidth/iLastHeight, which are then updated for the caller.
+void FlushFrames (ISVCDecoder* pDecoder, int64_t& iTotal, FILE* pYuvFile, FILE* pOptionFile, int32_t& iFrameCount,
+                  unsigned long long& uiTimeStamp, int32_t& iWidth, int32_t& iHeight, int32_t& iLastWidth, int32_t& iLastHeight) {
+  // iLastWidth and iLastHeight are both in/out: they must be references for the updates below to reach the caller.
+  uint8_t* pData[3] = { NULL };
+  SBufferInfo sDstBufInfo;
+  int32_t iPendingFrames = 0;
+  CUtils cOutputModule;
+  pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &iPendingFrames);
+  for (int32_t i = 0; i < iPendingFrames; ++i) {
+    const int64_t iStart = WelsTime();
+    pData[0] = NULL;
+    pData[1] = NULL;
+    pData[2] = NULL;
+    memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
+    sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
+    sDstBufInfo.iBufferStatus = 1;
+    pDecoder->FlushFrame (pData, &sDstBufInfo);
+    iTotal += WelsTime() - iStart; // times the buffer-info setup plus FlushFrame(), not the file output below
+    if (sDstBufInfo.iBufferStatus != 1) {
+      continue; // FlushFrame() did not report a ready picture for this iteration
+    }
+    uint8_t* pDst[3] = { pData[0], pData[1], pData[2] };
+    cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
+    iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
+    iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
+    // NOTE(review): a record is written only when BOTH dimensions changed ('&&'); confirm '||' is not intended.
+    if (pOptionFile != NULL && iWidth != iLastWidth && iHeight != iLastHeight) {
+      fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
+      fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
+      fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
+      iLastWidth = iWidth;
+      iLastHeight = iHeight;
+    }
+    ++iFrameCount;
+  }
+}
 void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, const char* kpOuputFileName,
                          int32_t& iWidth, int32_t& iHeight, const char* pOptionFileName, const char* pLengthFileName,
                          int32_t iErrorConMethod,
@@ -95,15 +194,18 @@
 
   int32_t iBufPos = 0;
   int32_t iFileSize;
-  int32_t i = 0;
   int32_t iLastWidth = 0, iLastHeight = 0;
   int32_t iFrameCount = 0;
   int32_t iEndOfStreamFlag = 0;
-  int32_t num_of_frames_in_buffer = 0;
   pDecoder->SetOption (DECODER_OPTION_ERROR_CON_IDC, &iErrorConMethod);
   CUtils cOutputModule;
   double dElapsed = 0;
+  uint8_t uLastSpsBuf[32];
+  int32_t iLastSpsByteCount = 0;
 
+  int32_t iThreadCount = 1;
+  pDecoder->GetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
+
   if (kpH264FileName) {
     pH264File = fopen (kpH264FileName, "rb");
     if (pH264File == NULL) {
@@ -181,13 +283,32 @@
         goto label_exit;
       iSliceSize = static_cast<int32_t> (pInfo[2]);
     } else {
-      for (i = 0; i < iFileSize; i++) {
-        if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
-             && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
-          break;
+      if (iThreadCount > 1) {
+        uint8_t* uSpsPtr = NULL;
+        int32_t iSpsByteCount = 0;
+        iSliceSize = readPicture (pBuf, iFileSize, iBufPos, uSpsPtr, iSpsByteCount);
+        if (iLastSpsByteCount > 0 && iSpsByteCount > 0) {
+          if (iSpsByteCount != iLastSpsByteCount || memcmp (uSpsPtr, uLastSpsBuf, iLastSpsByteCount) != 0) {
+            // Whenever the new sequence differs from the preceding one, all pending frames must be flushed out before the new sequence can start decoding.
+            FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth,
+                         iLastHeight);
+          }
         }
+        if (iSpsByteCount > 0 && uSpsPtr != NULL) {
+          if (iSpsByteCount > 32) iSpsByteCount = 32;
+          iLastSpsByteCount = iSpsByteCount;
+          memcpy (uLastSpsBuf, uSpsPtr, iSpsByteCount);
+        }
+      } else {
+        int i = 0;
+        for (i = 0; i < iFileSize; i++) {
+          if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
+               && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
+            break;
+          }
+        }
+        iSliceSize = i;
       }
-      iSliceSize = i;
     }
     if (iSliceSize < 4) { //too small size, no effective data, ignore
       iBufPos += iSliceSize;
@@ -283,41 +404,8 @@
     iBufPos += iSliceSize;
     ++ iSliceIndex;
   }
-
-  pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
-  for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
-    iStart = WelsTime();
-    pData[0] = NULL;
-    pData[1] = NULL;
-    pData[2] = NULL;
-    memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
-    sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
-    sDstBufInfo.iBufferStatus = 1;
-    pDecoder->FlushFrame (pData, &sDstBufInfo);
-    if (sDstBufInfo.iBufferStatus == 1) {
-      pDst[0] = pData[0];
-      pDst[1] = pData[1];
-      pDst[2] = pData[2];
-    }
-    iEnd = WelsTime();
-    iTotal += iEnd - iStart;
-    if (sDstBufInfo.iBufferStatus == 1) {
-      cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
-      iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
-      iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
-
-      if (pOptionFile != NULL) {
-        if (iWidth != iLastWidth && iHeight != iLastHeight) {
-          fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
-          fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
-          fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
-          iLastWidth = iWidth;
-          iLastHeight = iHeight;
-        }
-      }
-      ++iFrameCount;
-    }
-  }
+  FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth,
+               iLastHeight);
   dElapsed = iTotal / 1e6;
   fprintf (stderr, "-------------------------------------------------------\n");
   fprintf (stderr, "iWidth:\t\t%d\nheight:\t\t%d\nFrames:\t\t%d\ndecode time:\t%f sec\nFPS:\t\t%f fps\n",
@@ -488,6 +576,9 @@
   if (iLevelSetting >= 0) {
     pDecoder->SetOption (DECODER_OPTION_TRACE_LEVEL, &iLevelSetting);
   }
+
+  int32_t iThreadCount = 1;
+  pDecoder->SetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
 
   if (pDecoder->Initialize (&sDecParam)) {
     printf ("Decoder initialization failed.\n");
--- a/codec/decoder/plus/inc/welsDecoderExt.h
+++ b/codec/decoder/plus/inc/welsDecoderExt.h
@@ -109,22 +109,51 @@
   virtual long EXTAPI SetOption (DECODER_OPTION eOptID, void* pOption);
   virtual long EXTAPI GetOption (DECODER_OPTION eOptID, void* pOption);
 
+ public:
+  DECODING_STATE DecodeFrame2WithCtx (PWelsDecoderContext pCtx, const unsigned char* kpSrc, const int kiSrcLen,
+                                      unsigned char** ppDst, SBufferInfo* pDstInfo);
+  DECODING_STATE ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx);
+
  private:
-  PWelsDecoderContext     m_pDecContext;
   welsCodecTrace*         m_pWelsTrace;
+  uint32_t                m_uiDecodeTimeStamp;
+  bool                    m_bIsBaseline;
+  int32_t                 m_iCpuCount;
+  int32_t                 m_iThreadCount;
+  PPicBuff                m_pPicBuff;
+  bool                    m_bParamSetsLostFlag;
+  bool                    m_bFreezeOutput;
+  int32_t                 m_DecCtxActiveCount;
+  PWelsDecoderThreadCTX   m_pDecThrCtx;
+  PWelsDecoderThreadCTX   m_pLastDecThrCtx;
+  WELS_MUTEX              m_csDecoder;
+  SWelsDecEvent           m_sBufferingEvent;
+  SWelsDecEvent           m_sReleaseBufferEvent;
+  SWelsDecSemphore        m_sIsBusy;
   SPictInfo               m_sPictInfoList[16];
   SPictReoderingStatus    m_sReoderingStatus;
+  PWelsDecoderThreadCTX   m_pDecThrCtxActive[WELS_DEC_MAX_NUM_CPU];
   SVlcTable               m_sVlcTable;
   SWelsLastDecPicInfo     m_sLastDecPicInfo;
   SDecoderStatistics      m_sDecoderStatistics;// For real time debugging
 
+ private:
   int32_t InitDecoder (const SDecodingParam* pParam);
   void UninitDecoder (void);
-  int32_t ResetDecoder();
+  int32_t InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam);
+  void UninitDecoderCtx (PWelsDecoderContext& pCtx);
+  int32_t ResetDecoder (PWelsDecoderContext& pCtx);
+  int32_t ThreadResetDecoder (PWelsDecoderContext& pCtx);
 
   void OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics);
-  DECODING_STATE ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo);
+  DECODING_STATE ReorderPicturesInDisplay (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
+  int ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst,
+                                 SBufferInfo* pDstInfo);
+  void BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
+  void ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
 
+  void OpenDecoderThreads();
+  void CloseDecoderThreads();
 #ifdef OUTPUT_BIT_STREAM
   WelsFileHandle* m_pFBS;
   WelsFileHandle* m_pFBSSize;
--- a/codec/decoder/plus/src/welsDecoderExt.cpp
+++ b/codec/decoder/plus/src/welsDecoderExt.cpp
@@ -51,6 +51,7 @@
 //#include "macros.h"
 #include "decoder.h"
 #include "decoder_core.h"
+#include "manage_dec_ref.h"
 #include "error_concealment.h"
 
 #include "measure_time.h"
@@ -67,12 +68,11 @@
 #include <stdio.h>
 #include <stdarg.h>
 #include <sys/types.h>
+#include <malloc.h>
 #else
 #include <sys/time.h>
 #endif
 
-#define _PICTURE_REORDERING_ 1
-
 namespace WelsDec {
 
 //////////////////////////////////////////////////////////////////////
@@ -88,9 +88,58 @@
 *
 *   return: none
 ***************************************************************************/
+DECLARE_PROCTHREAD (pThrProcInit, p) { // thread entry stub: forwards p to the pThrProcMain stored in the thread info
+  SWelsDecThreadInfo* sThreadInfo = (SWelsDecThreadInfo*)p; // NOTE(review): OpenDecoderThreads passes a SWelsDecoderThreadCTX*; presumably sThreadInfo is its first member - confirm
+#if defined(WIN32)
+  _alloca (WELS_DEC_MAX_THREAD_STACK_SIZE * (sThreadInfo->uiThrNum + 1)); // result unused; presumably staggers each thread's stack position by its index - TODO confirm intent
+#endif
+  return sThreadInfo->pThrProcMain (p);
+}
+
+static DECODING_STATE  ConstructAccessUnit (CWelsDecoder* pWelsDecoder, PWelsDecoderThreadCTX pThrCtx) { // step run by pThrProcFrame when the thread command is WELS_DEC_THREAD_COMMAND_RUN
+  int iRet = dsErrorFree;
+  //WelsMutexLock (&pWelsDecoder->m_csDecoder);
+  if (pThrCtx->pCtx->pLastThreadCtx != NULL) { // a preceding thread context is recorded for this decoder context
+    PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pThrCtx->pCtx->pLastThreadCtx);
+    WAIT_EVENT (&pLastThreadCtx->sSliceDecodeStart, WELS_DEC_THREAD_WAIT_INFINITE); // block until the preceding context's sSliceDecodeStart event is signalled
+    RESET_EVENT (&pLastThreadCtx->sSliceDecodeStart); // consume the signal
+  }
+  pThrCtx->pDec = NULL;
+  RESET_EVENT (&pThrCtx->sSliceDecodeFinsh); // clear this context's own "finished" event before decoding
+  iRet |= pWelsDecoder->DecodeFrame2WithCtx (pThrCtx->pCtx, NULL, 0, pThrCtx->ppDst, &pThrCtx->sDstInfo); // NULL/0 input: presumably the bitstream was already parsed into pCtx (ParseAccessUnit) - confirm
+
+  //WelsMutexUnlock (&pWelsDecoder->m_csDecoder);
+  return (DECODING_STATE)iRet;
+}
+
+DECLARE_PROCTHREAD (pThrProcFrame, p) { // worker thread main loop; p is cast to the thread's SWelsDecoderThreadCTX
+  SWelsDecoderThreadCTX* pThrCtx = (SWelsDecoderThreadCTX*)p;
+  while (1) {
+    RELEASE_SEMAPHORE (pThrCtx->sThreadInfo.sIsBusy); // sIsBusy is a pointer; OpenDecoderThreads points it at the decoder-wide m_sIsBusy
+    RELEASE_SEMAPHORE (&pThrCtx->sThreadInfo.sIsIdle); // mark this thread idle; CloseDecoderThreads waits on sIsIdle
+    WAIT_SEMAPHORE (&pThrCtx->sThreadInfo.sIsActivated, WELS_DEC_THREAD_WAIT_INFINITE); // sleep until sIsActivated is released
+    if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_RUN) {
+      CWelsDecoder* pWelsDecoder = (CWelsDecoder*)pThrCtx->threadCtxOwner;
+      ConstructAccessUnit (pWelsDecoder, pThrCtx); // the returned DECODING_STATE is discarded here
+    } else if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_ABORT) { // command set by CloseDecoderThreads
+      break;
+    }
+  }
+  return 0;
+}
+
 CWelsDecoder::CWelsDecoder (void)
-  : m_pDecContext (NULL),
-    m_pWelsTrace (NULL) {
+  : m_pWelsTrace (NULL),
+    m_uiDecodeTimeStamp (0),
+    m_bIsBaseline (false),
+    m_iCpuCount (1),
+    m_iThreadCount (1),
+    m_pPicBuff (NULL),
+    m_bParamSetsLostFlag (false),
+    m_bFreezeOutput (false),
+    m_DecCtxActiveCount (0),
+    m_pDecThrCtx (NULL),
+    m_pLastDecThrCtx (NULL) {
 #ifdef OUTPUT_BIT_STREAM
   char chFileName[1024] = { 0 };  //for .264
   int iBufUsed = 0;
@@ -114,6 +163,15 @@
 
   ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true);
 
+  m_iCpuCount = GetCPUCount();
+  if (m_iCpuCount > WELS_DEC_MAX_NUM_CPU) {
+    m_iCpuCount = WELS_DEC_MAX_NUM_CPU;
+  }
+  m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iThreadCount];
+  memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iThreadCount);
+  for (int32_t i = 0; i < WELS_DEC_MAX_NUM_CPU; ++i) {
+    m_pDecThrCtxActive[i] = NULL;
+  }
 #ifdef OUTPUT_BIT_STREAM
   SWelsTime sCurTime;
 
@@ -169,7 +227,7 @@
   if (m_pWelsTrace != NULL) {
     WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::~CWelsDecoder()");
   }
-
+  CloseDecoderThreads();
   UninitDecoder();
 
 #ifdef OUTPUT_BIT_STREAM
@@ -187,6 +245,10 @@
     delete m_pWelsTrace;
     m_pWelsTrace = NULL;
   }
+  if (m_pDecThrCtx != NULL) {
+    delete[] m_pDecThrCtx;
+    m_pDecThrCtx = NULL;
+  }
 }
 
 long CWelsDecoder::Initialize (const SDecodingParam* pParam) {
@@ -215,26 +277,87 @@
 }
 
 void CWelsDecoder::UninitDecoder (void) {
-  if (NULL == m_pDecContext)
-    return;
+  for (int32_t i = 0; i < m_iThreadCount; ++i) {
+    if (m_pDecThrCtx[i].pCtx != NULL) {
+      if (i > 0) {
+        WelsResetRefPicWithoutUnRef (m_pDecThrCtx[i].pCtx);
+      }
+      UninitDecoderCtx (m_pDecThrCtx[i].pCtx);
+    }
+  }
+}
 
-  WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoder(), openh264 codec version = %s.",
-           VERSION_NUMBER);
+void CWelsDecoder::OpenDecoderThreads() { // creates the decoder-wide sync objects and one worker thread per thread context
+  if (m_iThreadCount > 1) { // nothing is created for single-threaded decoding
+    m_uiDecodeTimeStamp = 0;
+    CREATE_SEMAPHORE (&m_sIsBusy, m_iThreadCount, m_iThreadCount, NULL);
+    WelsMutexInit (&m_csDecoder);
+    CREATE_EVENT (&m_sBufferingEvent, 1, 0, NULL);
+    SET_EVENT (&m_sBufferingEvent); // the event is set right after creation
+    CREATE_EVENT (&m_sReleaseBufferEvent, 1, 0, NULL);
+    SET_EVENT (&m_sReleaseBufferEvent); // the event is set right after creation
+    for (int32_t i = 0; i < m_iThreadCount; ++i) {
+      m_pDecThrCtx[i].sThreadInfo.uiThrMaxNum = m_iThreadCount;
+      m_pDecThrCtx[i].sThreadInfo.uiThrNum = i;
+      m_pDecThrCtx[i].sThreadInfo.uiThrStackSize = WELS_DEC_MAX_THREAD_STACK_SIZE;
+      m_pDecThrCtx[i].sThreadInfo.pThrProcMain = pThrProcFrame; // pThrProcInit forwards to this function
+      m_pDecThrCtx[i].sThreadInfo.sIsBusy = &m_sIsBusy; // every thread shares the decoder-wide semaphore
+      m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN;
+      m_pDecThrCtx[i].threadCtxOwner = this; // pThrProcFrame casts this back to CWelsDecoder*
+      m_pDecThrCtx[i].kpSrc = NULL;
+      m_pDecThrCtx[i].kiSrcLen = 0;
+      m_pDecThrCtx[i].ppDst = NULL;
+      m_pDecThrCtx[i].pDec = NULL;
+      CREATE_EVENT (&m_pDecThrCtx[i].sImageReady, 1, 0, NULL);
+      CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart, 1, 0, NULL);
+      CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinsh, 1, 0, NULL);
+      CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, 0, 1, NULL);
+      CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated, 0, 1, NULL);
+      CREATE_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle, pThrProcInit, (void*) (& (m_pDecThrCtx[i]))); // thread entry is pThrProcInit with this thread context as its argument
+    }
+  }
+}
+void CWelsDecoder::CloseDecoderThreads() { // stops every worker thread and closes the objects created by OpenDecoderThreads
+  if (m_iThreadCount > 1) { // nothing was created for single-threaded decoding
+    for (int32_t i = 0; i < m_iThreadCount; i++) { // wait for slices already in progress to complete
+      WAIT_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); // pThrProcFrame releases sIsIdle at the top of its loop
+      m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_ABORT;
+      RELEASE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated); // wake the thread so that it sees ABORT and leaves its loop
+      WAIT_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle); // presumably joins the thread - confirm against the macro definition
+      CLOSE_EVENT (&m_pDecThrCtx[i].sImageReady);
+      CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart);
+      CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinsh);
+      CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle);
+      CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated);
+    }
+    WelsMutexDestroy (&m_csDecoder);
+    CLOSE_EVENT (&m_sBufferingEvent);
+    CLOSE_EVENT (&m_sReleaseBufferEvent);
+    CLOSE_SEMAPHORE (&m_sIsBusy);
+  }
+}
 
-  WelsEndDecoder (m_pDecContext);
+void CWelsDecoder::UninitDecoderCtx (PWelsDecoderContext& pCtx) {
+  if (pCtx != NULL) {
 
-  if (m_pDecContext->pMemAlign != NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-             "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..",
-             m_pDecContext->pMemAlign->WelsGetMemoryUsage());
-    delete m_pDecContext->pMemAlign;
-    m_pDecContext->pMemAlign = NULL;
-  }
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoderCtx(), openh264 codec version = %s.",
+             VERSION_NUMBER);
 
-  if (NULL != m_pDecContext) {
-    WelsFree (m_pDecContext, "m_pDecContext");
+    WelsEndDecoder (pCtx);
 
-    m_pDecContext = NULL;
+    if (pCtx->pMemAlign != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+               "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..",
+               pCtx->pMemAlign->WelsGetMemoryUsage());
+      delete pCtx->pMemAlign;
+      pCtx->pMemAlign = NULL;
+    }
+
+    if (NULL != pCtx) {
+      WelsFree (pCtx, "m_pDecContext");
+
+      pCtx = NULL;
+    }
   }
 }
 
@@ -244,59 +367,100 @@
   WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
            "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
            VERSION_NUMBER, (int32_t)pParam->bParseOnly);
-
+  if (m_iThreadCount > 1 && pParam->bParseOnly) {
+    m_iThreadCount = 1;
+  }
+  OpenDecoderThreads();
   //reset decoder context
   memset (&m_sDecoderStatistics, 0, sizeof (SDecoderStatistics));
   memset (&m_sLastDecPicInfo, 0, sizeof (SWelsLastDecPicInfo));
   memset (&m_sVlcTable, 0, sizeof (SVlcTable));
+  UninitDecoder();
   WelsDecoderLastDecPicInfoDefaults (m_sLastDecPicInfo);
-  if (m_pDecContext) //free
-    UninitDecoder();
-  m_pDecContext = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext");
-  if (NULL == m_pDecContext)
+  for (int32_t i = 0; i < m_iThreadCount; ++i) {
+    InitDecoderCtx (m_pDecThrCtx[i].pCtx, pParam);
+    if (m_iThreadCount > 1) {
+      m_pDecThrCtx[i].pCtx->pThreadCtx = &m_pDecThrCtx[i];
+    }
+  }
+  m_bParamSetsLostFlag = false;
+  m_bFreezeOutput = false;
+  return cmResultSuccess;
+}
+
+// The return value of this function is not suitable; it needs to report failure info to the upper layer.
+int32_t CWelsDecoder::InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam) {
+
+  WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+           "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
+           VERSION_NUMBER, (int32_t)pParam->bParseOnly);
+
+  //reset decoder context
+  UninitDecoderCtx (pCtx);
+  pCtx = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext");
+  if (NULL == pCtx)
     return cmMallocMemeError;
   int32_t iCacheLineSize = 16;   // on chip cache line size in byte
-  m_pDecContext->pMemAlign = new CMemoryAlign (iCacheLineSize);
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pMemAlign), UninitDecoder())
+  pCtx->pMemAlign = new CMemoryAlign (iCacheLineSize);
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pMemAlign), UninitDecoderCtx (pCtx))
 
   //fill in default value into context
-  m_pDecContext->pLastDecPicInfo = &m_sLastDecPicInfo;
-  m_pDecContext->pDecoderStatistics = &m_sDecoderStatistics;
-  m_pDecContext->pVlcTable = &m_sVlcTable;
-  m_pDecContext->pPictInfoList = m_sPictInfoList;
-  m_pDecContext->pPictReoderingStatus = &m_sReoderingStatus;
-  WelsDecoderDefaults (m_pDecContext, &m_pWelsTrace->m_sLogCtx);
-  WelsDecoderSpsPpsDefaults (m_pDecContext->sSpsPpsCtx);
-
+  pCtx->pLastDecPicInfo = &m_sLastDecPicInfo;
+  pCtx->pDecoderStatistics = &m_sDecoderStatistics;
+  pCtx->pVlcTable = &m_sVlcTable;
+  pCtx->pPictInfoList = m_sPictInfoList;
+  pCtx->pPictReoderingStatus = &m_sReoderingStatus;
+  pCtx->pCsDecoder = &m_csDecoder;
+  WelsDecoderDefaults (pCtx, &m_pWelsTrace->m_sLogCtx);
+  WelsDecoderSpsPpsDefaults (pCtx->sSpsPpsCtx);
   //check param and update decoder context
-  m_pDecContext->pParam = (SDecodingParam*)m_pDecContext->pMemAlign->WelsMallocz (sizeof (SDecodingParam),
-                          "SDecodingParam");
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pParam), UninitDecoder());
-  int32_t iRet = DecoderConfigParam (m_pDecContext, pParam);
+  pCtx->pParam = (SDecodingParam*)pCtx->pMemAlign->WelsMallocz (sizeof (SDecodingParam),
+                 "SDecodingParam");
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pParam), UninitDecoderCtx (pCtx));
+  int32_t iRet = DecoderConfigParam (pCtx, pParam);
   WELS_VERIFY_RETURN_IFNEQ (iRet, cmResultSuccess);
 
   //init decoder
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (m_pDecContext, &m_pWelsTrace->m_sLogCtx),
-                              UninitDecoder())
-
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (pCtx, &m_pWelsTrace->m_sLogCtx),
+                              UninitDecoderCtx (pCtx))
+  pCtx->pPicBuff = NULL;
   return cmResultSuccess;
 }
 
-int32_t CWelsDecoder::ResetDecoder() {
+int32_t CWelsDecoder::ResetDecoder (PWelsDecoderContext& pCtx) {
   // TBC: need to be modified when context and trace point are null
-  if (m_pDecContext != NULL && m_pWelsTrace != NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d",
-             m_pDecContext->iErrorCode);
-    SDecodingParam sPrevParam;
-    memcpy (&sPrevParam, m_pDecContext->pParam, sizeof (SDecodingParam));
+  if (m_iThreadCount > 1) {
+    ThreadResetDecoder (pCtx);
+  } else {
+    if (pCtx != NULL && m_pWelsTrace != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d",
+               pCtx->iErrorCode);
+      SDecodingParam sPrevParam;
+      memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam));
 
-    WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoder (&sPrevParam), UninitDecoder());
+      WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoderCtx (pCtx, &sPrevParam),
+                                  UninitDecoderCtx (pCtx));
+    } else if (m_pWelsTrace != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null");
+    }
+    ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, false);
+  }
+  return ERR_INFO_UNINIT;
+}
+
+int32_t CWelsDecoder::ThreadResetDecoder (PWelsDecoderContext& pCtx) {
+  // TBC: need to be modified when context and trace point are null
+  SDecodingParam sPrevParam;
+  if (pCtx != NULL && m_pWelsTrace != NULL) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d", pCtx->iErrorCode);
+    memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam));
+    ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true);
+    CloseDecoderThreads();
+    UninitDecoder();
+    InitDecoder (&sPrevParam);
   } else if (m_pWelsTrace != NULL) {
     WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null");
   }
-#ifdef _PICTURE_REORDERING_
-  ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, false);
-#endif
   return ERR_INFO_UNINIT;
 }
 
@@ -305,71 +469,95 @@
  */
 long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) {
   int iVal = 0;
+  if (eOptID == DECODER_OPTION_NUM_OF_THREADS) {
+    if (pOption != NULL) {
+      int32_t threadCount = * ((int32_t*)pOption);
+      if (threadCount <= 0) {
+        threadCount = 1;
+      } else if (threadCount > m_iCpuCount) {
+        threadCount = m_iCpuCount;
+      }
+      if (threadCount > 3) {
+        threadCount = 3;
+      }
+      if (threadCount != m_iThreadCount) {
+        m_iThreadCount = threadCount;
+        if (m_pDecThrCtx != NULL) {
+          delete [] m_pDecThrCtx;
+          m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iThreadCount];
+          memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iThreadCount);
+        }
+      }
+    }
+    return cmResultSuccess;
+  }
+  for (int32_t i = 0; i < m_iThreadCount; ++i) {
+    PWelsDecoderContext pDecContext = m_pDecThrCtx[i].pCtx;
+    if (pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
+        eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
+      return dsInitialOptExpected;
+    if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded
+      if (pOption == NULL)
+        return cmInitParaError;
 
-  if (m_pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
-      eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
-    return dsInitialOptExpected;
-  if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded
-    if (pOption == NULL)
-      return cmInitParaError;
+      iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
 
-    iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
+      pDecContext->bEndOfStreamFlag = iVal ? true : false;
 
-    m_pDecContext->bEndOfStreamFlag = iVal ? true : false;
+      return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status
+      if (pOption == NULL)
+        return cmInitParaError;
 
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status
-    if (pOption == NULL)
-      return cmInitParaError;
+      iVal = * ((int*)pOption); // int value for error concealment idc
+      iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
+      if ((pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) {
+        WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+                 "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal);
+        return cmInitParaError;
+      }
 
-    iVal = * ((int*)pOption); // int value for error concealment idc
-    iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
-    if ((m_pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) {
+      pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal;
+      InitErrorCon (pDecContext);
       WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-               "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal);
-      return cmInitParaError;
-    }
+               "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal);
 
-    m_pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal;
-    InitErrorCon (m_pDecContext);
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-             "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal);
-
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) {
-    if (m_pWelsTrace) {
-      uint32_t level = * ((uint32_t*)pOption);
-      m_pWelsTrace->SetTraceLevel (level);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) {
-    if (m_pWelsTrace) {
-      WelsTraceCallback callback = * ((WelsTraceCallback*)pOption);
-      m_pWelsTrace->SetTraceCallback (callback);
-      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-               "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.",
-               callback);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) {
-    if (m_pWelsTrace) {
-      void* ctx = * ((void**)pOption);
-      m_pWelsTrace->SetTraceCallbackContext (ctx);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_GET_STATISTICS) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
-             "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!");
-    return cmInitParaError;
-  } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
-    if (pOption) {
-      m_pDecContext->pDecoderStatistics->iStatisticsLogInterval = (* ((unsigned int*)pOption));
       return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) {
+      if (m_pWelsTrace) {
+        uint32_t level = * ((uint32_t*)pOption);
+        m_pWelsTrace->SetTraceLevel (level);
+      }
+      return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) {
+      if (m_pWelsTrace) {
+        WelsTraceCallback callback = * ((WelsTraceCallback*)pOption);
+        m_pWelsTrace->SetTraceCallback (callback);
+        WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+                 "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.",
+                 callback);
+      }
+      return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) {
+      if (m_pWelsTrace) {
+        void* ctx = * ((void**)pOption);
+        m_pWelsTrace->SetTraceCallbackContext (ctx);
+      }
+      return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_GET_STATISTICS) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
+               "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!");
+      return cmInitParaError;
+    } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
+      if (pOption) {
+        pDecContext->pDecoderStatistics->iStatisticsLogInterval = (* ((unsigned int*)pOption));
+        return cmResultSuccess;
+      }
+    } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
+               "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!");
+      return cmInitParaError;
     }
-  } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
-             "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!");
-    return cmInitParaError;
   }
   return cmInitParaError;
 }
@@ -379,8 +567,12 @@
  */
 long CWelsDecoder::GetOption (DECODER_OPTION eOptID, void* pOption) {
   int iVal = 0;
-
-  if (m_pDecContext == NULL)
+  if (DECODER_OPTION_NUM_OF_THREADS == eOptID) {
+    * ((int*)pOption) = m_iThreadCount;
+    return cmResultSuccess;
+  }
+  PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx;
+  if (pDecContext == NULL)
     return cmInitExpected;
 
   if (pOption == NULL)
@@ -387,63 +579,63 @@
     return cmInitParaError;
 
   if (DECODER_OPTION_END_OF_STREAM == eOptID) {
-    iVal = m_pDecContext->bEndOfStreamFlag;
+    iVal = pDecContext->bEndOfStreamFlag;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   }
 #ifdef LONG_TERM_REF
   else if (DECODER_OPTION_IDR_PIC_ID == eOptID) {
-    iVal = m_pDecContext->uiCurIdrPicId;
+    iVal = pDecContext->uiCurIdrPicId;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_FRAME_NUM == eOptID) {
-    iVal = m_pDecContext->iFrameNum;
+    iVal = pDecContext->iFrameNum;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_LTR_MARKING_FLAG == eOptID) {
-    iVal = m_pDecContext->bCurAuContainLtrMarkSeFlag;
+    iVal = pDecContext->bCurAuContainLtrMarkSeFlag;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_LTR_MARKED_FRAME_NUM == eOptID) {
-    iVal = m_pDecContext->iFrameNumOfAuMarkedLtr;
+    iVal = pDecContext->iFrameNumOfAuMarkedLtr;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   }
 #endif
   else if (DECODER_OPTION_VCL_NAL == eOptID) { //feedback whether or not have VCL NAL in current AU
-    iVal = m_pDecContext->iFeedbackVclNalInAu;
+    iVal = pDecContext->iFeedbackVclNalInAu;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_TEMPORAL_ID == eOptID) { //if have VCL NAL in current AU, then feedback the temporal ID
-    iVal = m_pDecContext->iFeedbackTidInAu;
+    iVal = pDecContext->iFeedbackTidInAu;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_IS_REF_PIC == eOptID) {
-    iVal = m_pDecContext->iFeedbackNalRefIdc;
+    iVal = pDecContext->iFeedbackNalRefIdc;
     if (iVal > 0)
       iVal = 1;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_ERROR_CON_IDC == eOptID) {
-    iVal = (int)m_pDecContext->pParam->eEcActiveIdc;
+    iVal = (int)pDecContext->pParam->eEcActiveIdc;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_GET_STATISTICS == eOptID) { // get decoder statistics info for real time debugging
     SDecoderStatistics* pDecoderStatistics = (static_cast<SDecoderStatistics*> (pOption));
 
-    memcpy (pDecoderStatistics, m_pDecContext->pDecoderStatistics, sizeof (SDecoderStatistics));
+    memcpy (pDecoderStatistics, pDecContext->pDecoderStatistics, sizeof (SDecoderStatistics));
 
-    if (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount != 0) { //not original status
-      pDecoderStatistics->fAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
-          (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount);
-      pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
-          (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount + m_pDecContext->pDecoderStatistics->uiFreezingIDRNum +
-           m_pDecContext->pDecoderStatistics->uiFreezingNonIDRNum);
+    if (pDecContext->pDecoderStatistics->uiDecodedFrameCount != 0) { //not original status
+      pDecoderStatistics->fAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) /
+          (pDecContext->pDecoderStatistics->uiDecodedFrameCount);
+      pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) /
+          (pDecContext->pDecoderStatistics->uiDecodedFrameCount + pDecContext->pDecoderStatistics->uiFreezingIDRNum +
+           pDecContext->pDecoderStatistics->uiFreezingNonIDRNum);
     }
     return cmResultSuccess;
   } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
     if (pOption) {
-      iVal = m_pDecContext->pDecoderStatistics->iStatisticsLogInterval;
+      iVal = pDecContext->pDecoderStatistics->iStatisticsLogInterval;
       * ((unsigned int*)pOption) = iVal;
       return cmResultSuccess;
     }
@@ -450,34 +642,34 @@
   } else if (DECODER_OPTION_GET_SAR_INFO == eOptID) { //get decoder SAR info in VUI
     PVuiSarInfo pVuiSarInfo = (static_cast<PVuiSarInfo> (pOption));
     memset (pVuiSarInfo, 0, sizeof (SVuiSarInfo));
-    if (!m_pDecContext->pSps) {
+    if (!pDecContext->pSps) {
       return cmInitExpected;
     } else {
-      pVuiSarInfo->uiSarWidth = m_pDecContext->pSps->sVui.uiSarWidth;
-      pVuiSarInfo->uiSarHeight = m_pDecContext->pSps->sVui.uiSarHeight;
-      pVuiSarInfo->bOverscanAppropriateFlag = m_pDecContext->pSps->sVui.bOverscanAppropriateFlag;
+      pVuiSarInfo->uiSarWidth = pDecContext->pSps->sVui.uiSarWidth;
+      pVuiSarInfo->uiSarHeight = pDecContext->pSps->sVui.uiSarHeight;
+      pVuiSarInfo->bOverscanAppropriateFlag = pDecContext->pSps->sVui.bOverscanAppropriateFlag;
       return cmResultSuccess;
     }
   } else if (DECODER_OPTION_PROFILE == eOptID) {
-    if (!m_pDecContext->pSps) {
+    if (!pDecContext->pSps) {
       return cmInitExpected;
     }
-    iVal = (int)m_pDecContext->pSps->uiProfileIdc;
+    iVal = (int)pDecContext->pSps->uiProfileIdc;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_LEVEL == eOptID) {
-    if (!m_pDecContext->pSps) {
+    if (!pDecContext->pSps) {
       return cmInitExpected;
     }
-    iVal = (int)m_pDecContext->pSps->uiLevelIdc;
+    iVal = (int)pDecContext->pSps->uiLevelIdc;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER == eOptID) {
-    if (m_pDecContext->pSps && m_pDecContext->pSps->uiProfileIdc != 66) {
-      * ((int*)pOption) = m_sReoderingStatus.iNumOfPicts > 0 ? m_sReoderingStatus.iNumOfPicts : 0;
-    } else {
-      * ((int*)pOption) = 0;
+    for (int32_t activeThread = 0; activeThread < m_DecCtxActiveCount; ++activeThread) {
+      WAIT_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
+      RELEASE_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle);
     }
+    * ((int*)pOption) = m_sReoderingStatus.iNumOfPicts;
     return cmResultSuccess;
   }
 
@@ -488,7 +680,17 @@
     const int kiSrcLen,
     unsigned char** ppDst,
     SBufferInfo* pDstInfo) {
-  int iRet;
+  int iRet = dsErrorFree;
+  if (m_iThreadCount > 1) {
+    iRet = ThreadDecodeFrameInternal (kpSrc, kiSrcLen, ppDst, pDstInfo);
+    if (m_sReoderingStatus.iNumOfPicts) {
+      WAIT_EVENT (&m_sBufferingEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+      RESET_EVENT (&m_sReleaseBufferEvent);
+      ReleaseBufferedReadyPicture (NULL, ppDst, pDstInfo);
+      SET_EVENT (&m_sReleaseBufferEvent);
+    }
+    return (DECODING_STATE)iRet;
+  }
   //SBufferInfo sTmpBufferInfo;
   //unsigned char* ppTmpDst[3] = {NULL, NULL, NULL};
   iRet = (int)DecodeFrame2 (kpSrc, kiSrcLen, ppDst, pDstInfo);
@@ -506,11 +708,11 @@
   return (DECODING_STATE)iRet;
 }
 
-DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc,
+DECODING_STATE CWelsDecoder::DecodeFrame2WithCtx (PWelsDecoderContext pDecContext, const unsigned char* kpSrc,
     const int kiSrcLen,
     unsigned char** ppDst,
     SBufferInfo* pDstInfo) {
-  if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
+  if (pDecContext == NULL || pDecContext->pParam == NULL) {
     if (m_pWelsTrace != NULL) {
       WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeFrame2 without Initialize.\n");
     }
@@ -517,13 +719,13 @@
     return dsInitialOptExpected;
   }
 
-  if (m_pDecContext->pParam->bParseOnly) {
+  if (pDecContext->pParam->bParseOnly) {
     WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be false for this API calling! \n");
-    m_pDecContext->iErrorCode |= dsInvalidArgument;
+    pDecContext->iErrorCode |= dsInvalidArgument;
     return dsInvalidArgument;
   }
-  if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
-    if (ResetDecoder())
+  if (CheckBsBuffer (pDecContext, kiSrcLen)) {
+    if (ResetDecoder (pDecContext))
       return dsOutOfMemory;
 
     return dsErrorFree;
@@ -539,147 +741,175 @@
       WelsFflush (m_pFBSSize);
     }
 #endif//OUTPUT_BIT_STREAM
-    m_pDecContext->bEndOfStreamFlag = false;
+    pDecContext->bEndOfStreamFlag = false;
   } else {
     //For application MODE, the error detection should be added for safe.
     //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
-    m_pDecContext->bEndOfStreamFlag = true;
-    m_pDecContext->bInstantDecFlag = true;
+    pDecContext->bEndOfStreamFlag = true;
+    pDecContext->bInstantDecFlag = true;
   }
 
   int64_t iStart, iEnd;
   iStart = WelsTime();
 
-  ppDst[0] = ppDst[1] = ppDst[2] = NULL;
-  m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
-  m_pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
+  if (pDecContext->pThreadCtx == NULL) {
+    ppDst[0] = ppDst[1] = ppDst[2] = NULL;
+  }
+  pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
+  pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
   unsigned long long uiInBsTimeStamp = pDstInfo->uiInBsTimeStamp;
-  memset (pDstInfo, 0, sizeof (SBufferInfo));
+  if (pDecContext->pThreadCtx == NULL) {
+    memset (pDstInfo, 0, sizeof (SBufferInfo));
+  }
   pDstInfo->uiInBsTimeStamp = uiInBsTimeStamp;
 #ifdef LONG_TERM_REF
-  m_pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR
-  m_pDecContext->bCurAuContainLtrMarkSeFlag = false;
-  m_pDecContext->iFrameNumOfAuMarkedLtr = 0;
-  m_pDecContext->iFrameNum = -1; //initialize
+  pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR
+  pDecContext->bCurAuContainLtrMarkSeFlag = false;
+  pDecContext->iFrameNumOfAuMarkedLtr = 0;
+  pDecContext->iFrameNum = -1; //initialize
 #endif
 
-  m_pDecContext->iFeedbackTidInAu = -1; //initialize
-  m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
+  pDecContext->iFeedbackTidInAu = -1; //initialize
+  pDecContext->iFeedbackNalRefIdc = -1; //initialize
   if (pDstInfo) {
     pDstInfo->uiOutYuvTimeStamp = 0;
-    m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
+    pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
   } else {
-    m_pDecContext->uiTimeStamp = 0;
+    pDecContext->uiTimeStamp = 0;
   }
-  WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, ppDst,
+  WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, ppDst,
                 pDstInfo, NULL); //iErrorCode has been modified in this function
-  m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
-  if (m_pDecContext->iErrorCode) {
+  pDecContext->bInstantDecFlag = false; //reset no-delay flag
+  if (pDecContext->iErrorCode) {
     EWelsNalUnitType eNalType =
       NAL_UNIT_UNSPEC_0; //for NBR, IDR frames are expected to decode as followed if error decoding an IDR currently
 
-    eNalType = m_pDecContext->sCurNalHead.eNalUnitType;
-    if (m_pDecContext->iErrorCode & dsOutOfMemory) {
-      if (ResetDecoder()) {
+    eNalType = pDecContext->sCurNalHead.eNalUnitType;
+    if (pDecContext->iErrorCode & dsOutOfMemory) {
+      if (ResetDecoder (pDecContext)) {
         return dsOutOfMemory;
       }
       return dsErrorFree;
     }
-    if (m_pDecContext->iErrorCode & dsRefListNullPtrs) {
-      if (ResetDecoder()) {
+    if (pDecContext->iErrorCode & dsRefListNullPtrs) {
+      if (ResetDecoder (pDecContext)) {
         return dsRefListNullPtrs;
       }
       return dsErrorFree;
     }
-    if ((m_pDecContext->iErrorCode & (dsBitstreamError | dsDataErrorConcealed)) && m_pDecContext->eSliceType == B_SLICE) {
-      if (ResetDecoder()) {
+    if ((pDecContext->iErrorCode & (dsBitstreamError | dsDataErrorConcealed)) && pDecContext->eSliceType == B_SLICE) {
+      if (ResetDecoder (pDecContext)) {
         pDstInfo->iBufferStatus = 0;
-        return (DECODING_STATE)m_pDecContext->iErrorCode;
+        return (DECODING_STATE)pDecContext->iErrorCode;
       }
       return dsErrorFree;
     }
     //for AVC bitstream (excluding AVC with temporal scalability, including TP), as long as error occur, SHOULD notify upper layer key frame loss.
     if ((IS_PARAM_SETS_NALS (eNalType) || NAL_UNIT_CODED_SLICE_IDR == eNalType) ||
-        (VIDEO_BITSTREAM_AVC == m_pDecContext->eVideoType)) {
-      if (m_pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+        (VIDEO_BITSTREAM_AVC == pDecContext->eVideoType)) {
+      if (pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
 #ifdef LONG_TERM_REF
-        m_pDecContext->bParamSetsLostFlag = true;
+        pDecContext->bParamSetsLostFlag = true;
 #else
-        m_pDecContext->bReferenceLostAtT0Flag = true;
+        pDecContext->bReferenceLostAtT0Flag = true;
 #endif
       }
     }
 
-    if (m_pDecContext->bPrintFrameErrorTraceFlag) {
+    if (pDecContext->bPrintFrameErrorTraceFlag) {
       WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n",
-               m_pDecContext->iErrorCode);
-      m_pDecContext->bPrintFrameErrorTraceFlag = false;
+               pDecContext->iErrorCode);
+      pDecContext->bPrintFrameErrorTraceFlag = false;
     } else {
-      m_pDecContext->iIgnoredErrorInfoPacketCount++;
-      if (m_pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) {
+      pDecContext->iIgnoredErrorInfoPacketCount++;
+      if (pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) {
         WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, "continuous error reached INT_MAX! Restart as 0.");
-        m_pDecContext->iIgnoredErrorInfoPacketCount = 0;
+        pDecContext->iIgnoredErrorInfoPacketCount = 0;
       }
     }
-    if ((m_pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) {
+    if ((pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) {
       //TODO after dec status updated
-      m_pDecContext->iErrorCode |= dsDataErrorConcealed;
+      pDecContext->iErrorCode |= dsDataErrorConcealed;
 
-      m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
-      if (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-        ResetDecStatNums (m_pDecContext->pDecoderStatistics);
-        m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+      pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+      if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+        ResetDecStatNums (pDecContext->pDecoderStatistics);
+        pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
       }
-      int32_t iMbConcealedNum = m_pDecContext->iMbEcedNum + m_pDecContext->iMbEcedPropNum;
-      m_pDecContext->pDecoderStatistics->uiAvgEcRatio = m_pDecContext->iMbNum == 0 ?
-          (m_pDecContext->pDecoderStatistics->uiAvgEcRatio * m_pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
-                m_pDecContext->pDecoderStatistics->uiAvgEcRatio * m_pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
-                      iMbConcealedNum * 100) / m_pDecContext->iMbNum));
-      m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio = m_pDecContext->iMbNum == 0 ?
-          (m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio * m_pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
-                m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio * m_pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
-                      m_pDecContext->iMbEcedPropNum * 100) / m_pDecContext->iMbNum));
-      m_pDecContext->pDecoderStatistics->uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1);
-      m_pDecContext->pDecoderStatistics->uiAvgEcRatio = m_pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
-          m_pDecContext->pDecoderStatistics->uiAvgEcRatio / m_pDecContext->pDecoderStatistics->uiEcFrameNum;
-      m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio = m_pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
-          m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio / m_pDecContext->pDecoderStatistics->uiEcFrameNum;
+      int32_t iMbConcealedNum = pDecContext->iMbEcedNum + pDecContext->iMbEcedPropNum;
+      pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->iMbNum == 0 ?
+          (pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
+                pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
+                      iMbConcealedNum * 100) / pDecContext->iMbNum));
+      pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->iMbNum == 0 ?
+          (pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
+                pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
+                      pDecContext->iMbEcedPropNum * 100) / pDecContext->iMbNum));
+      pDecContext->pDecoderStatistics->uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1);
+      pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
+          pDecContext->pDecoderStatistics->uiAvgEcRatio / pDecContext->pDecoderStatistics->uiEcFrameNum;
+      pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
+          pDecContext->pDecoderStatistics->uiAvgEcPropRatio / pDecContext->pDecoderStatistics->uiEcFrameNum;
     }
     iEnd = WelsTime();
-    m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+    pDecContext->dDecTime += (iEnd - iStart) / 1e3;
 
-    OutputStatisticsLog (*m_pDecContext->pDecoderStatistics);
+    OutputStatisticsLog (*pDecContext->pDecoderStatistics);
 
-#ifdef  _PICTURE_REORDERING_
-    ReorderPicturesInDisplay (ppDst, pDstInfo);
-#endif
+    if (pDecContext->pThreadCtx != NULL) {
+      WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+      RESET_EVENT (&m_sBufferingEvent);
+      BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+      SET_EVENT (&m_sBufferingEvent);
+    } else {
+      ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo);
+    }
 
-    return (DECODING_STATE)m_pDecContext->iErrorCode;
+    return (DECODING_STATE)pDecContext->iErrorCode;
   }
   // else Error free, the current codec works well
 
   if (pDstInfo->iBufferStatus == 1) {
 
-    m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
-    if (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-      ResetDecStatNums (m_pDecContext->pDecoderStatistics);
-      m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+    pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+    if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+      ResetDecStatNums (pDecContext->pDecoderStatistics);
+      pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
     }
 
-    OutputStatisticsLog (*m_pDecContext->pDecoderStatistics);
+    OutputStatisticsLog (*pDecContext->pDecoderStatistics);
   }
   iEnd = WelsTime();
-  m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+  pDecContext->dDecTime += (iEnd - iStart) / 1e3;
 
-#ifdef  _PICTURE_REORDERING_
-  ReorderPicturesInDisplay (ppDst, pDstInfo);
-#endif
+  if (pDecContext->pThreadCtx != NULL) {
+    WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+    RESET_EVENT (&m_sBufferingEvent);
+    BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+    SET_EVENT (&m_sBufferingEvent);
+  } else {
+    ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo);
+  }
   return dsErrorFree;
 }
 
+DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc,
+    const int kiSrcLen,
+    unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
+  PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx;
+  return DecodeFrame2WithCtx (pDecContext, kpSrc, kiSrcLen, ppDst, pDstInfo);
+}
+
 DECODING_STATE CWelsDecoder::FlushFrame (unsigned char** ppDst,
     SBufferInfo* pDstInfo) {
-  if (m_pDecContext->bEndOfStreamFlag && m_sReoderingStatus.iNumOfPicts > 0) {
+  bool bEndOfStreamFlag = true;
+  for (int32_t j = 0; j < m_iThreadCount; ++j) {
+    if (!m_pDecThrCtx[j].pCtx->bEndOfStreamFlag) {
+      bEndOfStreamFlag = false;
+    }
+  }
+  if (bEndOfStreamFlag && m_sReoderingStatus.iNumOfPicts > 0) {
     m_sReoderingStatus.iMinPOC = IMinInt32;
     for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
       if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) {
@@ -704,12 +934,15 @@
     ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
     ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
     m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
-    if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < m_pDecContext->pPicBuff->iCapacity)
-      m_pDecContext->pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+    PPicBuff pPicBuff = m_iThreadCount == 1 ? m_pDecThrCtx[0].pCtx->pPicBuff : m_pPicBuff;
+    if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < pPicBuff->iCapacity) {
+      pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+    }
     m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
     m_sReoderingStatus.iMinPOC = IMinInt32;
     --m_sReoderingStatus.iNumOfPicts;
   }
+
   return dsErrorFree;
 }
 
@@ -758,24 +991,15 @@
   }
 }
 
-DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo) {
-  DECODING_STATE iRet = dsErrorFree;
+void CWelsDecoder::BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
   if (pDstInfo->iBufferStatus == 0) {
-    return iRet;
+    return;
   }
-  ++m_pDecContext->uiDecodingTimeStamp;
-  if (m_pDecContext->pSps->uiProfileIdc != 66 && m_pDecContext->pSps->uiProfileIdc != 83) {
-    /*if (m_pDecContext->pSliceHeader->iPicOrderCntLsb == 0) {
-      m_sReoderingStatus.iLastWrittenPOC = 0;
-      return dsErrorFree;
-    }
-    if (m_sReoderingStatus.iNumOfPicts == 0 && m_pDecContext->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin
-        && m_pDecContext->eSliceType != I_SLICE) {
-      m_sReoderingStatus.iLastWrittenPOC = m_pDecContext->pSliceHeader->iPicOrderCntLsb;
-      return dsErrorFree;
-    }*/
-    if (m_sReoderingStatus.iNumOfPicts && m_pDecContext->pLastDecPicInfo->pPreviousDecodedPictureInDpb
-        && m_pDecContext->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin) {
+  m_bIsBaseline = pCtx->pSps->uiProfileIdc == 66 || pCtx->pSps->uiProfileIdc == 83;
+  if (!m_bIsBaseline) {
+    if (m_sReoderingStatus.iNumOfPicts && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb
+        && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin) {
       m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts;
       for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
         if (m_sPictInfoList[i].iPOC > IMinInt32) {
@@ -787,7 +1011,7 @@
         //This can happen when decoder moves to next GOP without being able to decoder first picture PicOrderCntLsb = 0
         bool hasGOPChanged = false;
         for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
-          if (m_sPictInfoList[i].iPOC == m_pDecContext->pSliceHeader->iPicOrderCntLsb) {
+          if (m_sPictInfoList[i].iPOC == pCtx->pSliceHeader->iPicOrderCntLsb) {
             hasGOPChanged = true;
             break;
           }
@@ -802,105 +1026,153 @@
         }
       }
     }
-    for (int32_t i = 0; i < 16; ++i) {
-      if (m_sPictInfoList[i].iPOC == IMinInt32) {
-        memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo));
-        m_sPictInfoList[i].pData[0] = ppDst[0];
-        m_sPictInfoList[i].pData[1] = ppDst[1];
-        m_sPictInfoList[i].pData[2] = ppDst[2];
-        m_sPictInfoList[i].iPOC = m_pDecContext->pSliceHeader->iPicOrderCntLsb;
-        m_sPictInfoList[i].uiDecodingTimeStamp = m_pDecContext->uiDecodingTimeStamp;
-        m_sPictInfoList[i].iPicBuffIdx = m_pDecContext->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iPicBuffIdx;
-        m_pDecContext->pPicBuff->ppPic[m_sPictInfoList[i].iPicBuffIdx]->bAvailableFlag = false;
-        m_sPictInfoList[i].bLastGOP = false;
-        pDstInfo->iBufferStatus = 0;
-        ++m_sReoderingStatus.iNumOfPicts;
-        if (i > m_sReoderingStatus.iLargestBufferedPicIndex) {
-          m_sReoderingStatus.iLargestBufferedPicIndex = i;
-        }
-        break;
+  }
+  for (int32_t i = 0; i < 16; ++i) {
+    if (m_sPictInfoList[i].iPOC == IMinInt32) {
+      memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo));
+      m_sPictInfoList[i].pData[0] = ppDst[0];
+      m_sPictInfoList[i].pData[1] = ppDst[1];
+      m_sPictInfoList[i].pData[2] = ppDst[2];
+      m_sPictInfoList[i].iPOC = pCtx->pSliceHeader->iPicOrderCntLsb;
+      m_sPictInfoList[i].uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
+      m_sPictInfoList[i].iPicBuffIdx = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iPicBuffIdx;
+      pCtx->pPicBuff->ppPic[m_sPictInfoList[i].iPicBuffIdx]->bAvailableFlag = false;
+      m_sPictInfoList[i].bLastGOP = false;
+      pDstInfo->iBufferStatus = 0;
+      ++m_sReoderingStatus.iNumOfPicts;
+      if (i > m_sReoderingStatus.iLargestBufferedPicIndex) {
+        m_sReoderingStatus.iLargestBufferedPicIndex = i;
       }
+      break;
     }
-    if (m_sReoderingStatus.iLastGOPRemainPicts > 0) {
-      m_sReoderingStatus.iMinPOC = IMinInt32;
-      for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
-        if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].bLastGOP) {
-          m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
-          m_sReoderingStatus.iPictInfoIndex = i;
-        }
-        if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC
-            && m_sPictInfoList[i].bLastGOP) {
-          m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
-          m_sReoderingStatus.iPictInfoIndex = i;
-        }
+  }
+}
+
+void CWelsDecoder::ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
+  PPicBuff pPicBuff = pCtx ? pCtx->pPicBuff : m_pPicBuff;
+  if (!m_bIsBaseline && m_sReoderingStatus.iLastGOPRemainPicts > 0) {
+    m_sReoderingStatus.iMinPOC = IMinInt32;
+    for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+      if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].bLastGOP) {
+        m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+        m_sReoderingStatus.iPictInfoIndex = i;
       }
-      m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
+      if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC
+          && m_sPictInfoList[i].bLastGOP) {
+        m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+        m_sReoderingStatus.iPictInfoIndex = i;
+      }
+    }
+    m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
 #if defined (_DEBUG)
 #ifdef _MOTION_VECTOR_DUMP_
-      fprintf (stderr, "Output POC: #%d\n", m_sReoderingStatus.iLastWrittenPOC);
+    fprintf (stderr, "Output POC: #%d\n", m_sReoderingStatus.iLastWrittenPOC);
 #endif
 #endif
+    memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+    ppDst[0] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[0];
+    ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
+    ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
+    m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
+    pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+    m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
+    m_sReoderingStatus.iMinPOC = IMinInt32;
+    --m_sReoderingStatus.iNumOfPicts;
+    --m_sReoderingStatus.iLastGOPRemainPicts;
+    if (m_sReoderingStatus.iLastGOPRemainPicts == 0) {
+      m_sReoderingStatus.iLastWrittenPOC = IMinInt32;
+    }
+    return;
+  }
+  if (m_sReoderingStatus.iNumOfPicts && m_bIsBaseline) {
+    uint32_t uiDecodingTimeStamp = 0;
+    for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+      if (m_sPictInfoList[i].iPOC > IMinInt32) {
+        uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp;
+        m_sReoderingStatus.iPictInfoIndex = i;
+        break;
+      }
+    }
+    for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+      if (m_sReoderingStatus.iPictInfoIndex != i && m_sPictInfoList[i].iPOC > IMinInt32
+          && m_sPictInfoList[i].sBufferInfo.uiInBsTimeStamp < uiDecodingTimeStamp) {
+        uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp;
+        m_sReoderingStatus.iPictInfoIndex = i;
+      }
+    }
+    if (uiDecodingTimeStamp > 0) {
       memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
       ppDst[0] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[0];
       ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
       ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
       m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
-      if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < m_pDecContext->pPicBuff->iCapacity)
-        m_pDecContext->pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
-      m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
-      m_sReoderingStatus.iMinPOC = IMinInt32;
+      pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
       --m_sReoderingStatus.iNumOfPicts;
-      --m_sReoderingStatus.iLastGOPRemainPicts;
-      if (m_sReoderingStatus.iLastGOPRemainPicts == 0) {
-        m_sReoderingStatus.iLastWrittenPOC = IMinInt32;
-      }
-      return iRet;
     }
-    if (m_sReoderingStatus.iNumOfPicts > 0) {
-      m_sReoderingStatus.iMinPOC = IMinInt32;
-      for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
-        if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) {
-          m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
-          m_sReoderingStatus.iPictInfoIndex = i;
-        }
-        if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) {
-          m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
-          m_sReoderingStatus.iPictInfoIndex = i;
-        }
+    return;
+  }
+  if (m_sReoderingStatus.iNumOfPicts > 0) {
+    m_sReoderingStatus.iMinPOC = IMinInt32;
+    for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+      if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) {
+        m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+        m_sReoderingStatus.iPictInfoIndex = i;
       }
+      if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) {
+        m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+        m_sReoderingStatus.iPictInfoIndex = i;
+      }
     }
-    if (m_sReoderingStatus.iMinPOC > IMinInt32) {
-      if ((m_sReoderingStatus.iLastWrittenPOC > IMinInt32
-           && m_sReoderingStatus.iMinPOC - m_sReoderingStatus.iLastWrittenPOC <= 1)
-          || m_sReoderingStatus.iMinPOC < m_pDecContext->pSliceHeader->iPicOrderCntLsb) {
-        m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
+  }
+  if (m_sReoderingStatus.iMinPOC > IMinInt32) {
+    bool isReady = false;
+    if (pCtx != NULL) {
+      isReady = (m_sReoderingStatus.iLastWrittenPOC > IMinInt32
+                 && m_sReoderingStatus.iMinPOC - m_sReoderingStatus.iLastWrittenPOC <= 1)
+                || m_sReoderingStatus.iMinPOC < pCtx->pSliceHeader->iPicOrderCntLsb;
+    } else {
+      isReady = m_sReoderingStatus.iMinPOC == 0 || (m_sReoderingStatus.iLastWrittenPOC >= 0
+                && m_sReoderingStatus.iMinPOC <= m_sReoderingStatus.iLastWrittenPOC + 2) ;
+    }
+    if (isReady) {
+      m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
 #if defined (_DEBUG)
 #ifdef _MOTION_VECTOR_DUMP_
-        fprintf (stderr, "Output POC: #%d\n", m_sReoderingStatus.iLastWrittenPOC);
+      fprintf (stderr, "Output POC: #%d\n", m_sReoderingStatus.iLastWrittenPOC);
 #endif
 #endif
-        memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
-        ppDst[0] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[0];
-        ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
-        ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
-        m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
-        if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < m_pDecContext->pPicBuff->iCapacity)
-          m_pDecContext->pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
-        m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
-        m_sReoderingStatus.iMinPOC = IMinInt32;
-        --m_sReoderingStatus.iNumOfPicts;
-        return iRet;
-      }
+      memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+      ppDst[0] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[0];
+      ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
+      ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
+      m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
+      pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+      m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
+      m_sReoderingStatus.iMinPOC = IMinInt32;
+      --m_sReoderingStatus.iNumOfPicts;
     }
   }
+}
 
+DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (PWelsDecoderContext pDecContext, unsigned char** ppDst,
+    SBufferInfo* pDstInfo) { // Calls BufferingReadyPicture then ReleaseBufferedReadyPicture for non-baseline streams.
+  DECODING_STATE iRet = dsErrorFree; // never modified below, so this function always returns dsErrorFree
+  if (pDstInfo->iBufferStatus == 1) { // no-op unless iBufferStatus is 1 (presumably a decoded picture is present; confirm)
+    ++pDecContext->uiDecodingTimeStamp; // advance the time stamp stored in this decoder context
+    m_bIsBaseline = pDecContext->pSps->uiProfileIdc == 66 || pDecContext->pSps->uiProfileIdc == 83; // 66: Baseline, 83: Scalable Baseline
+    if (!m_bIsBaseline) { // both helper calls are skipped for the two baseline profile ids
+      BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+      ReleaseBufferedReadyPicture (pDecContext, ppDst, pDstInfo); // may overwrite ppDst and pDstInfo with a buffered picture
+    }
+  }
   return iRet;
 }
 
-DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    SParserBsInfo* pDstInfo) {
-  if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
+DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc, const int kiSrcLen, SParserBsInfo* pDstInfo) {
+  PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx;
+
+  if (pDecContext == NULL || pDecContext->pParam == NULL) {
     if (m_pWelsTrace != NULL) {
       WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeParser without Initialize.\n");
     }
@@ -907,14 +1179,14 @@
     return dsInitialOptExpected;
   }
 
-  if (!m_pDecContext->pParam->bParseOnly) {
+  if (!pDecContext->pParam->bParseOnly) {
     WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be true for this API calling! \n");
-    m_pDecContext->iErrorCode |= dsInvalidArgument;
+    pDecContext->iErrorCode |= dsInvalidArgument;
     return dsInvalidArgument;
   }
   int64_t iEnd, iStart = WelsTime();
-  if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
-    if (ResetDecoder())
+  if (CheckBsBuffer (pDecContext, kiSrcLen)) {
+    if (ResetDecoder (pDecContext))
       return dsOutOfMemory;
 
     return dsErrorFree;
@@ -926,58 +1198,57 @@
       WelsFflush (m_pFBS);
     }
 #endif//OUTPUT_BIT_STREAM
-    m_pDecContext->bEndOfStreamFlag = false;
+    pDecContext->bEndOfStreamFlag = false;
   } else {
     //For application MODE, the error detection should be added for safe.
     //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
-    m_pDecContext->bEndOfStreamFlag = true;
-    m_pDecContext->bInstantDecFlag = true;
+    pDecContext->bEndOfStreamFlag = true;
+    pDecContext->bInstantDecFlag = true;
   }
 
-  m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
-  m_pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here.
-  m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
-  if (!m_pDecContext->bFramePending) { //frame complete
-    m_pDecContext->pParserBsInfo->iNalNum = 0;
-    memset (m_pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER);
+  pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
+  pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here.
+  pDecContext->iFeedbackNalRefIdc = -1; //initialize
+  if (!pDecContext->bFramePending) { //frame complete
+    pDecContext->pParserBsInfo->iNalNum = 0;
+    memset (pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER);
   }
   pDstInfo->iNalNum = 0;
   pDstInfo->iSpsWidthInPixel = pDstInfo->iSpsHeightInPixel = 0;
   if (pDstInfo) {
-    m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
+    pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
     pDstInfo->uiOutBsTimeStamp = 0;
   } else {
-    m_pDecContext->uiTimeStamp = 0;
+    pDecContext->uiTimeStamp = 0;
   }
-  WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo);
-  if (m_pDecContext->iErrorCode & dsOutOfMemory) {
-    if (ResetDecoder())
+  WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo);
+  if (pDecContext->iErrorCode & dsOutOfMemory) {
+    if (ResetDecoder (pDecContext))
       return dsOutOfMemory;
     return dsErrorFree;
   }
 
-  if (!m_pDecContext->bFramePending && m_pDecContext->pParserBsInfo->iNalNum) {
-    memcpy (pDstInfo, m_pDecContext->pParserBsInfo, sizeof (SParserBsInfo));
+  if (!pDecContext->bFramePending && pDecContext->pParserBsInfo->iNalNum) {
+    memcpy (pDstInfo, pDecContext->pParserBsInfo, sizeof (SParserBsInfo));
 
-    if (m_pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count
-      m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
-      if (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-        ResetDecStatNums (m_pDecContext->pDecoderStatistics);
-        m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+    if (pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count
+      pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+      if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+        ResetDecStatNums (pDecContext->pDecoderStatistics);
+        pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
       }
     }
   }
 
-  m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
+  pDecContext->bInstantDecFlag = false; //reset no-delay flag
 
-  if (m_pDecContext->iErrorCode && m_pDecContext->bPrintFrameErrorTraceFlag) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", m_pDecContext->iErrorCode);
-    m_pDecContext->bPrintFrameErrorTraceFlag = false;
+  if (pDecContext->iErrorCode && pDecContext->bPrintFrameErrorTraceFlag) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", pDecContext->iErrorCode);
+    pDecContext->bPrintFrameErrorTraceFlag = false;
   }
   iEnd = WelsTime();
-  m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
-
-  return (DECODING_STATE) m_pDecContext->iErrorCode;
+  pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+  return (DECODING_STATE)pDecContext->iErrorCode;
 }
 
 DECODING_STATE CWelsDecoder::DecodeFrame (const unsigned char* kpSrc,
@@ -1016,6 +1287,92 @@
     int& iColorFormat) {
   DECODING_STATE state = dsErrorFree;
 
+  return state;
+}
+
+DECODING_STATE CWelsDecoder::ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx) { // Seeds sThreadCtx.pCtx from decoder-wide state, runs DecodeFrame2WithCtx, then InitConstructAccessUnit.
+  sThreadCtx.pCtx->bHasNewSps = false;
+  sThreadCtx.pCtx->bParamSetsLostFlag = m_bParamSetsLostFlag; // copy decoder-wide flags into this thread context
+  sThreadCtx.pCtx->bFreezeOutput = m_bFreezeOutput;
+  sThreadCtx.pCtx->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp; // decoder-wide counter is incremented once per call
+  bool bPicBuffChanged = false; // set when this context is switched to the decoder-wide picture buffer
+  if (m_pLastDecThrCtx != NULL && sThreadCtx.pCtx->sSpsPpsCtx.iSeqId < m_pLastDecThrCtx->pCtx->sSpsPpsCtx.iSeqId) { // this context has a smaller iSeqId than the last used one (presumably stale parameter sets; confirm)
+    CopySpsPps (m_pLastDecThrCtx->pCtx, sThreadCtx.pCtx); // presumably copies from the first argument into the second; confirm against CopySpsPps
+    sThreadCtx.pCtx->iPicQueueNumber = m_pLastDecThrCtx->pCtx->iPicQueueNumber;
+    if (sThreadCtx.pCtx->pPicBuff != m_pPicBuff) { // context points at a different picture buffer than the decoder-wide one
+      bPicBuffChanged = true;
+      sThreadCtx.pCtx->pPicBuff = m_pPicBuff;
+      sThreadCtx.pCtx->bHaveGotMemory = m_pPicBuff != NULL;
+      sThreadCtx.pCtx->iImgWidthInPixel = m_pLastDecThrCtx->pCtx->iImgWidthInPixel; // image size is taken from the last used context
+      sThreadCtx.pCtx->iImgHeightInPixel = m_pLastDecThrCtx->pCtx->iImgHeightInPixel;
+    }
+  }
+  int32_t iRet = DecodeFrame2WithCtx (sThreadCtx.pCtx, sThreadCtx.kpSrc, sThreadCtx.kiSrcLen, sThreadCtx.ppDst,
+                                      &sThreadCtx.sDstInfo); // iRet is only used on the failure path below
+
+  int32_t iErr = InitConstructAccessUnit (sThreadCtx.pCtx, &sThreadCtx.sDstInfo);
+  if (ERR_NONE != iErr) {
+    return (DECODING_STATE) (iRet | iErr); // failure: report the bitwise OR of both status values
+  }
+  if (sThreadCtx.pCtx->bNewSeqBegin) {
+    m_pPicBuff = sThreadCtx.pCtx->pPicBuff; // new sequence: this context's picture buffer becomes the decoder-wide one
+  } else if (bPicBuffChanged) { // buffer was swapped above: re-run InitialDqLayersContext with MB counts << 4 (times 16, presumably pixels)
+    InitialDqLayersContext (sThreadCtx.pCtx, sThreadCtx.pCtx->pSps->iMbWidth << 4, sThreadCtx.pCtx->pSps->iMbHeight << 4);
+  }
+  m_bParamSetsLostFlag = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bParamSetsLostFlag; // write flags back; both are cleared on a new sequence
+  m_bFreezeOutput = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bFreezeOutput;
+  return (DECODING_STATE)iErr; // NOTE(review): iErr equals ERR_NONE here, so iRet from DecodeFrame2WithCtx is not reflected in this return value
+}
+/*
+* Hand one input segment to a decoder thread context: wait until the chosen context is idle, run ParseAccessUnit
+* on it from this function, then set its command to RUN and release its sIsActivated semaphore.
+*/
+int CWelsDecoder::ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
+  int state = dsErrorFree; // never reassigned below, so this function always returns dsErrorFree
+  int32_t i, j;
+  int32_t signal = 0; // index into m_pDecThrCtx of the context chosen for this call
+
+  //serial using of threads
+  if (m_DecCtxActiveCount < m_iThreadCount) {
+    signal = m_DecCtxActiveCount; // not all contexts are active yet: take the next index
+  } else {
+    signal = m_pDecThrCtxActive[0]->sThreadInfo.uiThrNum; // otherwise reuse the entry at the head of the active list
+  }
+
+  WAIT_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); // block until the chosen context is idle
+
+  for (i = 0; i < m_DecCtxActiveCount; ++i) { // if the chosen context is already in the active list, remove it
+    if (m_pDecThrCtxActive[i] == &m_pDecThrCtx[signal]) {
+      m_pDecThrCtxActive[i] = NULL;
+      for (j = i; j < m_DecCtxActiveCount - 1; j++) { // shift the later entries down by one slot
+        m_pDecThrCtxActive[j] = m_pDecThrCtxActive[j + 1];
+        m_pDecThrCtxActive[j + 1] = NULL;
+      }
+      --m_DecCtxActiveCount;
+      break;
+    }
+  }
+
+  m_pDecThrCtxActive[m_DecCtxActiveCount++] = &m_pDecThrCtx[signal]; // append at the tail, so index 0 is the earliest appended entry
+  if (m_pLastDecThrCtx != NULL) {
+    m_pDecThrCtx[signal].pCtx->pLastThreadCtx = m_pLastDecThrCtx; // link this context to the previously used one
+  }
+  m_pDecThrCtx[signal].kpSrc = const_cast<uint8_t*> (kpSrc); // const is cast away to store the input pointer in the context
+  m_pDecThrCtx[signal].kiSrcLen = kiSrcLen;
+  m_pDecThrCtx[signal].ppDst = ppDst;
+  memcpy (&m_pDecThrCtx[signal].sDstInfo, pDstInfo, sizeof (SBufferInfo)); // the context keeps its own copy of the caller's SBufferInfo
+
+  ParseAccessUnit (m_pDecThrCtx[signal]); // NOTE(review): the returned DECODING_STATE is discarded here
+  m_pLastDecThrCtx = &m_pDecThrCtx[signal];
+  m_pDecThrCtx[signal].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN;
+  RELEASE_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsActivated); // presumably wakes the worker thread waiting on sIsActivated; confirm
+
+  // wait early picture
+  if (m_DecCtxActiveCount >= m_iThreadCount) { // every context is active: wait for the one at the head of the list
+    WAIT_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
+    RELEASE_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle); // release it again at once (presumably so the wait on the next call can take it; confirm)
+  }
   return state;
 }