shithub: openh264

Download patch

ref: 468bfb58ada159df62085500d43bc00ceb2e8db6
parent: c96fe5fec43be025ac6d39be5841e5d57af701da
author: JuannyWang <[email protected]>
date: Wed Apr 9 12:53:39 EDT 2014

decodeMbAux UT addition

--- /dev/null
+++ b/test/encoder/EncUT_DecodeMbAux.cpp
@@ -1,0 +1,386 @@
+#include<gtest/gtest.h>
+#include<stdlib.h>
+#include<time.h>
+#include "decode_mb_aux.h"
+#include "wels_common_basis.h"
+#include "macros.h"
+#include "cpu.h"
+
+using namespace WelsSVCEnc;
+
+
+TEST(DecodeMbAuxTest, TestIhdm_4x4_dc) {
+  short W[16],T[16],Y[16];
+  srand((uint32_t)time(NULL));
+  for(int i=0;i<16;i++)
+    W[i]=rand()%256+1;
+
+  T[0]=W[0]+W[4]+W[8]+W[12];
+  T[1]=W[1]+W[5]+W[9]+W[13];
+  T[2]=W[2]+W[6]+W[10]+W[14];
+  T[3]=W[3]+W[7]+W[11]+W[15];
+
+  T[4]=W[0]+W[4]-W[8]-W[12];
+  T[5]=W[1]+W[5]-W[9]-W[13];
+  T[6]=W[2]+W[6]-W[10]-W[14];
+  T[7]=W[3]+W[7]-W[11]-W[15];
+
+  T[8]=W[0]-W[4]-W[8]+W[12];
+  T[9]=W[1]-W[5]-W[9]+W[13];
+  T[10]=W[2]-W[6]-W[10]+W[14];
+  T[11]=W[3]-W[7]-W[11]+W[15];
+
+  T[12]=W[0]-W[4]+W[8]-W[12];
+  T[13]=W[1]-W[5]+W[9]-W[13];
+  T[14]=W[2]-W[6]+W[10]-W[14];
+  T[15]=W[3]-W[7]+W[11]-W[15];
+
+  Y[0]=T[0]+T[1]+T[2]+T[3];
+  Y[1]=T[0]+T[1]-T[2]-T[3];
+  Y[2]=T[0]-T[1]-T[2]+T[3];
+  Y[3]=T[0]-T[1]+T[2]-T[3];
+
+  Y[4]=T[4]+T[5]+T[6]+T[7];
+  Y[5]=T[4]+T[5]-T[6]-T[7];
+  Y[6]=T[4]-T[5]-T[6]+T[7];
+  Y[7]=T[4]-T[5]+T[6]-T[7];
+
+  Y[8]=T[8]+T[9]+T[10]+T[11];
+  Y[9]=T[8]+T[9]-T[10]-T[11];
+  Y[10]=T[8]-T[9]-T[10]+T[11];
+  Y[11]=T[8]-T[9]+T[10]-T[11];
+
+  Y[12]=T[12]+T[13]+T[14]+T[15];
+  Y[13]=T[12]+T[13]-T[14]-T[15];
+  Y[14]=T[12]-T[13]-T[14]+T[15];
+  Y[15]=T[12]-T[13]+T[14]-T[15];
+
+  WelsIHadamard4x4Dc(W);
+  for(int i=0;i<16;i++)
+    EXPECT_EQ( Y[i],W[i] );
+}
+
+TEST(DecodeMbAuxTest, TestDequant_4x4_luma_dc) {
+  short T[16],W[16];
+  srand((uint32_t)time(NULL));
+  for(int i=0;i<16;i++) {
+    T[i]=rand()%256+1;
+    W[i]=T[i];
+  }
+
+  //TODO: QP<18 will cause case fail, need fix and enable the test afterwards
+  for (int qp=18;qp<52;qp++) {
+    WelsDequantLumaDc4x4(W,qp);
+    for(int i=0;i<16;i++)
+      EXPECT_EQ(((T[i]*g_kuiDequantCoeff[qp%6][0]+(1 << (1 -  qp / 6))))>>(2- qp / 6),W[i]);
+   }
+}
+
+TEST(DecodeMbAuxTest, TestDequant_ihdm_4x4_c) {
+  short W[16],T[16],Y[16];
+  srand((uint32_t)time(NULL));
+  const unsigned short mf=rand()%16+1;
+  for(int i=0;i<16;i++)
+    W[i]=rand()%256+1;
+
+  T[0]=W[0]+W[4]+W[8]+W[12];
+  T[1]=W[1]+W[5]+W[9]+W[13];
+  T[2]=W[2]+W[6]+W[10]+W[14];
+  T[3]=W[3]+W[7]+W[11]+W[15];
+
+  T[4]=W[0]+W[4]-W[8]-W[12];
+  T[5]=W[1]+W[5]-W[9]-W[13];
+  T[6]=W[2]+W[6]-W[10]-W[14];
+  T[7]=W[3]+W[7]-W[11]-W[15];
+
+  T[8]=W[0]-W[4]-W[8]+W[12];
+  T[9]=W[1]-W[5]-W[9]+W[13];
+  T[10]=W[2]-W[6]-W[10]+W[14];
+  T[11]=W[3]-W[7]-W[11]+W[15];
+
+  T[12]=W[0]-W[4]+W[8]-W[12];
+  T[13]=W[1]-W[5]+W[9]-W[13];
+  T[14]=W[2]-W[6]+W[10]-W[14];
+  T[15]=W[3]-W[7]+W[11]-W[15];
+
+  Y[0]=(T[0]+T[1]+T[2]+T[3])*mf;
+  Y[1]=(T[0]+T[1]-T[2]-T[3])*mf;
+  Y[2]=(T[0]-T[1]-T[2]+T[3])*mf;
+  Y[3]=(T[0]-T[1]+T[2]-T[3])*mf;
+
+  Y[4]=(T[4]+T[5]+T[6]+T[7])*mf;
+  Y[5]=(T[4]+T[5]-T[6]-T[7])*mf;
+  Y[6]=(T[4]-T[5]-T[6]+T[7])*mf;
+  Y[7]=(T[4]-T[5]+T[6]-T[7])*mf;
+
+  Y[8]=(T[8]+T[9]+T[10]+T[11])*mf;
+  Y[9]=(T[8]+T[9]-T[10]-T[11])*mf;
+  Y[10]=(T[8]-T[9]-T[10]+T[11])*mf;
+  Y[11]=(T[8]-T[9]+T[10]-T[11])*mf;
+
+  Y[12]=(T[12]+T[13]+T[14]+T[15])*mf;
+  Y[13]=(T[12]+T[13]-T[14]-T[15])*mf;
+  Y[14]=(T[12]-T[13]-T[14]+T[15])*mf;
+  Y[15]=(T[12]-T[13]+T[14]-T[15])*mf;
+
+  WelsDequantIHadamard4x4_c(W,mf);
+  for(int i=0;i<16;i++)
+    EXPECT_EQ( Y[i],W[i] );
+}
+
+TEST(DecodeMbAuxTest, TestDequant_4x4_c) {
+  short W[16], T[16];
+  unsigned short mf[16];
+  srand((uint32_t)time(NULL));
+  for(int i=0;i<16;i++) {
+    W[i]=rand()%256+1;
+    T[i]=W[i];
+  }
+
+  for(int i=0;i<8;i++)
+    mf[i]=rand()%16+1;
+  WelsDequant4x4_c(W,mf);
+  for(int i=0;i<16;i++)
+    EXPECT_EQ( T[i]*mf[i%8],W[i] );
+}
+TEST(DecodeMbAuxTest, TestDequant_4_4x4_c) {
+  short W[64], T[64];
+  unsigned short mf[16];
+  srand((uint32_t)time(NULL));
+  for(int i=0;i<64;i++) {
+    W[i]=rand()%256+1;
+    T[i]=W[i];
+  }
+  for(int i=0;i<8;i++)
+    mf[i]=rand()%16+1;
+  WelsDequantFour4x4_c(W,mf);
+  for(int i=0;i<64;i++)
+    EXPECT_EQ( T[i]*mf[i%8],W[i] );
+}
+void WelsDequantHadamard2x2DcAnchor( int16_t* pDct, int16_t iMF) {
+  const int16_t iSumU = pDct[0] + pDct[2];
+  const int16_t iDelU =   pDct[0] -  pDct[2];
+  const int16_t iSumD = pDct[1] + pDct[3];
+  const int16_t iDelD =   pDct[1] -  pDct[3];
+  pDct[0] = (iSumU + iSumD) * iMF;
+  pDct[1] = (iSumU  -  iSumD) * iMF;
+  pDct[2] = (iDelU   + iDelD)   * iMF;
+  pDct[3] = (iDelU   -  iDelD)   * iMF;
+}
+TEST(DecodeMbAuxTest, WelsDequantIHadamard2x2Dc) {
+  int16_t iDct[4], iRefDct[4];
+  int16_t iMF;
+  srand((unsigned int)time(NULL));
+  iMF = rand() & 127;
+  for(int i = 0; i < 4; i++)
+    iDct[i] = iRefDct[i] = (rand() & 65535) - 32768;
+  WelsDequantHadamard2x2DcAnchor(iRefDct, iMF);
+  WelsDequantIHadamard2x2Dc(iDct, iMF);
+  bool ok = true;
+  for(int i = 0; i < 4; i++) {
+    if(iDct[i] != iRefDct[i]) {
+      ok = false;
+      break;
+    }
+  }
+  EXPECT_TRUE(ok);
+}
+#define FDEC_STRIDE 32
+void WelsIDctT4Anchor( uint8_t *p_dst, int16_t dct[16] ) {
+  int16_t tmp[16];
+  int32_t iStridex2 = (FDEC_STRIDE<<1);
+  int32_t iStridex3 = iStridex2 + FDEC_STRIDE;
+  uint8_t uiDst = 0;
+  int i;
+  for( i = 0; i < 4; i++ ) {
+    tmp[i<<2]     = dct[i<<2] + dct[(i<<2)+1]      + dct[(i<<2)+2] + (dct[(i<<2)+3]>>1);
+    tmp[(i<<2)+1] = dct[i<<2] + (dct[(i<<2)+1]>>1) - dct[(i<<2)+2] - dct[(i<<2)+3];
+    tmp[(i<<2)+2] = dct[i<<2] - (dct[(i<<2)+1]>>1) - dct[(i<<2)+2] + dct[(i<<2)+3];
+    tmp[(i<<2)+3] = dct[i<<2] - dct[(i<<2)+1]      + dct[(i<<2)+2] - (dct[(i<<2)+3]>>1);
+  }
+  for( i = 0; i < 4; i++ ) {
+    uiDst = p_dst[i];
+    p_dst[i]             = WelsClip1(uiDst + ((tmp[i]+tmp[4+i]+     tmp[8+i]+(tmp[12+i]>>1)+32)>>6));
+    uiDst = p_dst[i+FDEC_STRIDE];
+    p_dst[i+FDEC_STRIDE] = WelsClip1(uiDst + ((tmp[i]+(tmp[4+i]>>1)-tmp[8+i]-tmp[12+i]+32)     >>6));
+    uiDst = p_dst[i+iStridex2];
+    p_dst[i+iStridex2]   = WelsClip1(uiDst + ((tmp[i]-(tmp[4+i]>>1)-tmp[8+i]+tmp[12+i]+32)     >>6));
+    uiDst = p_dst[i+iStridex3];
+    p_dst[i+iStridex3]   = WelsClip1(uiDst + ((tmp[i]-tmp[4+i]+     tmp[8+i]-(tmp[12+i]>>1)+32)>>6));
+  }
+}
+TEST(DecodeMbAuxTest, WelsIDctT4Rec_c) {
+  int16_t iRefDct[16]; uint8_t iRefDst[16*FDEC_STRIDE];
+  ENFORCE_STACK_ALIGN_1D(int16_t, iDct, 16, 16);
+  ENFORCE_STACK_ALIGN_1D(uint8_t, iPred, 16*FDEC_STRIDE, 16);
+  ENFORCE_STACK_ALIGN_1D(uint8_t, iRec, 16*FDEC_STRIDE, 16);
+  srand((unsigned int)time(NULL));
+  for(int i = 0; i < 4; i++) {
+    for(int j = 0; j < 4; j++) {
+      iRefDct[i*4+j] = iDct[i*4+j] = (rand() & 65535) - 32768;
+      iPred[i*FDEC_STRIDE+j] = iRefDst[i*FDEC_STRIDE+j] = rand() & 255;
+    }
+  }
+  WelsIDctT4Anchor(iRefDst, iRefDct);
+  WelsIDctT4Rec_c(iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
+  int ok = -1;
+  for(int i = 0; i < 4; i++) {
+    for(int j = 0; j < 4; j++) {
+      if(iRec[i*FDEC_STRIDE+j] != iRefDst[i*FDEC_STRIDE+j]) {
+        ok = i*4+j;
+        break;
+      }
+    }
+  }
+  EXPECT_EQ(ok, -1);
+}
+#if defined(X86_ASM)
+TEST(DecodeMbAuxTest, WelsIDctT4Rec_mmx) {
+  int32_t iCpuCores = 0;
+  uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect(&iCpuCores);
+  if(uiCpuFeatureFlag & WELS_CPU_MMXEXT) {
+    ENFORCE_STACK_ALIGN_1D(int16_t, iDct, 16, 16);
+    ENFORCE_STACK_ALIGN_1D(uint8_t, iPred, 16*FDEC_STRIDE, 16);
+    ENFORCE_STACK_ALIGN_1D(uint8_t, iRecC, 16*FDEC_STRIDE, 16);
+    ENFORCE_STACK_ALIGN_1D(uint8_t, iRecM, 16*FDEC_STRIDE, 16);
+    srand((unsigned int)time(NULL));
+    for(int i = 0; i < 4; i++) {
+      for(int j = 0; j < 4; j++) {
+        iDct[i*4+j] = (rand() & ((1 << 12)-1)) - (1 << 11);
+        iPred[i*FDEC_STRIDE+j] = rand() & 255;
+      }
+    }
+    WelsIDctT4Rec_c(iRecC, FDEC_STRIDE, iPred,  FDEC_STRIDE, iDct);
+    WelsIDctT4Rec_mmx(iRecM, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
+    int ok = -1;
+    for(int i = 0; i < 4; i++) {
+      for(int j = 0; j < 4; j++) {
+        if(iRecC[i*FDEC_STRIDE+j] != iRecM[i*FDEC_STRIDE+j]) {
+          ok = i*4+j;
+          break;
+        }
+      }
+    }
+    EXPECT_EQ(ok, -1);
+  }
+}
+#endif
+void WelsIDctT8Anchor( uint8_t *p_dst, int16_t dct[4][16] ) {
+  WelsIDctT4Anchor( &p_dst[0],               dct[0] );
+  WelsIDctT4Anchor( &p_dst[4],               dct[1] );
+  WelsIDctT4Anchor( &p_dst[4*FDEC_STRIDE+0], dct[2] );
+  WelsIDctT4Anchor( &p_dst[4*FDEC_STRIDE+4], dct[3] );
+}
+TEST(DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
+  int16_t iRefDct[4][16]; uint8_t iRefDst[16*FDEC_STRIDE];
+  ENFORCE_STACK_ALIGN_1D(int16_t, iDct, 64, 16);
+  ENFORCE_STACK_ALIGN_1D(uint8_t, iPred, 16*FDEC_STRIDE, 16);
+  ENFORCE_STACK_ALIGN_1D(uint8_t, iRec, 16*FDEC_STRIDE, 16);
+  srand((unsigned int)time(NULL));
+  for(int k = 0; k < 4; k++)
+    for(int i = 0; i < 16; i++)
+      iRefDct[k][i] = iDct[k*16+i] = (rand() & 65535) - 32768;
+
+  for(int i = 0; i < 8; i++)
+    for(int j = 0; j < 8; j++)
+      iPred[i*FDEC_STRIDE+j] = iRefDst[i*FDEC_STRIDE+j] = rand() & 255;
+
+  WelsIDctT8Anchor(iRefDst, iRefDct);
+  WelsIDctFourT4Rec_c(iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
+  int ok = -1;
+  for(int i = 0; i < 8; i++) {
+    for(int j = 0; j < 8; j++) {
+      if(iRec[i*FDEC_STRIDE+j] != iRefDst[i*FDEC_STRIDE+j]) {
+        ok = i*8+j;
+        break;
+      }
+    }
+  }
+  EXPECT_EQ(ok, -1);
+}
+void WelsIDctRecI16x4DcAnchor( uint8_t *p_dst, int16_t dct[4] ) {
+  for(int i = 0; i < 4; i++, p_dst += FDEC_STRIDE) {
+    p_dst[0] = WelsClip1(p_dst[0] + ((dct[0]+32)>>6));
+    p_dst[1] = WelsClip1(p_dst[1] + ((dct[0]+32)>>6));
+    p_dst[2] = WelsClip1(p_dst[2] + ((dct[0]+32)>>6));
+    p_dst[3] = WelsClip1(p_dst[3] + ((dct[0]+32)>>6));
+
+    p_dst[4] = WelsClip1(p_dst[4] + ((dct[1]+32)>>6));
+    p_dst[5] = WelsClip1(p_dst[5] + ((dct[1]+32)>>6));
+    p_dst[6] = WelsClip1(p_dst[6] + ((dct[1]+32)>>6));
+    p_dst[7] = WelsClip1(p_dst[7] + ((dct[1]+32)>>6));
+
+    p_dst[8]  = WelsClip1(p_dst[8]  + ((dct[2]+32)>>6));
+    p_dst[9]  = WelsClip1(p_dst[9]  + ((dct[2]+32)>>6));
+    p_dst[10] = WelsClip1(p_dst[10] + ((dct[2]+32)>>6));
+    p_dst[11] = WelsClip1(p_dst[11] + ((dct[2]+32)>>6));
+
+    p_dst[12] = WelsClip1(p_dst[12] + ((dct[3]+32)>>6));
+    p_dst[13] = WelsClip1(p_dst[13] + ((dct[3]+32)>>6));
+    p_dst[14] = WelsClip1(p_dst[14] + ((dct[3]+32)>>6));
+    p_dst[15] = WelsClip1(p_dst[15] + ((dct[3]+32)>>6));
+  }
+}
+void WelsIDctRecI16x16DcAnchor( uint8_t *p_dst, int16_t dct[4][4] ) {
+  for( int i = 0; i < 4; i++, p_dst += 4*FDEC_STRIDE )
+    WelsIDctRecI16x4DcAnchor(&p_dst[0], dct[i]);
+}
+
+TEST(DecodeMbAuxTest, WelsIDctRecI16x16Dc_c) {
+  uint8_t iRefDst[16*FDEC_STRIDE];
+  int16_t iRefDct[4][4];
+  ENFORCE_STACK_ALIGN_1D(int16_t, iDct, 16, 16);
+  ENFORCE_STACK_ALIGN_1D(uint8_t, iPred, 16*FDEC_STRIDE, 16);
+  ENFORCE_STACK_ALIGN_1D(uint8_t, iRec, 16*FDEC_STRIDE, 16);
+  for(int i = 0; i < 16; i++)
+    for(int j = 0; j < 16; j++)
+      iRefDst[i*FDEC_STRIDE+j] = iPred[i*FDEC_STRIDE+j] = rand() & 255;
+
+  for(int i = 0; i < 4; i++)
+    for(int j = 0; j < 4; j++)
+      iRefDct[i][j] = iDct[i*4+j] = (rand() & 65535) - 32768;
+  WelsIDctRecI16x16DcAnchor(iRefDst, iRefDct);
+  WelsIDctRecI16x16Dc_c(iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
+  int ok = -1;
+  for(int i = 0; i < 16; i++) {
+    for(int j = 0; j < 16; j++) {
+      if(iRec[i*FDEC_STRIDE+j] != iRefDst[i*FDEC_STRIDE+j]) {
+        ok = i*16+j;
+        break;
+      }
+    }
+  }
+  EXPECT_EQ(ok, -1);
+}
+#if defined(X86_ASM)
+TEST(DecodeMbAuxTest, WelsIDctRecI16x16Dc_sse2) {
+  int32_t iCpuCores = 0;
+  uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect(&iCpuCores);
+
+  if(uiCpuFeatureFlag & WELS_CPU_SSE2) {
+    uint8_t iRefDst[16*FDEC_STRIDE];
+    int16_t iRefDct[4][4];
+    ENFORCE_STACK_ALIGN_1D(int16_t, iDct, 16, 16);
+    ENFORCE_STACK_ALIGN_1D(uint8_t, iPred, 16*FDEC_STRIDE, 16);
+    ENFORCE_STACK_ALIGN_1D(uint8_t, iRec, 16*FDEC_STRIDE, 16);
+    for(int i = 0; i < 16; i++)
+      for(int j = 0; j < 16; j++)
+        iRefDst[i*FDEC_STRIDE+j] = iPred[i*FDEC_STRIDE+j] = rand() & 255;
+    for(int i = 0; i < 4; i++)
+      for(int j = 0; j < 4; j++)
+        iRefDct[i][j] = iDct[i*4+j] = (rand() & 65535) - 32768;
+    WelsIDctRecI16x16DcAnchor(iRefDst, iRefDct);
+    WelsIDctRecI16x16Dc_sse2(iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
+    int ok = -1;
+    for(int i = 0; i < 16; i++) {
+      for(int j = 0; j < 16; j++) {
+        if(iRec[i*FDEC_STRIDE+j] != iRefDst[i*FDEC_STRIDE+j]) {
+          ok = i*16+j;
+          break;
+        }
+      }
+    }
+    EXPECT_EQ(ok, -1);
+  }
+}
+#endif
--- a/test/encoder/targets.mk
+++ b/test/encoder/targets.mk
@@ -1,5 +1,6 @@
 ENCODER_UNITTEST_SRCDIR=test/encoder
 ENCODER_UNITTEST_CPP_SRCS=\
+	$(ENCODER_UNITTEST_SRCDIR)/EncUT_DecodeMbAux.cpp\
 	$(ENCODER_UNITTEST_SRCDIR)/EncUT_EncoderMb.cpp\
         $(ENCODER_UNITTEST_SRCDIR)/EncUT_EncoderMbAux.cpp\
 	$(ENCODER_UNITTEST_SRCDIR)/EncUT_ExpandPic.cpp\
@@ -13,4 +14,3 @@
 OBJS += $(ENCODER_UNITTEST_OBJS)
 $(ENCODER_UNITTEST_SRCDIR)/%.$(OBJ): $(ENCODER_UNITTEST_SRCDIR)/%.cpp
 	$(QUIET_CXX)$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_UNITTEST_CFLAGS) $(ENCODER_UNITTEST_INCLUDES) -c $(CXX_O) $<
-