shithub: openh264

Download patch

ref: b9adbcf37cf478268ad4647d1028a9dbd0332797
parent: 8764231784b251029d9bb2a83383e98e9c42905c
author: Sindre Aamås <[email protected]>
date: Mon Jan 18 15:25:46 EST 2016

[UT] Add missing SSE2 4x4 IDCT test

IDCT input is defined in such a way that the intermediate values
cannot legally overflow an int16_t. Using random values as input
causes such overflows. This makes the output implementation-
dependent: it varies with the type used to hold intermediate
results. Use a template for the test reference implementation so
that implementations with different intermediate representations
can each be tested against a matching reference.

--- a/test/encoder/EncUT_DecodeMbAux.cpp
+++ b/test/encoder/EncUT_DecodeMbAux.cpp
@@ -179,6 +179,7 @@
   EXPECT_TRUE (ok);
 }
 #define FDEC_STRIDE 32
+template<typename clip_t>
 void WelsIDctT4Anchor (uint8_t* p_dst, int16_t dct[16]) {
   int16_t tmp[16];
   int32_t iStridex2 = (FDEC_STRIDE << 1);
@@ -193,13 +194,13 @@
   }
   for (i = 0; i < 4; i++) {
     uiDst = p_dst[i];
-    p_dst[i]             = WelsClip1 (uiDst + ((tmp[i] + tmp[4 + i] +     tmp[8 + i] + (tmp[12 + i] >> 1) + 32) >> 6));
+    p_dst[i]             = WelsClip1 (uiDst + (clip_t (tmp[i] + tmp[4 + i] +     tmp[8 + i] + (tmp[12 + i] >> 1) + 32) >> 6));
     uiDst = p_dst[i + FDEC_STRIDE];
-    p_dst[i + FDEC_STRIDE] = WelsClip1 (uiDst + ((tmp[i] + (tmp[4 + i] >> 1) - tmp[8 + i] - tmp[12 + i] + 32)     >> 6));
+    p_dst[i + FDEC_STRIDE] = WelsClip1 (uiDst + (clip_t (tmp[i] + (tmp[4 + i] >> 1) - tmp[8 + i] - tmp[12 + i] + 32)     >> 6));
     uiDst = p_dst[i + iStridex2];
-    p_dst[i + iStridex2]   = WelsClip1 (uiDst + ((tmp[i] - (tmp[4 + i] >> 1) - tmp[8 + i] + tmp[12 + i] + 32)     >> 6));
+    p_dst[i + iStridex2]   = WelsClip1 (uiDst + (clip_t (tmp[i] - (tmp[4 + i] >> 1) - tmp[8 + i] + tmp[12 + i] + 32)     >> 6));
     uiDst = p_dst[i + iStridex3];
-    p_dst[i + iStridex3]   = WelsClip1 (uiDst + ((tmp[i] - tmp[4 + i] +     tmp[8 + i] - (tmp[12 + i] >> 1) + 32) >> 6));
+    p_dst[i + iStridex3]   = WelsClip1 (uiDst + (clip_t (tmp[i] - tmp[4 + i] +     tmp[8 + i] - (tmp[12 + i] >> 1) + 32) >> 6));
   }
 }
 TEST (DecodeMbAuxTest, WelsIDctT4Rec_c) {
@@ -214,7 +215,7 @@
       iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
     }
   }
-  WelsIDctT4Anchor (iRefDst, iRefDct);
+  WelsIDctT4Anchor<int32_t> (iRefDst, iRefDct);
   WelsIDctT4Rec_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
   int ok = -1;
   for (int i = 0; i < 4; i++) {
@@ -257,13 +258,15 @@
   }
 }
 #endif
+template<typename clip_t>
 void WelsIDctT8Anchor (uint8_t* p_dst, int16_t dct[4][16]) {
-  WelsIDctT4Anchor (&p_dst[0],               dct[0]);
-  WelsIDctT4Anchor (&p_dst[4],               dct[1]);
-  WelsIDctT4Anchor (&p_dst[4 * FDEC_STRIDE + 0], dct[2]);
-  WelsIDctT4Anchor (&p_dst[4 * FDEC_STRIDE + 4], dct[3]);
+  WelsIDctT4Anchor<clip_t> (&p_dst[0],                   dct[0]);
+  WelsIDctT4Anchor<clip_t> (&p_dst[4],                   dct[1]);
+  WelsIDctT4Anchor<clip_t> (&p_dst[4 * FDEC_STRIDE + 0], dct[2]);
+  WelsIDctT4Anchor<clip_t> (&p_dst[4 * FDEC_STRIDE + 4], dct[3]);
 }
-TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
+template<typename clip_t>
+void TestIDctFourT4Rec (void (*func) (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct)) {
   int16_t iRefDct[4][16];
   uint8_t iRefDst[16 * FDEC_STRIDE];
   ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 64, 16);
@@ -277,8 +280,8 @@
     for (int j = 0; j < 8; j++)
       iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
 
-  WelsIDctT8Anchor (iRefDst, iRefDct);
-  WelsIDctFourT4Rec_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
+  WelsIDctT8Anchor<clip_t> (iRefDst, iRefDct);
+  func (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
   int ok = -1;
   for (int i = 0; i < 8; i++) {
     for (int j = 0; j < 8; j++) {
@@ -290,6 +293,9 @@
   }
   EXPECT_EQ (ok, -1);
 }
+TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
+  TestIDctFourT4Rec<int32_t> (WelsIDctFourT4Rec_c);
+}
 void WelsIDctRecI16x4DcAnchor (uint8_t* p_dst, int16_t dct[4]) {
   for (int i = 0; i < 4; i++, p_dst += FDEC_STRIDE) {
     p_dst[0] = WelsClip1 (p_dst[0] + ((dct[0] + 32) >> 6));
@@ -345,6 +351,9 @@
   EXPECT_EQ (ok, -1);
 }
 #if defined(X86_ASM)
+TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_sse2) {
+  TestIDctFourT4Rec<int16_t> (WelsIDctFourT4Rec_sse2);
+}
 TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_sse2) {
   int32_t iCpuCores = 0;
   uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);