shithub: libvpx

Download patch

ref: d8985f5360515f3c8384505ff1d357f4dc20bb89
parent: bea691b5c90e146e8ede1f61d443e399c7d3bef8
parent: 176c291d9c1b92f7612ce1da08fd7cbdf55e4446
author: Jingning Han <[email protected]>
date: Fri Jun 12 15:03:20 EDT 2015

Merge "Fix potential overflow issue in hadamard_16x16()"

--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -88,11 +88,13 @@
                           int16_t *coeff) {
   int idx;
   for (idx = 0; idx < 4; ++idx) {
+    // src_diff: 9 bit, dynamic range [-255, 255]
     int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
                                 + (idx & 0x01) * 8;
     vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
   }
 
+  // coeff: 15 bit, dynamic range [-16320, 16320]
   for (idx = 0; idx < 64; ++idx) {
     int16_t a0 = coeff[0];
     int16_t a1 = coeff[64];
@@ -99,15 +101,15 @@
     int16_t a2 = coeff[128];
     int16_t a3 = coeff[192];
 
-    int16_t b0 = a0 + a1;
-    int16_t b1 = a0 - a1;
-    int16_t b2 = a2 + a3;
-    int16_t b3 = a2 - a3;
+    int16_t b0 = (a0 + a1) >> 1;  // (a0 + a1): 16 bit, [-32640, 32640]
+    int16_t b1 = (a0 - a1) >> 1;  // b0-b3: 15 bit, dynamic range
+    int16_t b2 = (a2 + a3) >> 1;  // [-16320, 16320]
+    int16_t b3 = (a2 - a3) >> 1;
 
-    coeff[0]   = (b0 + b2) >> 1;
-    coeff[64]  = (b1 + b3) >> 1;
-    coeff[128] = (b0 - b2) >> 1;
-    coeff[192] = (b1 - b3) >> 1;
+    coeff[0]   = b0 + b2;  // 16 bit, [-32640, 32640]
+    coeff[64]  = b1 + b3;
+    coeff[128] = b0 - b2;
+    coeff[192] = b1 - b3;
 
     ++coeff;
   }
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -264,17 +264,18 @@
     __m128i b2 = _mm_add_epi16(coeff2, coeff3);
     __m128i b3 = _mm_sub_epi16(coeff2, coeff3);
 
+    b0 = _mm_srai_epi16(b0, 1);
+    b1 = _mm_srai_epi16(b1, 1);
+    b2 = _mm_srai_epi16(b2, 1);
+    b3 = _mm_srai_epi16(b3, 1);
+
     coeff0 = _mm_add_epi16(b0, b2);
     coeff1 = _mm_add_epi16(b1, b3);
-    coeff0 = _mm_srai_epi16(coeff0, 1);
-    coeff1 = _mm_srai_epi16(coeff1, 1);
     _mm_store_si128((__m128i *)coeff, coeff0);
     _mm_store_si128((__m128i *)(coeff + 64), coeff1);
 
     coeff2 = _mm_sub_epi16(b0, b2);
     coeff3 = _mm_sub_epi16(b1, b3);
-    coeff2 = _mm_srai_epi16(coeff2, 1);
-    coeff3 = _mm_srai_epi16(coeff3, 1);
     _mm_store_si128((__m128i *)(coeff + 128), coeff2);
     _mm_store_si128((__m128i *)(coeff + 192), coeff3);