ref: d8985f5360515f3c8384505ff1d357f4dc20bb89
parent: bea691b5c90e146e8ede1f61d443e399c7d3bef8
parent: 176c291d9c1b92f7612ce1da08fd7cbdf55e4446
author: Jingning Han <[email protected]>
date: Fri Jun 12 15:03:20 EDT 2015
Merge "Fix potential overflow issue in hadamard_16x16()"
--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -88,11 +88,13 @@
int16_t *coeff) {
int idx;
for (idx = 0; idx < 4; ++idx) {
+ // src_diff: 9 bit, dynamic range [-255, 255]
int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
+ (idx & 0x01) * 8;
vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
}
+ // coeff: 15 bit, dynamic range [-16320, 16320]
for (idx = 0; idx < 64; ++idx) {
int16_t a0 = coeff[0];
int16_t a1 = coeff[64];
@@ -99,15 +101,15 @@
int16_t a2 = coeff[128];
int16_t a3 = coeff[192];
- int16_t b0 = a0 + a1;
- int16_t b1 = a0 - a1;
- int16_t b2 = a2 + a3;
- int16_t b3 = a2 - a3;
+ int16_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640]
+ int16_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range
+ int16_t b2 = (a2 + a3) >> 1; // [-16320, 16320]
+ int16_t b3 = (a2 - a3) >> 1;
- coeff[0] = (b0 + b2) >> 1;
- coeff[64] = (b1 + b3) >> 1;
- coeff[128] = (b0 - b2) >> 1;
- coeff[192] = (b1 - b3) >> 1;
+ coeff[0] = b0 + b2; // 16 bit, [-32640, 32640]
+ coeff[64] = b1 + b3;
+ coeff[128] = b0 - b2;
+ coeff[192] = b1 - b3;
++coeff;
}
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -264,17 +264,18 @@
__m128i b2 = _mm_add_epi16(coeff2, coeff3);
__m128i b3 = _mm_sub_epi16(coeff2, coeff3);
+ b0 = _mm_srai_epi16(b0, 1);
+ b1 = _mm_srai_epi16(b1, 1);
+ b2 = _mm_srai_epi16(b2, 1);
+ b3 = _mm_srai_epi16(b3, 1);
+
coeff0 = _mm_add_epi16(b0, b2);
coeff1 = _mm_add_epi16(b1, b3);
- coeff0 = _mm_srai_epi16(coeff0, 1);
- coeff1 = _mm_srai_epi16(coeff1, 1);
_mm_store_si128((__m128i *)coeff, coeff0);
_mm_store_si128((__m128i *)(coeff + 64), coeff1);
coeff2 = _mm_sub_epi16(b0, b2);
coeff3 = _mm_sub_epi16(b1, b3);
- coeff2 = _mm_srai_epi16(coeff2, 1);
- coeff3 = _mm_srai_epi16(coeff3, 1);
_mm_store_si128((__m128i *)(coeff + 128), coeff2);
_mm_store_si128((__m128i *)(coeff + 192), coeff3);