ref: 540318d3f8b6a89e3aeaac87f88b392b73e5c64a
parent: 6e5115e12e916e7d787e0daf2a30a7c610947871
parent: a5210082013a5f9ac868f918a02ecf3766bc107c
author: Jingning Han <[email protected]>
date: Tue Mar 3 14:04:34 EST 2015
Merge "Scale the normalization factor depending on the block size"
--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -32,12 +32,13 @@
void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
const int ref_stride, const int height) {
int idx;
+ const int norm_factor = MAX(8, height >> 1);
for (idx = 0; idx < 16; ++idx) {
int i;
hbuf[idx] = 0;
for (i = 0; i < height; ++i)
hbuf[idx] += ref[i * ref_stride];
- hbuf[idx] /= 32;
+ hbuf[idx] /= norm_factor;
++ref;
}
}
@@ -45,9 +46,10 @@
int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
int idx;
int16_t sum = 0;
+ const int norm_factor = MAX(8, width >> 1);
for (idx = 0; idx < width; ++idx)
sum += ref[idx];
- return sum / 32;
+ return sum / norm_factor;
}
int vp9_vector_var_c(int16_t const *ref, int16_t const *src,
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -90,8 +90,16 @@
s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1);
- s0 = _mm_srai_epi16(s0, 5);
- s1 = _mm_srai_epi16(s1, 5);
+ if (height == 64) {
+ s0 = _mm_srai_epi16(s0, 5);
+ s1 = _mm_srai_epi16(s1, 5);
+ } else if (height == 32) {
+ s0 = _mm_srai_epi16(s0, 4);
+ s1 = _mm_srai_epi16(s1, 4);
+ } else {
+ s0 = _mm_srai_epi16(s0, 3);
+ s1 = _mm_srai_epi16(s1, 3);
+ }
_mm_store_si128((__m128i *)hbuf, s0);
hbuf += 8;
@@ -104,6 +112,7 @@
__m128i s0 = _mm_sad_epu8(src_line, zero);
__m128i s1;
int i;
+ const int norm_factor = 3 + (width >> 5);
for (i = 16; i < width; i += 16) {
ref += 16;
@@ -115,7 +124,7 @@
s1 = _mm_srli_si128(s0, 8);
s0 = _mm_adds_epu16(s0, s1);
- return (_mm_extract_epi16(s0, 0)) >> 5;
+ return _mm_extract_epi16(s0, 0) >> norm_factor;
}
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src,