ref: 0615a196e220afed1f175f5e27801a9f334c4488
parent: 3bc94cd2eb37b9b70085dccd8dafdf6cd3819552
parent: 6356d21a47e2547ab3c657052e36939bde047cb3
author: JackyChen <[email protected]>
date: Fri Oct 17 07:16:22 EDT 2014
Merge "vp9_denoiser_sse2.c: solve windows build error."
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -41,13 +41,13 @@
static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
const uint8_t *mc_running_avg_y,
uint8_t *running_avg_y,
- const __m128i k_0,
- const __m128i k_4,
- const __m128i k_8,
- const __m128i k_16,
- const __m128i l3,
- const __m128i l32,
- const __m128i l21,
+ const __m128i *k_0,
+ const __m128i *k_4,
+ const __m128i *k_8,
+ const __m128i *k_16,
+ const __m128i *l3,
+ const __m128i *l32,
+ const __m128i *l21,
__m128i acc_diff) {
// Calculate differences
const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
@@ -57,24 +57,24 @@
const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
// Obtain the sign. FF if diff is negative.
- const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
+ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, *k_0);
// Clamp absolute difference to 16 to be used to get mask. Doing this
// allows us to use _mm_cmpgt_epi8, which operates on signed byte.
const __m128i clamped_absdiff = _mm_min_epu8(
- _mm_or_si128(pdiff, ndiff), k_16);
+ _mm_or_si128(pdiff, ndiff), *k_16);
// Get masks for l2 l1 and l0 adjustments.
- const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
- const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
- const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
+ const __m128i mask2 = _mm_cmpgt_epi8(*k_16, clamped_absdiff);
+ const __m128i mask1 = _mm_cmpgt_epi8(*k_8, clamped_absdiff);
+ const __m128i mask0 = _mm_cmpgt_epi8(*k_4, clamped_absdiff);
// Get adjustments for l2, l1, and l0.
- __m128i adj2 = _mm_and_si128(mask2, l32);
- const __m128i adj1 = _mm_and_si128(mask1, l21);
+ __m128i adj2 = _mm_and_si128(mask2, *l32);
+ const __m128i adj1 = _mm_and_si128(mask1, *l21);
const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
__m128i adj, padj, nadj;
// Combine the adjustments and get absolute adjustments.
adj2 = _mm_add_epi8(adj2, adj1);
- adj = _mm_sub_epi8(l3, adj2);
+ adj = _mm_sub_epi8(*l3, adj2);
adj = _mm_andnot_si128(mask0, adj);
adj = _mm_or_si128(adj, adj0);
@@ -178,8 +178,8 @@
acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
mc_running_buffer[r],
running_buffer[r],
- k_0, k_4, k_8, k_16,
- l3, l32, l21, acc_diff);
+ &k_0, &k_4, &k_8, &k_16,
+ &l3, &l32, &l21, acc_diff);
vpx_memcpy(running_avg_y, running_buffer[r], 4);
vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 4, 4);
vpx_memcpy(running_avg_y + avg_y_stride * 2,
@@ -279,8 +279,8 @@
acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
mc_running_buffer[r],
running_buffer[r],
- k_0, k_4, k_8, k_16,
- l3, l32, l21, acc_diff);
+ &k_0, &k_4, &k_8, &k_16,
+ &l3, &l32, &l21, acc_diff);
vpx_memcpy(running_avg_y, running_buffer[r], 8);
vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 8, 8);
// Update pointers for next iteration.
@@ -368,8 +368,8 @@
acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2(
sig, mc_running_avg_y,
running_avg_y,
- k_0, k_4, k_8, k_16,
- l3, l32, l21, acc_diff[c>>4][r>>4]);
+ &k_0, &k_4, &k_8, &k_16,
+ &l3, &l32, &l21, acc_diff[c>>4][r>>4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;