ref: 9e75c01353ef1e47b5fddc5ad9dde65a9abeeed3
parent: 99adf8b22ed827f81e9501dd5068ca8a5d5d2d2e
parent: 3da752fe0083d2acf2c8436a5da4805883df086f
author: Kaustubh Raste <[email protected]>
date: Wed Oct 12 22:12:33 EDT 2016
Merge "Optimize vpx_mbpost_proc_across_ip_msa function"
--- a/vpx_dsp/mips/deblock_msa.c
+++ b/vpx_dsp/mips/deblock_msa.c
@@ -454,7 +454,7 @@
v16u8 tmp = { 0 };
v16i8 zero = { 0 };
v8u16 sum_h, src_r_h, src_l_h;
- v4u32 src_r_w, src_l_w;
+ v4u32 src_r_w;
v4i32 flimit_vec;
flimit_vec = __msa_fill_w(flimit);
@@ -473,9 +473,8 @@
src[15] = 0;
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
src_r_w = __msa_dotp_u_w(src_r_h, src_r_h);
- src_l_w = __msa_dotp_u_w(src_l_h, src_l_h);
+ src_r_w += __msa_dotp_u_w(src_l_h, src_l_h);
sum_sq = HADD_SW_S32(src_r_w);
- sum_sq += HADD_SW_S32(src_l_w);
sum_h = __msa_hadd_u_h(src, src);
sum = HADD_UH_U32(sum_h);
{