ref: 266868a40b704745de60db4f3a7867f462864f67
parent: bf43b4c4b47eff60e52e9d036de8618b4a1fece1
author: Yi Luo <[email protected]>
date: Mon Mar 20 13:18:10 EDT 2017
Make butterfly_self() signature consistent with butterfly(). - Refer to patch 48fca113d ("inv_txfm_ssse3,butterfly: fix win32 abi compatibility"). - Change four butterfly() calls to butterfly_self() to simplify the operations. Change-Id: Ib2a8cfe6cddcaf0a59e6e6270d8380055ea42ef3
--- a/vpx_dsp/x86/inv_txfm_ssse3.c
+++ b/vpx_dsp/x86/inv_txfm_ssse3.c
@@ -420,14 +420,14 @@
*y1 = _mm_packs_epi32(tmp2, tmp3);
}
-static INLINE void butterfly_self(__m128i *x0, __m128i *x1, const __m128i c0,
- const __m128i c1) {
+static INLINE void butterfly_self(__m128i *x0, __m128i *x1, const __m128i *c0,
+ const __m128i *c1) {
__m128i tmp0, tmp1, tmp2, tmp3, u0, u1;
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
u0 = _mm_unpacklo_epi16(*x0, *x1);
u1 = _mm_unpackhi_epi16(*x0, *x1);
- BUTTERFLY_PAIR(u0, u1, c0, c1);
+ BUTTERFLY_PAIR(u0, u1, *c0, *c1);
*x0 = _mm_packs_epi32(tmp0, tmp1);
*x1 = _mm_packs_epi32(tmp2, tmp3);
}
@@ -480,8 +480,8 @@
v13 = _mm_sub_epi16(u5, u7);
v14 = _mm_add_epi16(u5, u7);
- butterfly_self(&v10, &v13, stg6_0, stg4_0);
- butterfly_self(&v11, &v12, stg6_0, stg4_0);
+ butterfly_self(&v10, &v13, &stg6_0, &stg4_0);
+ butterfly_self(&v11, &v12, &stg6_0, &stg4_0);
// 1, 14
x1 = _mm_mulhrs_epi16(in[0], stk4_0); // stp1[1], stk4_1 = stk4_0
@@ -580,39 +580,39 @@
u29 = _mm_sub_epi16(v30, v29);
u30 = _mm_add_epi16(v29, v30);
- butterfly_self(&u18, &u29, stg4_4, stg4_5);
- butterfly_self(&u19, &u28, stg4_4, stg4_5);
- butterfly_self(&u20, &u27, stg4_6, stg4_4);
- butterfly_self(&u21, &u26, stg4_6, stg4_4);
+ butterfly_self(&u18, &u29, &stg4_4, &stg4_5);
+ butterfly_self(&u19, &u28, &stg4_4, &stg4_5);
+ butterfly_self(&u20, &u27, &stg4_6, &stg4_4);
+ butterfly_self(&u21, &u26, &stg4_6, &stg4_4);
stp1[16] = _mm_add_epi16(u16, u23);
- v23 = _mm_sub_epi16(u16, u23);
+ stp1[23] = _mm_sub_epi16(u16, u23);
stp1[17] = _mm_add_epi16(u17, u22);
- v22 = _mm_sub_epi16(u17, u22);
+ stp1[22] = _mm_sub_epi16(u17, u22);
stp1[18] = _mm_add_epi16(u18, u21);
- v21 = _mm_sub_epi16(u18, u21);
+ stp1[21] = _mm_sub_epi16(u18, u21);
stp1[19] = _mm_add_epi16(u19, u20);
- v20 = _mm_sub_epi16(u19, u20);
+ stp1[20] = _mm_sub_epi16(u19, u20);
- v24 = _mm_sub_epi16(u31, u24);
+ stp1[24] = _mm_sub_epi16(u31, u24);
stp1[31] = _mm_add_epi16(u24, u31);
- v25 = _mm_sub_epi16(u30, u25);
+ stp1[25] = _mm_sub_epi16(u30, u25);
stp1[30] = _mm_add_epi16(u25, u30);
- v26 = _mm_sub_epi16(u29, u26);
+ stp1[26] = _mm_sub_epi16(u29, u26);
stp1[29] = _mm_add_epi16(u26, u29);
- v27 = _mm_sub_epi16(u28, u27);
+ stp1[27] = _mm_sub_epi16(u28, u27);
stp1[28] = _mm_add_epi16(u27, u28);
- butterfly(&v20, &v27, &stg6_0, &stg4_0, &stp1[20], &stp1[27]);
- butterfly(&v21, &v26, &stg6_0, &stg4_0, &stp1[21], &stp1[26]);
- butterfly(&v22, &v25, &stg6_0, &stg4_0, &stp1[22], &stp1[25]);
- butterfly(&v23, &v24, &stg6_0, &stg4_0, &stp1[23], &stp1[24]);
+ butterfly_self(&stp1[20], &stp1[27], &stg6_0, &stg4_0);
+ butterfly_self(&stp1[21], &stp1[26], &stg6_0, &stg4_0);
+ butterfly_self(&stp1[22], &stp1[25], &stg6_0, &stg4_0);
+ butterfly_self(&stp1[23], &stp1[24], &stg6_0, &stg4_0);
}
// Only upper-left 8x8 has non-zero coeff
@@ -774,8 +774,8 @@
const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
- butterfly_self(&v9, &v14, stg4_4, stg4_5);
- butterfly_self(&v10, &v13, stg4_6, stg4_4);
+ butterfly_self(&v9, &v14, &stg4_4, &stg4_5);
+ butterfly_self(&v10, &v13, &stg4_6, &stg4_4);
}
out[0] = _mm_add_epi16(v8, v11);
@@ -790,8 +790,8 @@
{
const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
- butterfly_self(&out[2], &out[5], stg6_0, stg4_0);
- butterfly_self(&out[3], &out[4], stg6_0, stg4_0);
+ butterfly_self(&out[2], &out[5], &stg6_0, &stg4_0);
+ butterfly_self(&out[3], &out[4], &stg6_0, &stg4_0);
}
}
@@ -882,10 +882,10 @@
const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64);
const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
- butterfly_self(&v17, &v30, stg3_4, stg3_5);
- butterfly_self(&v18, &v29, stg3_6, stg3_4);
- butterfly_self(&v21, &v26, stg3_8, stg3_9);
- butterfly_self(&v22, &v25, stg3_10, stg3_8);
+ butterfly_self(&v17, &v30, &stg3_4, &stg3_5);
+ butterfly_self(&v18, &v29, &stg3_6, &stg3_4);
+ butterfly_self(&v21, &v26, &stg3_8, &stg3_9);
+ butterfly_self(&v22, &v25, &stg3_10, &stg3_8);
}
u16 = _mm_add_epi16(v16, v19);
@@ -910,10 +910,10 @@
const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
- butterfly_self(&u18, &u29, stg4_4, stg4_5);
- butterfly_self(&u19, &u28, stg4_4, stg4_5);
- butterfly_self(&u20, &u27, stg4_6, stg4_4);
- butterfly_self(&u21, &u26, stg4_6, stg4_4);
+ butterfly_self(&u18, &u29, &stg4_4, &stg4_5);
+ butterfly_self(&u19, &u28, &stg4_4, &stg4_5);
+ butterfly_self(&u20, &u27, &stg4_6, &stg4_4);
+ butterfly_self(&u21, &u26, &stg4_6, &stg4_4);
}
out[0] = _mm_add_epi16(u16, u23);
@@ -1069,8 +1069,8 @@
const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
- butterfly_self(&v9, &v14, stg4_4, stg4_5);
- butterfly_self(&v10, &v13, stg4_6, stg4_4);
+ butterfly_self(&v9, &v14, &stg4_4, &stg4_5);
+ butterfly_self(&v10, &v13, &stg4_6, &stg4_4);
}
out[0] = _mm_add_epi16(v8, v11);
@@ -1086,8 +1086,8 @@
{
const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
- butterfly_self(&out[2], &out[5], stg6_0, stg4_0);
- butterfly_self(&out[3], &out[4], stg6_0, stg4_0);
+ butterfly_self(&out[2], &out[5], &stg6_0, &stg4_0);
+ butterfly_self(&out[3], &out[4], &stg6_0, &stg4_0);
}
}
@@ -1208,10 +1208,10 @@
const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64);
const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64);
const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
- butterfly_self(&v17, &v30, stg3_4, stg3_5);
- butterfly_self(&v18, &v29, stg3_6, stg3_4);
- butterfly_self(&v21, &v26, stg3_8, stg3_9);
- butterfly_self(&v22, &v25, stg3_10, stg3_8);
+ butterfly_self(&v17, &v30, &stg3_4, &stg3_5);
+ butterfly_self(&v18, &v29, &stg3_6, &stg3_4);
+ butterfly_self(&v21, &v26, &stg3_8, &stg3_9);
+ butterfly_self(&v22, &v25, &stg3_10, &stg3_8);
}
u16 = _mm_add_epi16(v16, v19);
@@ -1237,10 +1237,10 @@
const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
- butterfly_self(&u18, &u29, stg4_4, stg4_5);
- butterfly_self(&u19, &u28, stg4_4, stg4_5);
- butterfly_self(&u20, &u27, stg4_6, stg4_4);
- butterfly_self(&u21, &u26, stg4_6, stg4_4);
+ butterfly_self(&u18, &u29, &stg4_4, &stg4_5);
+ butterfly_self(&u19, &u28, &stg4_4, &stg4_5);
+ butterfly_self(&u20, &u27, &stg4_6, &stg4_4);
+ butterfly_self(&u21, &u26, &stg4_6, &stg4_4);
}
out[0] = _mm_add_epi16(u16, u23);
@@ -1264,10 +1264,10 @@
{
const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
- butterfly_self(&out[4], &out[11], stg6_0, stg4_0);
- butterfly_self(&out[5], &out[10], stg6_0, stg4_0);
- butterfly_self(&out[6], &out[9], stg6_0, stg4_0);
- butterfly_self(&out[7], &out[8], stg6_0, stg4_0);
+ butterfly_self(&out[4], &out[11], &stg6_0, &stg4_0);
+ butterfly_self(&out[5], &out[10], &stg6_0, &stg4_0);
+ butterfly_self(&out[6], &out[9], &stg6_0, &stg4_0);
+ butterfly_self(&out[7], &out[8], &stg6_0, &stg4_0);
}
}