ref: 8366b414dd73a298d22910574c9e988865ac0e12
parent: f075fdc474898329eaa37ccc47433d6ad45792de
author: Jian Zhou <[email protected]>
date: Fri Dec 18 07:45:23 EST 2015
Code cleanup of highbd_dc_predictor_4x4: MMX replaced with SSE2, same performance. Change-Id: Ic57855254e26757191933c948fac6aa047fadafc
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -141,7 +141,7 @@
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
@@ -162,7 +162,7 @@
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
@@ -194,7 +194,7 @@
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
10),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
@@ -218,7 +218,7 @@
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
@@ -251,7 +251,7 @@
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
12),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
@@ -275,7 +275,7 @@
#else
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -294,7 +294,7 @@
specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_4x4/;
--- a/vpx_dsp/x86/highbd_intrapred_sse2.asm
+++ b/vpx_dsp/x86/highbd_intrapred_sse2.asm
@@ -17,24 +17,20 @@
pw_32: times 4 dd 32
SECTION .text
-INIT_MMX sse
+INIT_XMM sse2
cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
GET_GOT goffsetq
movq m0, [aboveq]
movq m2, [leftq]
- DEFINE_ARGS dst, stride, one
- mov oned, 0x0001
- pxor m1, m1
- movd m3, oned
- pshufw m3, m3, 0x0
paddw m0, m2
- pmaddwd m0, m3
- packssdw m0, m1
- pmaddwd m0, m3
+ pshuflw m1, m0, 0xe
+ paddw m0, m1
+ pshuflw m1, m0, 0x1
+ paddw m0, m1
paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
- pshufw m0, m0, 0x0
+ pshuflw m0, m0, 0x0
movq [dstq ], m0
movq [dstq+strideq*2], m0
lea dstq, [dstq+strideq*4]