ref: 6a94d6ad8ed853225e0b19a6a5a68f60ae88495c
parent: 49bf2e2ffec18738b240778c7549820aac174cf4
parent: 0aef1bc89804078d52e4dea87c30b16f2656760e
author: Yaowu Xu <[email protected]>
date: Sat Jan 30 23:38:39 EST 2016
Merge "Enable sse2 version of inverse wht for hbd build"
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -699,7 +699,7 @@
specialize qw/vpx_iwht4x4_1_add/;
add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_iwht4x4_16_add/;
+ specialize qw/vpx_iwht4x4_16_add/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct4x4_1_add/;
--- a/vpx_dsp/x86/inv_wht_sse2.asm
+++ b/vpx_dsp/x86/inv_wht_sse2.asm
@@ -82,9 +82,15 @@
INIT_XMM sse2
cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride
+%if CONFIG_VP9_HIGHBITDEPTH
mova m0, [inputq + 0]
+ packssdw m0, [inputq + 16]
+ mova m1, [inputq + 32]
+ packssdw m1, [inputq + 48]
+%else
+ mova m0, [inputq + 0]
mova m1, [inputq + 16]
-
+%endif
psraw m0, 2
psraw m1, 2