ref: 242157c756314827ad9244952c7253e8900b9626
parent: a0e8b45fee3990283013a8c7685f4397da48c08c
parent: 384e37e32b95d7b30a529da4b10c41f15aa3bd80
author: Jingning Han <[email protected]>
date: Thu Jul 25 04:49:37 EDT 2013
Merge "SSE2 inverse 4x4 2D-DCT with DC only"
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -292,7 +292,7 @@
# dct
#
prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct4x4_1_add
+specialize vp9_short_idct4x4_1_add sse2
prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct4x4_add sse2
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -148,6 +148,23 @@
RECON_AND_STORE4X4(dest, input3);
}
+void vp9_short_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {  // DC-only 4x4 inverse DCT + add: only input[0] is read; dest is updated through RECON_AND_STORE4X4.
+ __m128i dc_value;  // holds the final DC term replicated across the vector (assigned below)
+ const __m128i zero = _mm_setzero_si128();  // never referenced by name in this body; presumably consumed inside RECON_AND_STORE4X4 -- confirm against the macro definition
+ int a;  // scalar working value for the DC term
+ // Multiply the DC coefficient by cospi_16_64 twice, passing each product through dct_const_round_shift (presumably one scaling per 1-D pass of the 2-D transform -- confirm).
+ a = dct_const_round_shift(input[0] * cospi_16_64);
+ a = dct_const_round_shift(a * cospi_16_64);
+ a = ROUND_POWER_OF_TWO(a, 4);  // final ROUND_POWER_OF_TWO step with shift amount 4
+ // Broadcast the scalar (implicitly narrowed to 16 bits by the intrinsic's parameter type) to all eight 16-bit lanes.
+ dc_value = _mm_set1_epi16(a);
+ // The macro is defined outside this view and is invoked four times with the same arguments; presumably each call handles one row and advances dest using stride (stride is not referenced by name here) -- confirm against the macro definition.
+ RECON_AND_STORE4X4(dest, dc_value);
+ RECON_AND_STORE4X4(dest, dc_value);
+ RECON_AND_STORE4X4(dest, dc_value);
+ RECON_AND_STORE4X4(dest, dc_value);
+}
+
void vp9_idct4_1d_sse2(int16_t *input, int16_t *output) {
const __m128i zero = _mm_setzero_si128();
const __m128i c1 = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,